CallAwis.getSignatureKey() - Code Metrics - Inspection of "Merge pull request #16 from liehendi11/master" - ashim888/awis - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( a0d81a...878c84 )

by Ashim

created 2018-03-25 05:45 UTC

CallAwis.getSignatureKey() A

↳ Parent: CallAwis.create_v4_signature()

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	1
c	1
b	0
f	0
dl	0
loc	6
rs	9.4285

import datetime
import hashlib
import hmac

import requests  # pip install requests
import xmltodict
from bs4 import BeautifulSoup

try:
    from urllib import quote, urlencode
except ImportError:
    from urllib.parse import quote, urlencode

# Comma-separated response groups requested by default for the UrlInfo action.
URLINFO_RESPONSE_GROUPS = (
    "RelatedLinks,Categories,Rank,ContactInfo,RankByCountry,"
    "UsageStats,Speed,Language,OwnedDomains,LinksInCount,"
    "SiteData,AdultContent"
)

# Default response group for the TrafficHistory action.
TRAFFICINFO_RESPONSE_GROUPS = "History"

# Default value of cat_browse()'s response_group parameter.
CATEGORYBROWSE_RESPONSE_GROUPS = (
    "Categories,RelatedCategories,LanguageCategories,LetterBars"
)


def is_string(obj):
    """Return True if obj is a text string, on both Python 2 and Python 3."""
    try:
        text_types = basestring  # python 2: covers str and unicode
    except NameError:
        text_types = str  # python 3: basestring does not exist
    return isinstance(obj, text_types)

class CallAwis(object):
    '''
    Small client for the Alexa Web Information Service (AWIS) HTTP API.
    Every query method builds a parameter dictionary, signs it with AWS
    Signature Version 4 and returns the response parsed by BeautifulSoup.
    '''

    def __init__(self, access_id, secret_access_key):
        '''
        Store the credentials used to sign requests.
        :param access_id: AWS access key id, placed in the Credential field
        :param secret_access_key: AWS secret key the signing key is derived from
        '''
        self.access_id = access_id
        self.secret_access_key = secret_access_key

    @staticmethod
    def _uri_encode(value):
        '''
        Percent-encode one query name or value as Signature V4 requires.
        Only A-Z a-z 0-9 - _ . ~ are left unescaped, so a space becomes %20;
        urlencode() would emit "+" instead, which invalidates the signature.
        :param value: text, bytes, or any object with a str() form (e.g. int)
        :return: the percent-encoded string
        '''
        if isinstance(value, type(u'')):
            value = value.encode('utf-8')
        elif not isinstance(value, bytes):
            # Non-string values such as Range/Start are sent in str() form.
            value = str(value).encode('utf-8')
        return quote(value, safe='-_.~')

    def create_v4_signature(self, request_params):
        '''
        Create URI and signature headers based on AWS V4 signing process.
        Refer to https://docs.aws.amazon.com/AlexaWebInfoService/latest/ApiReferenceArticle.html for request params.
        :param request_params: dictionary of request parameters
        :return: URL and header to be passed to requests.get
        '''

        method = 'GET'
        service = 'awis'
        # NOTE(review): the signed host differs from the host in `endpoint`;
        # kept exactly as it was - confirm against the AWIS documentation.
        host = 'awis.us-west-1.amazonaws.com'
        region = 'us-west-1'
        endpoint = 'https://awis.amazonaws.com/api'

        # Canonical query string: every name and value percent-encoded, pairs
        # sorted by encoded name, joined as name=value with "&".
        encoded_pairs = sorted(
            (self._uri_encode(name), self._uri_encode(value))
            for name, value in request_params.items()
        )
        request_parameters = '&'.join(
            name + '=' + value for name, value in encoded_pairs
        )

        # Key derivation functions. See:
        # http://docs.aws.amazon.com/general/latest/gr/signature-v4-examples.html#signature-v4-examples-python
        def sign(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

        def getSignatureKey(key, dateStamp, regionName, serviceName):
            kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
            kRegion = sign(kDate, regionName)
            kService = sign(kRegion, serviceName)
            kSigning = sign(kService, 'aws4_request')
            return kSigning

        # Create a date for headers and the credential string
        t = datetime.datetime.utcnow()
        amzdate = t.strftime('%Y%m%dT%H%M%SZ')
        datestamp = t.strftime('%Y%m%d')  # Date w/o time, used in credential scope

        # Create canonical request
        canonical_uri = '/api'
        canonical_querystring = request_parameters
        canonical_headers = 'host:' + host + '\n' + 'x-amz-date:' + amzdate + '\n'
        signed_headers = 'host;x-amz-date'
        # GET requests carry no body, so the payload hash is the hash of ''.
        payload_hash = hashlib.sha256(''.encode('utf8')).hexdigest()
        canonical_request = '\n'.join([
            method,
            canonical_uri,
            canonical_querystring,
            canonical_headers,
            signed_headers,
            payload_hash,
        ])

        # Create string to sign
        algorithm = 'AWS4-HMAC-SHA256'
        credential_scope = '/'.join([datestamp, region, service, 'aws4_request'])
        string_to_sign = '\n'.join([
            algorithm,
            amzdate,
            credential_scope,
            hashlib.sha256(canonical_request.encode('utf8')).hexdigest(),
        ])

        # Calculate signature
        signing_key = getSignatureKey(self.secret_access_key, datestamp, region, service)

        # Sign the string_to_sign using the signing_key
        signature = hmac.new(signing_key, string_to_sign.encode('utf-8'), hashlib.sha256).hexdigest()

        # Add signing information to the request
        authorization_header = (
            algorithm + ' ' +
            'Credential=' + self.access_id + '/' + credential_scope + ', ' +
            'SignedHeaders=' + signed_headers + ', ' +
            'Signature=' + signature
        )
        headers = {
            'X-Amz-Date': amzdate,
            'Authorization': authorization_header,
            'Content-Type': 'application/xml',
            'Accept': 'application/xml',
        }

        # Create request url; it must carry exactly the query that was signed.
        request_url = endpoint + '?' + canonical_querystring

        return request_url, headers

    def urlinfo(self, domain, response_group=URLINFO_RESPONSE_GROUPS):
        '''
        Send a signed UrlInfo request for the supplied domain.
        :param domain: value sent as the Url parameter
        :param response_group: value sent as the ResponseGroup parameter
        :return: BeautifulSoup document parsed from the response body
        '''
        params = {
            'Action': "UrlInfo",
            'Url': domain,
            'ResponseGroup': response_group
        }

        url, headers = self.create_v4_signature(params)
        return self.return_output(url, headers)

    def traffichistory(self, domain, response_group=TRAFFICINFO_RESPONSE_GROUPS, myrange=31, start=20070801):
        '''
        Send a signed TrafficHistory request for the supplied domain.
        :param domain: value sent as the Url parameter
        :param response_group: value sent as the ResponseGroup parameter
        :param myrange: value sent as the Range parameter
        :param start: value sent as the Start parameter
        :return: BeautifulSoup document parsed from the response body
        '''
        params = {
            'Action': "TrafficHistory",
            'Url': domain,
            'ResponseGroup': response_group,
            'Range': myrange,
            'Start': start,
        }

        url, headers = self.create_v4_signature(params)
        return self.return_output(url, headers)

    def cat_browse(self, domain, path, response_group=CATEGORYBROWSE_RESPONSE_GROUPS, descriptions='True'):
        '''
        Send a signed CategoryListings request for the given category path.
        :param domain: accepted for backward compatibility; not sent in the request
        :param path: category path, sent as the Path parameter
        :param response_group: accepted for backward compatibility; the request
            always sends ResponseGroup=Listings
        :param descriptions: value sent as the Descriptions parameter
        :return: BeautifulSoup document parsed from the response body
        '''
        params = {
            'Action': "CategoryListings",
            'ResponseGroup': 'Listings',
            # NOTE(review): quote() here plus the encoding applied while signing
            # means '%' escapes in the path are encoded twice - confirm this is
            # what the service expects before changing it.
            'Path': quote(path),
            'Descriptions': descriptions
        }

        url, headers = self.create_v4_signature(params)
        return self.return_output(url, headers)

    def return_output(self, url, headers):
        '''
        Perform the GET request and parse the response body as XML.
        :param url: signed request URL from create_v4_signature
        :param headers: signed request headers from create_v4_signature
        :return: BeautifulSoup document built with the 'xml' parser
        '''
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # the caller indefinitely.
        r = requests.get(url, headers=headers)
        soup = BeautifulSoup(r.text.encode('utf-8'), 'xml')
        return soup


def flatten_urlinfo(urlinfo, shorter_keys=True):
    """Convert a urlinfo response into a single-level dictionary.

    The response is parsed with xmltodict and the subtree under
    aws:UrlInfoResponse/Response/UrlInfoResult/Alexa is walked recursively.
    Keys are the dot-joined path to each leaf. When shorter_keys is true,
    entries found inside a list keep only the last path component as their
    prefix. An "OutputTimestamp" entry holding the current UTC time is added.
    """
    flat = {}

    def walk(node, path=""):
        # Strings have a length but are leaves, so handle them first.
        if is_string(node):
            flat[path[1:]] = node
            return
        try:
            len(node)
        except (AttributeError, TypeError):  # unsized value: a leaf
            flat[path[1:]] = node
            return

        try:
            pairs = node.items()
        except AttributeError:  # sized but not a mapping: treat as a list
            pairs = None

        if pairs is not None:
            # A mapping: descend one level, extending the dotted path.
            for name, child in pairs:
                walk(child, ".".join([path, name]))
            return

        tail = path.split(".")[-1]
        if shorter_keys:
            path = "." + tail

        if tail == "Country":
            # Key each country entry by its @Code attribute.
            for entry in node:
                code = entry.pop("@Code")
                walk(entry, ".".join([path, code]))
        elif tail in ("RelatedLink", "CategoryData"):
            # No natural key available: use the position in the list.
            for position, entry in enumerate(node):
                walk(entry, ".".join([path, str(position)]))
        elif node[0].get("TimeRange"):
            for entry in node:
                # python 3 odict_items don't support indexing, hence tuple()
                first_pair = tuple(entry.pop("TimeRange").items())[0]
                label = ".".join(first_pair)
                if entry.get("DataUrl"):
                    label = ".".join([entry.pop("DataUrl"), label])
                walk(entry, ".".join([path, label]))
        else:
            msg = path + " contains a list we don't know how to flatten."
            raise NotImplementedError(msg)

    parsed = xmltodict.parse(str(urlinfo))
    walk(parsed["aws:UrlInfoResponse"]["Response"]["UrlInfoResult"]["Alexa"])
    stamp = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
    flat["OutputTimestamp"] = stamp
    return flat


1			import datetime
2			import hashlib
3			import hmac
4
5			import requests # pip install requests
6			import xmltodict
7			from bs4 import BeautifulSoup
8
9			try:
10			from urllib import quote, urlencode
11			except ImportError:
12			from urllib.parse import quote, urlencode
13
14			URLINFO_RESPONSE_GROUPS = ",".join(
15			["RelatedLinks", "Categories", "Rank", "ContactInfo", "RankByCountry",
16			"UsageStats", "Speed", "Language", "OwnedDomains", "LinksInCount",
17			"SiteData", "AdultContent"])
18
19			TRAFFICINFO_RESPONSE_GROUPS = "History"
20			CATEGORYBROWSE_RESPONSE_GROUPS = ",".join(["Categories", "RelatedCategories", "LanguageCategories", "LetterBars"])
21
22
23			def is_string(obj):
24			try:
25			return isinstance(obj, basestring) # python 2
26			except NameError:
27			return isinstance(obj, str) # python 3
28
29			class CallAwis(object):
30			def __init__(self, access_id, secret_access_key):
31			self.access_id = access_id
32			self.secret_access_key = secret_access_key
33
34			def create_v4_signature(self, request_params):
35			'''
36			Create URI and signature headers based on AWS V4 signing process.
37			Refer to https://docs.aws.amazon.com/AlexaWebInfoService/latest/ApiReferenceArticle.html for request params.
38			:param request_params: dictionary of request parameters
39			:return: URL and header to be passed to requests.get
40			'''
41
42			method = 'GET'
43			service = 'awis'
44			host = 'awis.us-west-1.amazonaws.com'
45			region = 'us-west-1'
46			endpoint = 'https://awis.amazonaws.com/api'
47			request_parameters = urlencode([(key, request_params[key]) for key in sorted(request_params.keys())])
48
49			# Key derivation functions. See:
50			# http://docs.aws.amazon.com/general/latest/gr/signature-v4-examples.html#signature-v4-examples-python
51			def sign(key, msg):
52			return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()
53
54			def getSignatureKey(key, dateStamp, regionName, serviceName):
55			kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
56			kRegion = sign(kDate, regionName)
57			kService = sign(kRegion, serviceName)
58			kSigning = sign(kService, 'aws4_request')
59			return kSigning
60
61			# Create a date for headers and the credential string
62			t = datetime.datetime.utcnow()
63			amzdate = t.strftime('%Y%m%dT%H%M%SZ')
64			datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope
65
66			# Create canonical request
67			canonical_uri = '/api'
68			canonical_querystring = request_parameters
69			canonical_headers = 'host:' + host + '\n' + 'x-amz-date:' + amzdate + '\n'
70			signed_headers = 'host;x-amz-date'
71			payload_hash = hashlib.sha256(''.encode('utf8')).hexdigest()
72			canonical_request = method + '\n' + canonical_uri + '\n' + canonical_querystring + '\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash
73
74			# Create string to sign
75			algorithm = 'AWS4-HMAC-SHA256'
76			credential_scope = datestamp + '/' + region + '/' + service + '/' + 'aws4_request'
77			string_to_sign = algorithm + '\n' + amzdate + '\n' + credential_scope + '\n' + hashlib.sha256(canonical_request.encode('utf8')).hexdigest()
78
79			# Calculate signature
80			signing_key = getSignatureKey(self.secret_access_key, datestamp, region, service)
81
82			# Sign the string_to_sign using the signing_key
83			signature = hmac.new(signing_key, (string_to_sign).encode('utf-8'), hashlib.sha256).hexdigest()
84
85			# Add signing information to the request
86			authorization_header = algorithm + ' ' + 'Credential=' + self.access_id + '/' + credential_scope + ', ' + 'SignedHeaders=' + signed_headers + ', ' + 'Signature=' + signature
87			headers = {'X-Amz-Date':amzdate, 'Authorization':authorization_header, 'Content-Type': 'application/xml', 'Accept': 'application/xml'}
88
89			# Create request url
90			request_url = endpoint + '?' + canonical_querystring
91
92			return request_url, headers
93
94			def urlinfo(self, domain, response_group = URLINFO_RESPONSE_GROUPS):
95			'''
96			Provide information about supplied domain as specified by the response group
97			:param domain: Any valid URL
98			:param response_group: Any valid urlinfo response group
99			:return: Traffic and/or content data of the domain in XML format
100			'''
101			params = {
102			'Action': "UrlInfo",
103			'Url': domain,
104			'ResponseGroup': response_group
105			}
106
107			url, headers = self.create_v4_signature(params)
108			return self.return_output(url, headers)
109
110			def traffichistory(self, domain, response_group=TRAFFICINFO_RESPONSE_GROUPS, myrange=31, start=20070801):
111			'''
112			Provide traffic history of supplied domain
113			:param domain: Any valid URL
114			:param response_group: Any valid traffic history response group
115			:return: Traffic and/or content data of the domain in XML format
116			'''
117			params = {
118			'Action': "TrafficHistory",
119			'Url': domain,
120			'ResponseGroup': response_group,
121			'Range': myrange,
122			'Start': start,
123			}
124
125			url, headers = self.create_v4_signature(params)
126			return self.return_output(url, headers)
127
128			def cat_browse(self, domain, path, response_group=CATEGORYBROWSE_RESPONSE_GROUPS, descriptions='True'):
129			'''
130			Provide category browse information of specified domain
131			:param domain: Any valid URL
132			:param path: Valid category path
133			:param response_group: Any valid traffic history response group
134			:return: Traffic and/or content data of the domain in XML format
135			'''
136			params = {
137			'Action': "CategoryListings",
138			'ResponseGroup': 'Listings',
139			'Path': quote(path),
140			'Descriptions': descriptions
141			}
142
143			url, headers = self.create_v4_signature(params)
144			return self.return_output(url, headers)
145
146			def return_output(self, url, headers):
147			r = requests.get(url, headers=headers)
148			soup = BeautifulSoup(r.text.encode('utf-8'), 'xml')
149			return soup
150
151
152			def flatten_urlinfo(urlinfo, shorter_keys=True):
153			""" Takes a urlinfo object and returns a flat dictionary."""
154			def flatten(value, prefix=""):
155			if is_string(value):
156			_result[prefix[1:]] = value
157			return
158			try:
159			len(value)
160			except (AttributeError, TypeError): # a leaf
161			_result[prefix[1:]] = value
162			return
163
164			try:
165			items = value.items()
166			except AttributeError: # an iterable, but not a dict
167			last_prefix = prefix.split(".")[-1]
168			if shorter_keys:
169			prefix = "." + last_prefix
170
171			if last_prefix == "Country":
172			for v in value:
173			country = v.pop("@Code")
174			flatten(v, ".".join([prefix, country]))
175			elif last_prefix in ["RelatedLink", "CategoryData"]:
176			for i, v in enumerate(value):
177			flatten(v, ".".join([prefix, str(i)]))
178			elif value[0].get("TimeRange"):
179			for v in value:
180			time_range = ".".join(tuple(v.pop("TimeRange").items())[0])
181			# python 3 odict_items don't support indexing
182			if v.get("DataUrl"):
183			time_range = ".".join([v.pop("DataUrl"), time_range])
184			flatten(v, ".".join([prefix, time_range]))
185			else:
186			msg = prefix + " contains a list we don't know how to flatten."
187			raise NotImplementedError(msg)
188			else: # a dict, go one level deeper
189			for k, v in items:
190			flatten(v, ".".join([prefix, k]))
191
192			_result = {}
193			info = xmltodict.parse(str(urlinfo))
194			flatten(info["aws:UrlInfoResponse"]["Response"]["UrlInfoResult"]["Alexa"])
195			_result["OutputTimestamp"] = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
196			return _result
197

ashim888 / awis

GitHub Access Token became invalid

Push — master ( a0d81a...878c84 )

CallAwis.getSignatureKey() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like