|
1
|
|
|
""" |
|
2
|
|
|
|
|
3
|
|
|
This module contains various utilities. |
|
4
|
|
|
|
|
5
|
|
|
""" |
|
6
|
|
|
from datetime import timedelta, datetime |
|
7
|
|
|
import tempfile |
|
8
|
|
|
import traceback |
|
9
|
|
|
from mako.lookup import TemplateLookup |
|
10
|
|
|
import os |
|
11
|
|
|
import pkg_resources |
|
12
|
|
|
import re |
|
13
|
|
|
from lxml import etree |
|
14
|
|
|
from time import gmtime, strftime, clock |
|
15
|
|
|
from pyff.logs import log |
|
16
|
|
|
import threading |
|
17
|
|
|
import requests |
|
18
|
|
|
import requests_cache |
|
19
|
|
|
from email.utils import parsedate |
|
20
|
|
|
|
|
21
|
|
|
__author__ = 'leifj' |
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
class PyffException(Exception):
    """Base exception type for errors raised by pyff."""
    pass
|
26
|
|
|
|
|
27
|
|
|
|
|
28
|
|
|
def _e(error_log, m=None): |
|
29
|
|
|
def _f(x): |
|
30
|
|
|
if ":WARNING:" in x: |
|
31
|
|
|
return False |
|
32
|
|
|
if m is not None and not m in x: |
|
33
|
|
|
return False |
|
34
|
|
|
return True |
|
35
|
|
|
|
|
36
|
|
|
return "\n".join(filter(_f, ["%s" % e for e in error_log])) |
|
37
|
|
|
|
|
38
|
|
|
|
|
39
|
|
|
def debug_observer(e):
    # Observer callback for debugging: dump the raw event via repr at error level.
    log.error(repr(e))
|
41
|
|
|
|
|
42
|
|
|
|
|
43
|
|
|
def resource_string(name, pfx=None):
    """
    Attempt to load and return the contents (as a string) of the resource named by
    the first argument in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    """
    name = os.path.expanduser(name)

    # 1. plain filesystem path
    if os.path.exists(name):
        with open(name) as fd:
            return fd.read()

    # 2. filesystem path under the prefix
    if pfx:
        prefixed = os.path.join(pfx, name)
        if os.path.exists(prefixed):
            with open(prefixed) as fd:
                return fd.read()

    # 3. packaged resource
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_string(__name__, name)

    # 4. packaged resource under the prefix
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_string(__name__, packaged)

    return None
|
73
|
|
|
|
|
74
|
|
|
|
|
75
|
|
|
def resource_filename(name, pfx=None):
    """
    Attempt to find and return the filename of the resource named by the first argument
    in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    """
    # 1. plain filesystem path
    if os.path.exists(name):
        return name

    # 2. filesystem path under the prefix
    if pfx:
        prefixed = os.path.join(pfx, name)
        if os.path.exists(prefixed):
            return prefixed

    # 3. packaged resource
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)

    # 4. packaged resource under the prefix
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_filename(__name__, packaged)

    return None
|
102
|
|
|
|
|
103
|
|
|
|
|
104
|
|
|
def dmerge(a, b):
    """
    Deep merge of two isomorphically structured dictionaries.

    Values from *b* win on conflicts, except where both sides hold dicts,
    which are merged recursively (in place, into *a*).

    :param a: The dictionary to merge into (modified in place)
    :param b: The dictionary to merge from
    """
    # BUG FIX: the original recursed into nested dicts and then called
    # a.update(b), which replaced each merged sub-dict with b's unmerged
    # copy — so the deep merge never survived. Merge key-by-key instead.
    for k, bv in b.items():
        av = a.get(k)
        if isinstance(av, dict) and isinstance(bv, dict):
            dmerge(av, bv)
        else:
            a[k] = bv
|
116
|
|
|
|
|
117
|
|
|
|
|
118
|
|
|
def tdelta(input):
    """
    Parse a time delta from expressions like 1w 32d 4h 5s - i.e in weeks,
    days, hours, minutes and/or seconds.

    :param input: A human-friendly string representation of a timedelta
    :return: the corresponding datetime.timedelta (zero for an empty string)
    """
    # Each unit is matched by its first letter (w/d/h/m/s). The original
    # docstring promised seconds support but the code stopped at minutes;
    # the "seconds" group below restores the documented behavior.
    keys = ["weeks", "days", "hours", "minutes", "seconds"]
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    kwargs = {}
    # every group is optional, so this pattern always matches; missing
    # groups default to "0"
    for k, v in re.match(regex, input).groupdict(default="0").items():
        kwargs[k] = int(v)
    return timedelta(**kwargs)
|
130
|
|
|
|
|
131
|
|
|
|
|
132
|
|
|
def dumptree(t, pretty_print=False, xml_declaration=True):
    """
    Serialize an element tree to a UTF-8 string.

    :param t: An ElemenTree to serialize
    :param pretty_print: indent the output for readability (default False)
    :param xml_declaration: include the leading <?xml ...?> declaration (default True)
    """
    serialized = etree.tostring(t,
                                encoding='UTF-8',
                                pretty_print=pretty_print,
                                xml_declaration=xml_declaration)
    return serialized
|
139
|
|
|
|
|
140
|
|
|
|
|
141
|
|
|
def iso_now():
    """
    Current time in ISO format (UTC, 'Z' suffix).
    """
    now_utc = gmtime()
    return strftime("%Y-%m-%dT%H:%M:%SZ", now_utc)
|
146
|
|
|
|
|
147
|
|
|
|
|
148
|
|
|
class ResourceResolver(etree.Resolver):
    """lxml resolver that serves schema documents from package resources."""

    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API
        """
        log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
        # only the final path component is meaningful; look it up first at the
        # package root, then under schema/
        fn = system_url.split("/")[-1]
        if pkg_resources.resource_exists(__name__, fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, fn), context)
        if pkg_resources.resource_exists(__name__, "schema/%s" % fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, "schema/%s" % fn), context)
        raise ValueError("Unable to locate %s" % fn)
|
162
|
|
|
|
|
163
|
|
|
|
|
164
|
|
|
# Cached compiled XMLSchema instance; built lazily by schema().
_SCHEMA = None
|
165
|
|
|
|
|
166
|
|
|
|
|
167
|
|
|
def schema():
    """
    Return the compiled metadata XMLSchema, building and caching it on
    first use.

    :raises etree.XMLSchemaParseError: if the bundled schema fails to compile
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            # resolve schema includes/imports from package resources
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            # fix: py2-only "except X, ex" syntax; bare raise keeps the traceback
            log.error(_e(ex.error_log))
            raise
    return _SCHEMA
|
179
|
|
|
|
|
180
|
|
|
|
|
181
|
|
|
def safe_write(fn, data):
    """Safely write data to a file with name fn

    The data is first written to a temporary file in the target directory
    and then renamed into place, so readers never observe a partial write.

    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        # temp file must live in the same directory so the rename below
        # stays on a single filesystem (and is atomic on POSIX)
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            tmp.write(data)
            tmpn = tmp.name
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:  # fix: was py2-only "except Exception, ex"
        log.error(ex)
    finally:
        # best-effort cleanup of the temp file when the rename never happened
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception as ex:
                log.warn(ex)
    return False
|
207
|
|
|
|
|
208
|
|
|
|
|
209
|
|
|
# Directory holding the bundled web resources, next to this module.
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
# Mako template lookup rooted at the packaged site/templates directory.
templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])
|
211
|
|
|
|
|
212
|
|
|
|
|
213
|
|
|
def template(name):
    # Look up a Mako template by name from the site/templates directory.
    return templates.get_template(name)
|
215
|
|
|
|
|
216
|
|
|
|
|
217
|
|
|
class URLFetch(threading.Thread):
    """
    Background thread that fetches a single resource over http(s) or file://.

    The outcome is left in instance attributes after run() completes:
    ``result`` (content, or None on failure), ``ex`` (the exception, if any),
    ``status``, ``cached``, ``date`` and ``last_modified``.
    """

    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        self.url = url.strip()
        self.verify = verify
        self.id = id
        self.result = None
        self.ex = None
        self.cached = False
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        self.resp = None
        self.status = None  # fix: previously unset until an HTTP fetch ran
        self.start_time = 0
        self.end_time = 0
        self.tries = tries  # fix: was assigned twice (0, then tries)

        if self.id is None:
            self.id = self.url

        threading.Thread.__init__(self)

    def time(self):
        """
        Return the wall-clock duration of the completed fetch.

        :raises ValueError: if the fetcher thread is still running
        """
        if self.is_alive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    def run(self):

        def _parse_date(s):
            # fix: was datetime.new(), which raised AttributeError whenever
            # the response carried no usable date header
            if s is None:
                return datetime.now()
            return datetime(*parsedate(s)[:6])

        self.start_time = clock()
        try:
            requests_cache.install_cache('.cache')
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                requests_cache.get_cache().delete_url(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                # NOTE(review): self.verify is stored but never used here —
                # TLS verification is unconditionally disabled. Confirm intent.
                self.resp = requests.get(self.url, timeout=60, verify=False)
                self.last_modified = _parse_date(self.resp.headers.get('last-modified', self.resp.headers.get('date', None)))
                self.date = _parse_date(self.resp.headers['date'])
                self.cached = getattr(self.resp, 'from_cache', False)
                self.status = self.resp.status_code
                if self.resp.status_code != 200:
                    raise IOError(self.resp.reason)
                self.result = self.resp.content

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:  # fix: was py2-only "except Exception, ex"
            traceback.print_exc()
            log.warn("unable to fetch '%s': %s" % (self.url, ex))
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()
|
289
|
|
|
|
|
290
|
|
|
|
|
291
|
|
|
def root(t):
    """Return the root element of *t*: ElementTree objects are unwrapped via
    getroot(), plain elements are returned unchanged."""
    getroot = getattr(t, 'getroot', None)
    if getroot is not None and hasattr(getroot, '__call__'):
        return getroot()
    return t
|
296
|
|
|
|
|
297
|
|
|
|
|
298
|
|
|
def duration2timedelta(period):
    """
    Convert an ISO 8601 duration string (e.g. 'PT1H', '-P1D') to a timedelta.

    Months are approximated as 30 days and years as 365 days.

    :param period: an ISO 8601 duration string
    :return: a datetime.timedelta, or None if *period* does not parse
    """
    # fix: raw strings so \d is a regex escape, not a (deprecated) string escape
    regex = re.compile(
        r'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?'
        r'(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')

    m = regex.match(period)
    if not m:
        return None

    # Fetch the match groups with default value of 0 (not None)
    duration = m.groupdict(0)

    # Create the timedelta object from extracted groups
    delta = timedelta(days=int(duration['days']) + (int(duration['months']) * 30) + (int(duration['years']) * 365),
                      hours=int(duration['hours']),
                      minutes=int(duration['minutes']),
                      seconds=int(duration['seconds']))

    if duration['sign'] == "-":
        delta *= -1

    return delta
|
319
|
|
|
|
|
320
|
|
|
|
|
321
|
|
|
def filter_lang(elts, langs=None):
    """
    Filter a list of elements by their xml:lang attribute.

    :param elts: a list of elements (anything with an lxml-style .get())
    :param langs: acceptable language codes (default: ["en"])
    :return: the elements whose xml:lang is in *langs*; if none match,
        all of *elts* is returned as a fallback; [] for empty/None input
    """
    # fix: mutable default argument replaced with a None sentinel
    if langs is None:
        langs = ["en"]

    def _l(elt):
        return elt.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs

    if elts is None or len(elts) == 0:
        return []

    # fix: materialize as a list — under py3 a lazy filter object is always
    # truthy, which silently broke the fallback branch below
    lst = [e for e in elts if _l(e)]
    if lst:
        return lst
    return elts
|
333
|
|
|
|
|
334
|
|
|
|
|
335
|
|
|
def xslt_transform(t, stylesheet, params=None):
    """
    Apply a packaged XSLT stylesheet to an element tree.

    :param t: the element tree to transform
    :param stylesheet: resource name of the stylesheet (looked up under 'xslt')
    :param params: optional dict of stylesheet parameters
    :return: the transformed tree
    """
    # fix: mutable default argument replaced with a None sentinel
    if params is None:
        params = {}
    xsl = etree.fromstring(resource_string(stylesheet, "xslt"))
    transform = etree.XSLT(xsl)
    return transform(t, **params)
|
339
|
|
|
|
|
340
|
|
|
|
|
341
|
|
|
def total_seconds(dt):
    """Return the duration of timedelta *dt* in seconds, with a manual
    computation as fallback for timedelta implementations (pre-2.7) that
    lack total_seconds()."""
    if not hasattr(dt, "total_seconds"):
        return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6) / 10 ** 6
    return dt.total_seconds()
|
346
|
|
|
|