safe_write() - Code Metrics - Inspection of "Patched pyff to support Lithuanian federation" - GEANT/met - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( b632c8...1f9ce5 )

by Andrea

created 2016-10-03 20:48 UTC

safe_write() D

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
c	1
b	0
f	0
dl	0
loc	26
rs	4
cc	8

"""

This module contains various utilities.

"""
from datetime import timedelta, datetime
import tempfile
import traceback
from mako.lookup import TemplateLookup
import os
import pkg_resources
import re
from lxml import etree
from time import gmtime, strftime, clock
from pyff.logs import log
import threading
import httplib2, httplib
import requests
from email.utils import parsedate

__author__ = 'leifj'


class PyffException(Exception):
    """Exception type declared by this module; adds nothing to ``Exception``."""


def _e(error_log, m=None):
    def _f(x):
        if ":WARNING:" in x:
            return False
        if m is not None and not m in x:
            return False
        return True

    return "\n".join(filter(_f, ["%s" % e for e in error_log]))


def debug_observer(e):
    """
    Log the ``repr()`` of ``e`` at error level.

    :param e: Any object; only its repr is logged
    """
    description = repr(e)
    log.error(description)


def resource_string(name, pfx=None):
    """
    Load and return the contents of the resource called ``name``. The first
    of these locations that exists wins:

    # ``name`` (after ``~`` expansion) on the filesystem
    # ``name`` inside the ``pfx`` directory, if ``pfx`` is provided
    # ``name`` relative to the package
    # ``pfx/name`` relative to the package

    The last two alternatives are used to locate resources distributed in the
    package. This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: The resource contents, or None if it was not found anywhere
    """
    name = os.path.expanduser(name)

    # 1. Plain filesystem path.
    if os.path.exists(name):
        with open(name) as fd:
            return fd.read()

    # 2. Filesystem path below the prefix directory.
    if pfx:
        prefixed_path = os.path.join(pfx, name)
        if os.path.exists(prefixed_path):
            with open(prefixed_path) as fd:
                return fd.read()

    # 3. Resource shipped with the package.
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_string(__name__, name)

    # 4. Resource shipped with the package, below the prefix.
    if pfx:
        packaged_name = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged_name):
            return pkg_resources.resource_string(__name__, packaged_name)

    return None


def resource_filename(name, pfx=None):
    """
    Find and return the filename of the resource called ``name``. The first
    of these locations that exists wins:

    # ``name`` on the filesystem
    # ``name`` inside the ``pfx`` directory, if ``pfx`` is provided
    # ``name`` relative to the package
    # ``pfx/name`` relative to the package

    The last two alternatives are used to locate resources distributed in the
    package. This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: A filename for the resource, or None if it was not found anywhere
    """
    # 1. Plain filesystem path, returned unchanged.
    if os.path.exists(name):
        return name

    # 2. Filesystem path below the prefix directory.
    if pfx:
        prefixed_path = os.path.join(pfx, name)
        if os.path.exists(prefixed_path):
            return prefixed_path

    # 3. Resource shipped with the package.
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)

    # 4. Resource shipped with the package, below the prefix.
    if pfx:
        packaged_name = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged_name):
            return pkg_resources.resource_filename(__name__, packaged_name)

    return None


def dmerge(a, b):
    """
    Deep merge of two dictionaries, in place.

    Keys present only in ``b`` are copied into ``a``. When both sides hold a
    dictionary under the same key the two are merged recursively, so entries
    that exist only on the ``a`` side of a nested dictionary survive. In every
    other case the value from ``b`` replaces the one in ``a``.

    :param a: The dictionary to merge into (modified in place)
    :param b: The dictionary to merge from (left untouched)
    """
    for k, incoming in b.items():
        existing = a.get(k)
        if isinstance(existing, dict) and isinstance(incoming, dict):
            # Recurse instead of overwriting: a blanket a.update(b) would swap
            # the nested dict in `a` for the one in `b` and lose a's own keys.
            dmerge(existing, incoming)
        else:
            a[k] = incoming


def tdelta(input):
    """
    Parse a time delta from expressions like 1w 32d 4h 5s - i.e. in weeks, days,
    hours, minutes and/or seconds.

    Each component is a number followed by the first letter of its unit
    (w, d, h, m, s), optionally followed by a single space. Components must
    appear in that order; parsing stops at the first thing that does not fit
    and missing components count as zero.

    :param input: A human-friendly string representation of a timedelta
    :return: The corresponding ``datetime.timedelta``
    """
    # "seconds" is included so that the documented "5s" form is honoured.
    keys = ["weeks", "days", "hours", "minutes", "seconds"]
    # Raw string: "\d" must reach the regex engine untouched.
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    # Every component is optional, so the pattern always matches (possibly empty).
    found = re.match(regex, input).groupdict(default="0")
    kwargs = dict((k, int(v)) for k, v in found.items())
    return timedelta(**kwargs)


def dumptree(t, pretty_print=False, xml_declaration=True):
    """
    Serialize a tree to a UTF-8 encoded string.

    :param t: An ElementTree to serialize
    :param pretty_print: Passed through to ``etree.tostring`` (default False)
    :param xml_declaration: Passed through to ``etree.tostring`` (default True)
    :return: The result of ``etree.tostring``
    """
    options = dict(encoding='UTF-8',
                   xml_declaration=xml_declaration,
                   pretty_print=pretty_print)
    return etree.tostring(t, **options)


def iso_now():
    """
    Current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_now = gmtime()
    return strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)


class ResourceResolver(etree.Resolver):
    """
    lxml resolver that serves referenced documents from the package resources.
    """

    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API.

        Only the last path segment of ``system_url`` is used; it is looked up
        first at the package root and then under ``schema/``.

        :raises ValueError: if the file is found in neither location
        """
        log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
        fn = system_url.split("/")[-1]

        for candidate in (fn, "schema/%s" % fn):
            if pkg_resources.resource_exists(__name__, candidate):
                stream = pkg_resources.resource_stream(__name__, candidate)
                return self.resolve_file(stream, context)

        raise ValueError("Unable to locate %s" % fn)


# Module-level cache for the compiled XML schema; filled on first use by schema().
_SCHEMA = None


def schema():
    """
    Return the compiled XML schema built from the packaged ``schema/schema.xsd``.

    The schema is compiled on the first call and cached in the module-level
    ``_SCHEMA``; later calls return the cached object. Documents referenced by
    the XSD are resolved through ``ResourceResolver``.

    :return: An ``etree.XMLSchema`` instance
    :raises etree.XMLSchemaParseError: if the schema cannot be compiled; the
        non-warning entries of its error log are logged first
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            log.error(_e(ex.error_log))
            # Bare raise keeps the original traceback intact.
            raise
    return _SCHEMA


def safe_write(fn, data):
    """Safely write data to a file with name fn

    The data is first written to a hidden temporary file created next to the
    target (same directory, name prefixed with ``.<basename>``) and then renamed
    over ``fn``. The rename only happens when the temporary file is non-empty,
    so writing empty data leaves any existing ``fn`` untouched and returns False.
    Errors are logged, never raised. A temporary file that was not renamed is
    removed before returning.

    :param fn: a filename (``~`` is expanded)
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            # Record the name before writing: the file already exists on disk
            # (delete=False), so a failing write must still be cleaned up below.
            tmpn = tmp.name
            tmp.write(data)
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:
        log.error(ex)
    finally:
        # After a successful rename the temporary name no longer exists, so
        # this only fires on the failure / empty-data paths.
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception as ex:
                log.warn(ex)
    return False


# Absolute path of the "site" directory that sits next to this module's file.
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
# Mako template lookup rooted at the "templates" subdirectory of site_dir.
templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])


def template(name):
    """
    Fetch a template by name from the module-level ``templates`` lookup.

    :param name: The template name handed to ``templates.get_template``
    :return: Whatever ``templates.get_template`` returns for that name
    """
    found = templates.get_template(name)
    return found


class URLFetch(threading.Thread):
    """
    Thread that fetches one URL and stores the outcome on itself.

    After the thread has finished, ``result`` holds the fetched content (or
    None on failure), ``ex`` the exception that caused a failure (or None),
    ``status`` the HTTP status when an HTTP request was made, ``cached``
    whether the content came from the httplib2 cache, and ``last_modified``
    a datetime derived from the response headers or the file's mtime.

    ``file://`` URLs are read from the local filesystem. Anything else is
    requested with httplib2 and, should that attempt fail for any reason,
    retried once with requests.
    """

    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        """
        :param url: The URL to fetch; surrounding whitespace is stripped
        :param verify: Stored as ``self.verify``; not read by this class
        :param id: Identifier for this fetch; defaults to the stripped URL
        :param enable_cache: When False the URL is evicted from the cache before fetching
        :param tries: Stored as ``self.tries``; not read by this class
        """
        threading.Thread.__init__(self)

        self.url = url.strip()
        self.verify = verify
        self.id = id if id is not None else self.url
        self.result = None
        self.ex = None
        self.cached = False
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        # Defined up front so that reading it never raises AttributeError.
        self.status = None
        self.resp = None
        self.start_time = 0
        self.end_time = 0
        self.tries = tries

    def time(self):
        """
        Return ``end_time - start_time`` as recorded by ``run()``.

        :raises ValueError: if the thread is still running
        """
        if self.is_alive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    @staticmethod
    def _parse_date(value):
        """
        Turn an RFC 2822 style date header into a naive datetime.

        :param value: The header value, or None when the header was absent
        :return: The parsed datetime; the current time when ``value`` is None
            or cannot be parsed
        """
        if value is not None:
            parsed = parsedate(value)
            # parsedate signals an unparsable value by returning None.
            if parsed is not None:
                return datetime(*parsed[:6])
        return datetime.now()

    def run(self):
        """
        Perform the fetch. Never raises: any failure is stored in ``self.ex``
        and ``self.result`` is reset to None.
        """
        # NOTE(review): time.clock() reports processor time on Unix, so time()
        # may not reflect wall-clock duration - confirm this is intended.
        self.start_time = clock()
        try:
            cache = httplib2.FileCache(".cache")
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                cache.delete(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                self.cached = False
                self.date = datetime.now()
                self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                try:
                    h = httplib2.Http(cache=cache, timeout=60,
                                      disable_ssl_certificate_validation=True)  # trust is done using signatures over here
                    resp, content = h.request(self.url)
                    self.status = resp.status
                    self.last_modified = self._parse_date(resp.get('last-modified', resp.get('date', None)))
                    if resp.status != 200:
                        raise IOError(resp.reason)
                    self.result = content
                    self.cached = resp.fromcache
                except Exception:
                    # Any httplib2 failure (including a non-200 answer) gets a
                    # second chance through requests.
                    resp = requests.get(self.url)
                    self.status = resp.status_code
                    # .get(): a missing Last-Modified header must fall through
                    # to Date instead of raising KeyError.
                    self.last_modified = self._parse_date(
                        resp.headers.get('last-modified') or resp.headers.get('date'))
                    if resp.status_code != 200:
                        raise IOError(httplib.responses.get(resp.status_code, str(resp.status_code)))
                    self.result = resp.content
                    self.cached = False

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:
            # Deliberately not logged here: the error is kept on the instance
            # for the caller to inspect.
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()


def root(t):
    """
    Return ``t.getroot()`` when ``t`` offers a callable ``getroot``; otherwise
    return ``t`` itself.

    :param t: A tree-like object or an element
    """
    if not hasattr(t, 'getroot'):
        return t
    if not hasattr(t.getroot, '__call__'):
        return t
    return t.getroot()


def duration2timedelta(period):
    """
    Convert a duration such as ``P1Y2M3DT4H5M6S`` (optionally signed) into a
    timedelta. A year counts as 365 days and a month as 30 days.

    :param period: The duration string
    :return: A ``timedelta``, or None when ``period`` does not match the pattern
    """
    pattern = re.compile(
        r'(?P<sign>[-+]?)P'
        r'(?:(?P<years>\d+)[Yy])?'
        r'(?:(?P<months>\d+)[Mm])?'
        r'(?:(?P<days>\d+)[Dd])?'
        r'(?:T'
        r'(?:(?P<hours>\d+)[Hh])?'
        r'(?:(?P<minutes>\d+)[Mm])?'
        r'(?:(?P<seconds>\d+)[Ss])?'
        r')?')

    match = pattern.match(period)
    if not match:
        return None

    # Components that are absent default to 0 rather than None.
    parts = match.groupdict(0)

    total_days = int(parts['days']) + (int(parts['months']) * 30) + (int(parts['years']) * 365)
    result = timedelta(days=total_days,
                       hours=int(parts['hours']),
                       minutes=int(parts['minutes']),
                       seconds=int(parts['seconds']))

    if parts['sign'] == "-":
        result *= -1

    return result


def filter_lang(elts, langs=["en"]):
    """
    Keep the elements whose ``xml:lang`` attribute is one of ``langs``.

    When no element matches, the input is returned unfiltered so that callers
    still get something to display.

    :param elts: A sequence of elements (anything offering ``get(name, default)``), or None
    :param langs: The acceptable language codes; only read, never modified
    :return: [] for None/empty input, the list of matching elements if there
        are any, otherwise ``elts`` itself
    """
    if elts is None or len(elts) == 0:
        return []

    # A real list (not a lazy filter object) so the emptiness test below is
    # meaningful regardless of the Python version.
    lst = [elt for elt in elts
           if elt.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs]
    if lst:
        return lst
    return elts


def xslt_transform(t, stylesheet, params={}):
    """
    Apply an XSLT stylesheet to ``t``.

    :param t: The tree to transform
    :param stylesheet: Resource name of the stylesheet, located via
        ``resource_string`` with the ``xslt`` prefix
    :param params: Keyword parameters passed on to the transform
    :return: The result of running the transform on ``t``
    """
    stylesheet_source = resource_string(stylesheet, "xslt")
    apply_stylesheet = etree.XSLT(etree.fromstring(stylesheet_source))
    return apply_stylesheet(t, **params)


def total_seconds(dt):
    """
    Total number of seconds covered by a timedelta-like object.

    Uses ``dt.total_seconds()`` when available; otherwise computes the same
    value from the ``days``, ``seconds`` and ``microseconds`` attributes.

    :param dt: A ``timedelta`` or an object exposing days/seconds/microseconds
    :return: The duration in seconds as a float
    """
    if hasattr(dt, "total_seconds"):
        return dt.total_seconds()

    microseconds = dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6
    # Divide by a float so sub-second precision is kept even where "/" on two
    # ints truncates; this keeps both branches returning the same kind of value.
    return microseconds / float(10 ** 6)


1			"""
2
3			This module contains various utilities.
4
5			"""
6			from datetime import timedelta, datetime
7			import tempfile
8			import traceback
9			from mako.lookup import TemplateLookup
10			import os
11			import pkg_resources
12			import re
13			from lxml import etree
14			from time import gmtime, strftime, clock
15			from pyff.logs import log
16			import threading
17			import httplib2, httplib
18			import requests
19			from email.utils import parsedate
20
21			__author__ = 'leifj'
22
23
24			class PyffException(Exception):
25			pass
26
27
28			def _e(error_log, m=None):
29			def _f(x):
30			if ":WARNING:" in x:
31			return False
32			if m is not None and not m in x:
33			return False
34			return True
35
36			return "\n".join(filter(_f, ["%s" % e for e in error_log]))
37
38
39			def debug_observer(e):
40			log.error(repr(e))
41
42
43			def resource_string(name, pfx=None):
44			"""
45			Attempt to load and return the contents (as a string) of the resource named by
46			the first argument in the first location of:
47
48			# as name in the current directory
49			# as name in the `pfx` subdirectory of the current directory if provided
50			# as name relative to the package
51			# as pfx/name relative to the package
52
53			The last two alternatives is used to locate resources distributed in the package.
54			This includes certain XSLT and XSD files.
55
56			:param name: The string name of a resource
57			:param pfx: An optional prefix to use in searching for name
58
59			"""
60			name = os.path.expanduser(name)
61			if os.path.exists(name):
62			with open(name) as fd:
63			return fd.read()
64			elif pfx and os.path.exists(os.path.join(pfx, name)):
65			with open(os.path.join(pfx, name)) as fd:
66			return fd.read()
67			elif pkg_resources.resource_exists(__name__, name):
68			return pkg_resources.resource_string(__name__, name)
69			elif pfx and pkg_resources.resource_exists(__name__, "%s/%s" % (pfx, name)):
70			return pkg_resources.resource_string(__name__, "%s/%s" % (pfx, name))
71
72			return None
73
74
75			def resource_filename(name, pfx=None):
76			"""
77			Attempt to find and return the filename of the resource named by the first argument
78			in the first location of:
79
80			# as name in the current directory
81			# as name in the `pfx` subdirectory of the current directory if provided
82			# as name relative to the package
83			# as pfx/name relative to the package
84
85			The last two alternatives is used to locate resources distributed in the package.
86			This includes certain XSLT and XSD files.
87
88			:param name: The string name of a resource
89			:param pfx: An optional prefix to use in searching for name
90
91			"""
92			if os.path.exists(name):
93			return name
94			elif pfx and os.path.exists(os.path.join(pfx, name)):
95			return os.path.join(pfx, name)
96			elif pkg_resources.resource_exists(__name__, name):
97			return pkg_resources.resource_filename(__name__, name)
98			elif pfx and pkg_resources.resource_exists(__name__, "%s/%s" % (pfx, name)):
99			return pkg_resources.resource_filename(__name__, "%s/%s" % (pfx, name))
100
101			return None
102
103
104			def dmerge(a, b):
105			"""
106			Deep merge of two isomorphically structured dictionaries.
107
108			:param a: The dictionary to merge into
109			:param b: The dictionary to merge from
110			"""
111			for k in a:
112			v = a[k]
113			if isinstance(v, dict) and k in b:
114			dmerge(v, b[k])
115			a.update(b)
116
117
118			def tdelta(input):
119			"""
120			Parse a time delta from expressions like 1w 32d 4h 5s - i.e in weeks, days hours and/or seconds.
121
122			:param input: A human-friendly string representation of a timedelta
123			"""
124			keys = ["weeks", "days", "hours", "minutes"]
125			regex = "".join(["((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
126			kwargs = {}
127			for k, v in re.match(regex, input).groupdict(default="0").items():
128			kwargs[k] = int(v)
129			return timedelta(**kwargs)
130
131
132			def dumptree(t, pretty_print=False, xml_declaration=True):
133			"""
134			Return a string representation of the tree, optionally pretty_print(ed) (default False)
135
136			:param t: An ElemenTree to serialize
137			"""
138			return etree.tostring(t, encoding='UTF-8', xml_declaration=xml_declaration, pretty_print=pretty_print)
139
140
141			def iso_now():
142			"""
143			Current time in ISO format
144			"""
145			return strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())
146
147
148			class ResourceResolver(etree.Resolver):
149			def resolve(self, system_url, public_id, context):
150			"""
151			Resolves URIs using the resource API
152			"""
153			log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
154			path = system_url.split("/")
155			fn = path[len(path) - 1]
156			if pkg_resources.resource_exists(__name__, fn):
157			return self.resolve_file(pkg_resources.resource_stream(__name__, fn), context)
158			elif pkg_resources.resource_exists(__name__, "schema/%s" % fn):
159			return self.resolve_file(pkg_resources.resource_stream(__name__, "schema/%s" % fn), context)
160			else:
161			raise ValueError("Unable to locate %s" % fn)
162
163
164			_SCHEMA = None
165
166
167			def schema():
168			global _SCHEMA
169			if _SCHEMA is None:
170			try:
171			parser = etree.XMLParser()
172			parser.resolvers.add(ResourceResolver())
173			st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
174			_SCHEMA = etree.XMLSchema(st)
175			except etree.XMLSchemaParseError, ex:
176			log.error(_e(ex.error_log))
177			raise ex
178			return _SCHEMA
179
180
181			def safe_write(fn, data):
182			"""Safely write data to a file with name fn
183			:param fn: a filename
184			:param data: some data to write
185			:return: True or False depending on the outcome of the write
186			"""
187			tmpn = None
188			try:
189			fn = os.path.expanduser(fn)
190			dirname, basename = os.path.split(fn)
191			with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
192			tmp.write(data)
193			tmpn = tmp.name
194			if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
195			os.rename(tmpn, fn)
196			return True
197			except Exception, ex:
198			log.error(ex)
199			finally:
200			if tmpn is not None and os.path.exists(tmpn):
201			try:
202			os.unlink(tmpn)
203			except Exception, ex:
204			log.warn(ex)
205			pass
206			return False
207
208
209			site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
210			templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])
211
212
213			def template(name):
214			return templates.get_template(name)
215
216
217			class URLFetch(threading.Thread):
218			def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
219			self.url = url.strip()
220			self.verify = verify
221			self.id = id
222			self.result = None
223			self.ex = None
224			self.cached = False
225			self.enable_cache = enable_cache
226			self.cache_ttl = 0
227			self.last_modified = None
228			self.date = None
229			self.tries = 0
230			self.resp = None
231			self.start_time = 0
232			self.end_time = 0
233			self.tries = tries
234
235			if self.id is None:
236			self.id = self.url
237
238			threading.Thread.__init__(self)
239
240			def time(self):
241			if self.isAlive():
242			raise ValueError("caller attempted to obtain execution time while fetcher still active")
243			return self.end_time - self.start_time
244
245			def run(self):
246
247			def _parse_date(str):
248			if str is None:
249			return datetime.new()
250			return datetime(*parsedate(str)[:6])
251
252			self.start_time = clock()
253			try:
254			cache = httplib2.FileCache(".cache")
255			if not self.enable_cache:
256			log.debug("removing '%s' from cache" % self.url)
257			cache.delete(self.url)
258
259			log.debug("fetching '%s'" % self.url)
260
261			if self.url.startswith('file://'):
262			path = self.url[7:]
263			if not os.path.exists(path):
264			raise IOError("file not found: %s" % path)
265
266			with open(path, 'r') as fd:
267			self.result = fd.read()
268			self.cached = False
269			self.date = datetime.now()
270			self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
271			else:
272			try:
273			h = httplib2.Http(cache=cache, timeout=60,
274			disable_ssl_certificate_validation=True) # trust is done using signatures over here
275			resp, content = h.request(self.url)
276			self.status = resp.status
277			self.last_modified = _parse_date(resp.get('last-modified', resp.get('date', None)))
278			if resp.status != 200:
279			raise IOError(resp.reason)
280			self.result = content
281			self.cached = resp.fromcache
282			except Exception, ex:
283			resp = requests.get(self.url)
284			self.status = resp.status_code
285			self.last_modified = _parse_date(resp.headers['last-modified'] or resp.headers['date'])
286			if resp.status_code != 200:
287			raise IOError(httplib.responses[resp.status_code])
288			self.result = resp.content
289			self.cached = False
290
291			log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
292			except Exception, ex:
293			#traceback.print_exc()
294			#log.warn("unable to fetch '%s': %s" % (self.url, ex))
295			self.ex = ex
296			self.result = None
297			finally:
298			self.end_time = clock()
299
300
301			def root(t):
302			if hasattr(t, 'getroot') and hasattr(t.getroot, '__call__'):
303			return t.getroot()
304			else:
305			return t
306
307
308			def duration2timedelta(period):
309			regex = re.compile(
310			'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')
311
312			# Fetch the match groups with default value of 0 (not None)
313			m = regex.match(period)
314			if not m:
315			return None
316
317			duration = m.groupdict(0)
318
319			# Create the timedelta object from extracted groups
320			delta = timedelta(days=int(duration['days']) + (int(duration['months']) * 30) + (int(duration['years']) * 365),
321			hours=int(duration['hours']),
322			minutes=int(duration['minutes']),
323			seconds=int(duration['seconds']))
324
325			if duration['sign'] == "-":
326			delta *= -1
327
328			return delta
329
330
331			def filter_lang(elts, langs=["en"]):
332			def _l(elt):
333			return elt.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs
334
335			if elts is None or len(elts) == 0:
336			return []
337
338			lst = filter(_l, elts)
339			if lst:
340			return lst
341			else:
342			return elts
343
344
345			def xslt_transform(t, stylesheet, params={}):
346			xsl = etree.fromstring(resource_string(stylesheet, "xslt"))
347			transform = etree.XSLT(xsl)
348			return transform(t, **params)
349
350
351			def total_seconds(dt):
352			if hasattr(dt, "total_seconds"):
353			return dt.total_seconds()
354			else:
355			return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6) / 10 ** 6
356

GEANT / met

Push — master ( b632c8...1f9ce5 )

safe_write() D

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like