Completed
Push — master ( b632c8...1f9ce5 )
by Andrea
01:18
created

safe_write()   D

Complexity

Conditions 8

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 26
rs 4
cc 8
1
"""
2
3
This module contains various utilities.
4
5
"""
6
from datetime import timedelta, datetime
7
import tempfile
8
import traceback
9
from mako.lookup import TemplateLookup
10
import os
11
import pkg_resources
12
import re
13
from lxml import etree
14
from time import gmtime, strftime, clock
15
from pyff.logs import log
16
import threading
17
import httplib2, httplib
18
import requests
19
from email.utils import parsedate
20
21
__author__ = 'leifj'
22
23
24
class PyffException(Exception):
    """Generic exception type defined by this module; it adds nothing to ``Exception``."""
26
27
28
def _e(error_log, m=None):
29
    def _f(x):
30
        if ":WARNING:" in x:
31
            return False
32
        if m is not None and not m in x:
33
            return False
34
        return True
35
36
    return "\n".join(filter(_f, ["%s" % e for e in error_log]))
37
38
39
def debug_observer(e):
    """
    Write the ``repr`` of `e` to the module log at error level.

    :param e: The object to report
    """
    description = repr(e)
    log.error(description)
41
42
43
def resource_string(name, pfx=None):
    """
    Load and return the contents of the resource called `name`.

    After expanding a leading ``~`` in `name`, the first of these
    locations that exists is used:

    # `name` as a path on disk (relative to the current directory)
    # `name` inside the `pfx` directory on disk, if `pfx` is given
    # `name` as a resource of this package
    # ``pfx/name`` as a resource of this package, if `pfx` is given

    The last two alternatives are used to locate resources distributed in
    the package. This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: The contents of the resource, or None if it was not found
    """
    name = os.path.expanduser(name)

    # Plain files on disk are probed before packaged resources.
    disk_candidates = [name]
    if pfx:
        disk_candidates.append(os.path.join(pfx, name))
    for path in disk_candidates:
        if os.path.exists(path):
            with open(path) as fd:
                return fd.read()

    # Nothing on disk: fall back to resources shipped inside the package.
    packaged_candidates = [name]
    if pfx:
        packaged_candidates.append("%s/%s" % (pfx, name))
    for rsrc in packaged_candidates:
        if pkg_resources.resource_exists(__name__, rsrc):
            return pkg_resources.resource_string(__name__, rsrc)

    return None
73
74
75
def resource_filename(name, pfx=None):
    """
    Find and return the filename of the resource called `name`.

    A leading ``~`` in `name` is expanded first (the same treatment
    ``resource_string`` gives its argument), then the first of these
    locations that exists is used:

    # `name` as a path on disk (relative to the current directory)
    # `name` inside the `pfx` directory on disk, if `pfx` is given
    # `name` as a resource of this package
    # ``pfx/name`` as a resource of this package, if `pfx` is given

    The last two alternatives are used to locate resources distributed in
    the package. This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: A filename for the resource, or None if it was not found
    """
    # Without this, "~/file" was looked up literally and never found on disk.
    name = os.path.expanduser(name)
    if os.path.exists(name):
        return name
    elif pfx and os.path.exists(os.path.join(pfx, name)):
        return os.path.join(pfx, name)
    elif pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)
    elif pfx and pkg_resources.resource_exists(__name__, "%s/%s" % (pfx, name)):
        return pkg_resources.resource_filename(__name__, "%s/%s" % (pfx, name))

    return None
102
103
104
def dmerge(a, b):
    """
    Deep merge of dictionary `b` into dictionary `a`, in place.

    For every key of `b`: when both `a` and `b` hold a dictionary under that
    key the two are merged recursively; in every other case the value from
    `b` replaces (or adds) the entry in `a`. Keys only present in `a` are
    left untouched.

    :param a: The dictionary to merge into (modified in place)
    :param b: The dictionary to merge from
    :return: None
    """
    for key, value in b.items():
        current = a.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            # Merge into the existing sub-dictionary rather than replacing it;
            # a blanket a.update(b) here would throw the nested merge away.
            dmerge(current, value)
        else:
            a[key] = value
116
117
118
def tdelta(input):
    """
    Parse a time delta from expressions like ``1w 32d 4h 5m 6s`` - i.e. in
    weeks, days, hours, minutes and/or seconds.

    Each unit is optional but the units must appear in that order, each
    written as a number followed by the first letter of the unit and an
    optional single space. Parsing starts at the beginning of the string;
    units that are not present count as 0, so text that matches nothing
    yields a zero timedelta.

    :param input: A human-friendly string representation of a timedelta
    :return: The corresponding ``datetime.timedelta``
    """
    keys = ["weeks", "days", "hours", "minutes", "seconds"]
    # One optional named group per unit, e.g. "((?P<weeks>\d+)w ?)?".
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    found = re.match(regex, input).groupdict(default="0")
    kwargs = dict((k, int(v)) for k, v in found.items())
    return timedelta(**kwargs)
130
131
132
def dumptree(t, pretty_print=False, xml_declaration=True):
    """
    Serialize the tree `t` with ``etree.tostring`` using UTF-8 encoding.

    :param t: The tree to serialize
    :param pretty_print: Passed through to ``etree.tostring`` (default False)
    :param xml_declaration: Passed through to ``etree.tostring`` (default True)
    :return: The serialized representation of the tree
    """
    options = dict(encoding='UTF-8',
                   xml_declaration=xml_declaration,
                   pretty_print=pretty_print)
    return etree.tostring(t, **options)
139
140
141
def iso_now():
    """
    Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    utc_now = gmtime()
    return strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
146
147
148
class ResourceResolver(etree.Resolver):
    """Resolver that serves referenced documents from this package's resources."""

    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API.

        Only the last path segment of `system_url` is used. It is looked up
        as a package resource, first under its own name and then under
        ``schema/``.

        :param system_url: The system URL of the document to resolve
        :param public_id: The public id of the document (only logged)
        :param context: The resolver context handed on to ``resolve_file``
        :raises ValueError: if neither resource exists
        """
        log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
        fn = system_url.split("/")[-1]
        for candidate in (fn, "schema/%s" % fn):
            if pkg_resources.resource_exists(__name__, candidate):
                stream = pkg_resources.resource_stream(__name__, candidate)
                return self.resolve_file(stream, context)
        raise ValueError("Unable to locate %s" % fn)
162
163
164
# Cache for the compiled schema; filled in by the first call to schema().
_SCHEMA = None


def schema():
    """
    Return the compiled XML schema, building it on first use.

    The schema is parsed from the packaged ``schema/schema.xsd`` resource
    using a parser that has a ``ResourceResolver`` attached, and the
    resulting ``etree.XMLSchema`` is kept in the module-level ``_SCHEMA``
    so that later calls return the same object.

    :return: The cached ``etree.XMLSchema`` instance
    :raises etree.XMLSchemaParseError: if the schema cannot be compiled;
        the filtered error log is written to the module log first
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            log.error(_e(ex.error_log))
            # Bare raise keeps the original traceback intact.
            raise
    return _SCHEMA
179
180
181
def safe_write(fn, data):
    """Safely write data to a file with name fn

    The data is first written to a temporary file created in the same
    directory as `fn` (named ``.<basename>`` plus a random suffix) and that
    file is then renamed onto `fn`. The rename is skipped when the temporary
    file ends up empty, so writing empty data leaves an existing `fn`
    untouched and reports failure. Errors are logged, never raised.

    :param fn: a filename (a leading ``~`` is expanded)
    :param data: some data to write; ``bytes`` are written in binary mode,
        anything else in text mode
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        mode = 'wb' if isinstance(data, bytes) else 'w'
        with tempfile.NamedTemporaryFile(mode, delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            # Record the name before writing so the cleanup below can remove
            # the temporary file even if write() itself fails.
            tmpn = tmp.name
            tmp.write(data)
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:
        log.error(ex)
    finally:
        # After a successful rename the temporary name no longer exists;
        # anything still there is a leftover from a failed or empty write.
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception as ex:
                log.warn(ex)
    return False
207
208
209
# The "site" directory that sits next to this module on disk.
site_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "site",
)
# Mako template lookup rooted at the "templates" directory below site_dir.
templates = TemplateLookup(
    directories=[os.path.join(site_dir, 'templates')],
)


def template(name):
    """
    Fetch the template called `name` from the module-level lookup.

    :param name: The name of the template
    :return: Whatever ``templates.get_template`` returns for `name`
    """
    found = templates.get_template(name)
    return found
215
216
217
class URLFetch(threading.Thread):
    """
    Thread that fetches one URL and records the outcome on itself.

    After the thread has finished, ``result`` holds the fetched content (or
    None on failure), ``ex`` the exception that made the fetch fail (or
    None), ``status`` the HTTP status code when an HTTP request was made,
    ``cached`` whether the content came from the httplib2 cache and
    ``last_modified`` a datetime derived from the response headers or the
    file's mtime. ``date`` is only set for ``file://`` URLs.
    """

    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        """
        :param url: The URL to fetch; surrounding whitespace is stripped
        :param verify: Stored as ``self.verify``; not consulted by this class itself
        :param id: An identifier for this fetch; defaults to the stripped URL
        :param enable_cache: When false the URL is removed from the cache before fetching
        :param tries: Stored as ``self.tries``; not consulted by this class itself
        """
        self.url = url.strip()
        self.verify = verify
        self.id = id
        self.result = None
        self.ex = None
        self.cached = False
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        self.resp = None
        # Defined up front so it can be read even when no HTTP request was made.
        self.status = None
        self.start_time = 0
        self.end_time = 0
        self.tries = tries

        if self.id is None:
            self.id = self.url

        threading.Thread.__init__(self)

    def time(self):
        """
        Return the difference between the end and start timestamps of ``run``.

        :raises ValueError: if the thread is still running
        """
        if self.is_alive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    @staticmethod
    def _parse_date(value):
        """Turn an RFC 2822 date header into a datetime; fall back to now if missing or unparseable."""
        parsed = parsedate(value) if value is not None else None
        if parsed is None:
            return datetime.now()
        return datetime(*parsed[:6])

    def _fetch_file(self, path):
        """Read a local file into ``result`` and take ``last_modified`` from its mtime."""
        if not os.path.exists(path):
            raise IOError("file not found: %s" % path)

        with open(path, 'r') as fd:
            self.result = fd.read()
            self.cached = False
            self.date = datetime.now()
            self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)

    def _fetch_httplib2(self, cache):
        """Fetch ``self.url`` with httplib2 through `cache`; raise IOError on a non-200 status."""
        h = httplib2.Http(cache=cache, timeout=60,
                          disable_ssl_certificate_validation=True)  # trust is done using signatures over here
        resp, content = h.request(self.url)
        self.status = resp.status
        self.last_modified = self._parse_date(resp.get('last-modified', resp.get('date', None)))
        if resp.status != 200:
            raise IOError(resp.reason)
        self.result = content
        self.cached = resp.fromcache

    def _fetch_requests(self):
        """Fetch ``self.url`` with requests; raise IOError on a non-200 status."""
        resp = requests.get(self.url)
        self.status = resp.status_code
        # .get() instead of indexing: a response without these headers must not raise KeyError.
        self.last_modified = self._parse_date(resp.headers.get('last-modified') or resp.headers.get('date'))
        if resp.status_code != 200:
            raise IOError(httplib.responses[resp.status_code])
        self.result = resp.content
        self.cached = False

    def run(self):
        """
        Perform the fetch.

        ``file://`` URLs are read from disk. Anything else is requested with
        httplib2 and, if that fails for any reason (including a non-200
        status), retried once with requests. Any exception is stored in
        ``self.ex`` and ``self.result`` is reset to None instead of the
        exception propagating out of the thread.
        """
        # NOTE(review): clock() is processor time on Unix, so time() may not
        # reflect how long the fetch took in wall-clock terms - confirm intent.
        self.start_time = clock()
        try:
            cache = httplib2.FileCache(".cache")
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                cache.delete(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                self._fetch_file(self.url[7:])
            else:
                try:
                    self._fetch_httplib2(cache)
                except Exception:
                    self._fetch_requests()

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:
            # Failures are not logged here; they are kept for the caller to inspect.
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()
299
300
301
def root(t):
    """
    Return ``t.getroot()`` when `t` has a callable ``getroot`` attribute,
    otherwise return `t` itself.

    :param t: A tree-like object, or anything else
    """
    if not hasattr(t, 'getroot'):
        return t
    if not hasattr(t.getroot, '__call__'):
        return t
    return t.getroot()
306
307
308
def duration2timedelta(period):
    """
    Convert a duration string such as ``P1DT2H3M4S`` into a timedelta.

    The string consists of an optional sign, the letter ``P`` and optional
    year, month and day counts, optionally followed by ``T`` and hour,
    minute and second counts. A month is counted as 30 days and a year as
    365 days. Matching starts at the beginning of `period`.

    :param period: The duration string
    :return: The timedelta (negated for a leading ``-``), or None if
        `period` does not match
    """
    pattern = re.compile(
        r'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')

    match = pattern.match(period)
    if not match:
        return None

    # Components absent from the string come back as 0 rather than None.
    parts = match.groupdict(0)

    total_days = int(parts['days']) + (int(parts['months']) * 30) + (int(parts['years']) * 365)
    result = timedelta(days=total_days,
                       hours=int(parts['hours']),
                       minutes=int(parts['minutes']),
                       seconds=int(parts['seconds']))

    if parts['sign'] == "-":
        return result * -1
    return result
329
330
331
def filter_lang(elts, langs=["en"]):
    """
    Select the elements whose ``xml:lang`` attribute is one of `langs`.

    If no element matches, all of `elts` is returned unchanged so that the
    caller still gets something to work with.

    :param elts: A sequence of elements (objects with a ``get`` method), or None
    :param langs: The acceptable language codes; the default list is only read, never modified
    :return: A list of the matching elements, `elts` itself when nothing
        matches, or an empty list when `elts` is None or empty
    """
    if elts is None or len(elts) == 0:
        return []

    xml_lang = "{http://www.w3.org/XML/1998/namespace}lang"
    # Build a real list: a lazy filter object is always truthy, which would
    # defeat the "nothing matched" fallback below.
    matching = [elt for elt in elts if elt.get(xml_lang, None) in langs]
    if matching:
        return matching
    return elts
343
344
345
def xslt_transform(t, stylesheet, params={}):
    """
    Apply an XSLT stylesheet to the tree `t`.

    The stylesheet text is loaded with ``resource_string`` using the
    ``xslt`` prefix, compiled, and then called on `t`.

    :param t: The tree to transform
    :param stylesheet: The name of the stylesheet resource
    :param params: Keyword parameters passed to the transform; only read, never modified
    :return: The result of the transformation
    """
    source = resource_string(stylesheet, "xslt")
    transform = etree.XSLT(etree.fromstring(source))
    return transform(t, **params)
349
350
351
def total_seconds(dt):
    """
    Return the total number of seconds represented by the timedelta `dt`.

    Uses ``dt.total_seconds()`` when the object provides it and otherwise
    computes the value from the ``days``, ``seconds`` and ``microseconds``
    attributes.

    :param dt: A timedelta (or an object with the same three attributes)
    :return: The duration in seconds, including the fractional part
    """
    if hasattr(dt, "total_seconds"):
        return dt.total_seconds()
    micros = dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6
    # Divide by a float: integer division would silently drop the microseconds.
    return micros / float(10 ** 6)
356