ResourceResolver   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 14
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0
Features 0
Metric Value
wmc 3
c 1
b 0
f 0
dl 0
loc 14
rs 10

1 Method

Rating   Name        Duplication   Size   Complexity
A        resolve()   0             13     3
"""
This module contains various utilities.
"""
from datetime import timedelta, datetime
import tempfile
import traceback
from mako.lookup import TemplateLookup
import os
import pkg_resources
import re
from lxml import etree
from time import gmtime, strftime, clock
from pyff.logs import log
import threading
import requests
import requests_cache
from email.utils import parsedate

__author__ = 'leifj'


class PyffException(Exception):
    pass

def _e(error_log, m=None):
    def _f(x):
        if ":WARNING:" in x:
            return False
        if m is not None and m not in x:
            return False
        return True

    return "\n".join(filter(_f, ["%s" % e for e in error_log]))

def debug_observer(e):
    log.error(repr(e))

def resource_string(name, pfx=None):
    """
Attempt to load and return the contents (as a string) of the resource named by
the first argument in the first location of:

# as name in the current directory
# as name in the `pfx` subdirectory of the current directory if provided
# as name relative to the package
# as pfx/name relative to the package

The last two alternatives are used to locate resources distributed in the package.
This includes certain XSLT and XSD files.

:param name: The string name of a resource
:param pfx: An optional prefix to use in searching for name

    """
    name = os.path.expanduser(name)
    if os.path.exists(name):
        with open(name) as fd:
            return fd.read()
    elif pfx and os.path.exists(os.path.join(pfx, name)):
        with open(os.path.join(pfx, name)) as fd:
            return fd.read()
    elif pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_string(__name__, name)
    elif pfx and pkg_resources.resource_exists(__name__, "%s/%s" % (pfx, name)):
        return pkg_resources.resource_string(__name__, "%s/%s" % (pfx, name))

    return None

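# Example for resource_string (illustrative; "schema.xsd"/"schema" are just sample arguments):
# a file in the current directory wins, then ./schema/schema.xsd, then the same names
# looked up via pkg_resources inside the package.
#   xsd_text = resource_string("schema.xsd", "schema")
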
def resource_filename(name, pfx=None):
    """
Attempt to find and return the filename of the resource named by the first argument
in the first location of:

# as name in the current directory
# as name in the `pfx` subdirectory of the current directory if provided
# as name relative to the package
# as pfx/name relative to the package

The last two alternatives are used to locate resources distributed in the package.
This includes certain XSLT and XSD files.

:param name: The string name of a resource
:param pfx: An optional prefix to use in searching for name

    """
    if os.path.exists(name):
        return name
    elif pfx and os.path.exists(os.path.join(pfx, name)):
        return os.path.join(pfx, name)
    elif pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)
    elif pfx and pkg_resources.resource_exists(__name__, "%s/%s" % (pfx, name)):
        return pkg_resources.resource_filename(__name__, "%s/%s" % (pfx, name))

    return None

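# Example for resource_filename (illustrative): same search order as resource_string,
# but returns a usable filesystem path (or None) instead of the file contents.
#   xsd_path = resource_filename("schema.xsd", "schema")
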
def dmerge(a, b):
    """
Deep merge of two isomorphically structured dictionaries.

:param a: The dictionary to merge into
:param b: The dictionary to merge from
    """
    for k in a:
        v = a[k]
        if isinstance(v, dict) and k in b:
            dmerge(v, b[k])
            b[k] = v  # keep the merged sub-dictionary; a.update(b) below would otherwise overwrite it
    a.update(b)

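# Example for dmerge (illustrative inputs): nested dictionaries are merged key by key.
#   d = {'a': {'x': 1}}
#   dmerge(d, {'a': {'y': 2}, 'b': 3})
#   # d is now {'a': {'x': 1, 'y': 2}, 'b': 3}
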
def tdelta(input):
    """
Parse a time delta from expressions like 1w 32d 4h 5m - i.e. in weeks, days, hours and/or minutes.

:param input: A human-friendly string representation of a timedelta
    """
    keys = ["weeks", "days", "hours", "minutes"]
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    kwargs = {}
    for k, v in re.match(regex, input).groupdict(default="0").items():
        kwargs[k] = int(v)
    return timedelta(**kwargs)

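# Example for tdelta (illustrative): suffixes are w(eeks), d(ays), h(ours) and m(inutes).
#   tdelta("1w 2d 4h") == timedelta(weeks=1, days=2, hours=4)
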
def dumptree(t, pretty_print=False, xml_declaration=True):
    """
Return a string representation of the tree, optionally pretty-printed (default False)

:param t: An ElementTree to serialize
    """
    return etree.tostring(t, encoding='UTF-8', xml_declaration=xml_declaration, pretty_print=pretty_print)

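# Example for dumptree (illustrative input): serialize back to UTF-8 XML, declaration included.
#   xml = dumptree(etree.fromstring('<root/>'), pretty_print=True)
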
def iso_now():
    """
Current time in ISO format
    """
    return strftime("%Y-%m-%dT%H:%M:%SZ", gmtime())

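# Example for iso_now (illustrative output): returns a UTC timestamp such as
#   '2013-05-01T12:00:00Z'  # the actual value depends on the current time
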
class ResourceResolver(etree.Resolver):
    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API
        """
        log.debug("resolve SYSTEM URL '%s' for '%s'" % (system_url, public_id))
        path = system_url.split("/")
        fn = path[len(path) - 1]
        if pkg_resources.resource_exists(__name__, fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, fn), context)
        elif pkg_resources.resource_exists(__name__, "schema/%s" % fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, "schema/%s" % fn), context)
        else:
            raise ValueError("Unable to locate %s" % fn)

_SCHEMA = None


def schema():
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError, ex:
            log.error(_e(ex.error_log))
            raise ex
    return _SCHEMA

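# Example for schema (illustrative; the file name is hypothetical): validate a parsed
# document against the cached, lazily compiled XSD.
#   t = etree.parse("metadata.xml")
#   if not schema().validate(t):
#       log.error(_e(schema().error_log))
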
def safe_write(fn, data):
    """Safely write data to a file with name fn
    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            tmp.write(data)
            tmpn = tmp.name
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception, ex:
        log.error(ex)
    finally:
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception, ex:
                log.warn(ex)
                pass
    return False

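# Example for safe_write (illustrative; path and tree are hypothetical): the temp-file plus
# rename dance means readers never observe a partially written file.
#   safe_write("/tmp/metadata.xml", dumptree(t))
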
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])


def template(name):
    return templates.get_template(name)

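# Example for template (illustrative; the template name and keyword argument are hypothetical):
#   html = template("index.html").render(title="pyFF")
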
class URLFetch(threading.Thread):
    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        self.url = url.strip()
        self.verify = verify
        self.id = id
        self.result = None
        self.ex = None
        self.cached = False
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        self.resp = None
        self.start_time = 0
        self.end_time = 0
        self.tries = tries

        if self.id is None:
            self.id = self.url

        threading.Thread.__init__(self)

    def time(self):
        if self.isAlive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    def run(self):

        def _parse_date(s):
            if s is None:
                return datetime.now()
            return datetime(*parsedate(s)[:6])

        self.start_time = clock()
        try:
            requests_cache.install_cache('.cache')
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                requests_cache.get_cache().delete_url(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                self.resp = requests.get(self.url, timeout=60, verify=False)
                self.last_modified = _parse_date(self.resp.headers.get('last-modified', self.resp.headers.get('date', None)))
                self.date = _parse_date(self.resp.headers['date'])
                self.cached = getattr(self.resp, 'from_cache', False)
                self.status = self.resp.status_code
                if self.resp.status_code != 200:
                    raise IOError(self.resp.reason)
                self.result = self.resp.content

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception, ex:
            traceback.print_exc()
            log.warn("unable to fetch '%s': %s" % (self.url, ex))
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()

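# Example for URLFetch (illustrative; the URL is hypothetical): fetch on a worker thread
# and inspect the outcome once the thread has finished.
#   f = URLFetch("https://md.example.org/metadata.xml", verify=None)
#   f.start()
#   f.join()
#   if f.ex is None:
#       print "got %d bytes in %fs" % (len(f.result), f.time())
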
def root(t):
    if hasattr(t, 'getroot') and hasattr(t.getroot, '__call__'):
        return t.getroot()
    else:
        return t

def duration2timedelta(period):
    regex = re.compile(
        r'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')

    # Fetch the match groups with default value of 0 (not None)
    m = regex.match(period)
    if not m:
        return None

    duration = m.groupdict(0)

    # Create the timedelta object from extracted groups
    delta = timedelta(days=int(duration['days']) + (int(duration['months']) * 30) + (int(duration['years']) * 365),
                      hours=int(duration['hours']),
                      minutes=int(duration['minutes']),
                      seconds=int(duration['seconds']))

    if duration['sign'] == "-":
        delta *= -1

    return delta

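# Example for duration2timedelta (illustrative): parses xs:duration-style strings such as
# those used for cacheDuration in SAML metadata.
#   duration2timedelta("PT5M") == timedelta(minutes=5)
#   duration2timedelta("-P1D") == timedelta(days=-1)
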
def filter_lang(elts, langs=["en"]):
    def _l(elt):
        return elt.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs

    if elts is None or len(elts) == 0:
        return []

    lst = filter(_l, elts)
    if lst:
        return lst
    else:
        return elts

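# Example for filter_lang (illustrative; 'entity' is a hypothetical parsed EntityDescriptor):
# keep elements whose xml:lang is in langs, falling back to the full list if none match.
#   names = filter_lang(entity.findall(".//{urn:oasis:names:tc:SAML:2.0:metadata}OrganizationName"))
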
def xslt_transform(t, stylesheet, params={}):
    xsl = etree.fromstring(resource_string(stylesheet, "xslt"))
    transform = etree.XSLT(xsl)
    return transform(t, **params)

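# Example for xslt_transform (illustrative; the stylesheet name is hypothetical and must
# exist under the package's xslt/ directory):
#   out = xslt_transform(t, "tidy.xsl")
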
def total_seconds(dt):
    if hasattr(dt, "total_seconds"):
        return dt.total_seconds()
    else:
        return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6) / 10 ** 6

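# Example for total_seconds (illustrative): the fallback reproduces timedelta.total_seconds()
# for Python versions that lack it (< 2.7).
#   total_seconds(timedelta(minutes=1))  # 60.0 (the fallback branch returns 60 - integer division)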