|
1
|
|
|
""" |
|
2
|
|
|
|
|
3
|
|
|
This module contains various utilities. |
|
4
|
|
|
|
|
5
|
|
|
""" |
|
6
|
|
|
from datetime import timedelta, datetime |
|
7
|
|
|
import tempfile |
|
8
|
|
|
import traceback |
|
9
|
|
|
from mako.lookup import TemplateLookup |
|
10
|
|
|
import os |
|
11
|
|
|
import pkg_resources |
|
12
|
|
|
import re |
|
13
|
|
|
from lxml import etree |
|
14
|
|
|
from time import gmtime, strftime, clock |
|
15
|
|
|
from pyff.logs import log |
|
16
|
|
|
import threading |
|
17
|
|
|
import requests |
|
18
|
|
|
import requests_cache |
|
19
|
|
|
from email.utils import parsedate |
|
20
|
|
|
|
|
21
|
|
|
__author__ = 'leifj' |
|
22
|
|
|
|
|
23
|
|
|
|
|
24
|
|
|
class PyffException(Exception):
    """Base exception type for errors raised by pyff."""
    pass
|
26
|
|
|
|
|
27
|
|
|
|
|
28
|
|
|
def _e(error_log, m=None): |
|
29
|
|
|
def _f(x): |
|
30
|
|
|
if ":WARNING:" in x: |
|
31
|
|
|
return False |
|
32
|
|
|
if m is not None and not m in x: |
|
33
|
|
|
return False |
|
34
|
|
|
return True |
|
35
|
|
|
|
|
36
|
|
|
return "\n".join(filter(_f, ["%s" % e for e in error_log])) |
|
37
|
|
|
|
|
38
|
|
|
|
|
39
|
|
|
def debug_observer(e):
    # Observer callback for debugging: dump the raw event via repr at error level.
    log.error(repr(e))
|
41
|
|
|
|
|
42
|
|
|
|
|
43
|
|
|
def resource_string(name, pfx=None):
    """
    Attempt to load and return the contents (as a string) of the resource named by
    the first argument in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    """
    name = os.path.expanduser(name)

    # 1. plain filesystem path
    if os.path.exists(name):
        with open(name) as fd:
            return fd.read()

    # 2. filesystem path under the prefix
    if pfx:
        prefixed = os.path.join(pfx, name)
        if os.path.exists(prefixed):
            with open(prefixed) as fd:
                return fd.read()

    # 3. packaged resource
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_string(__name__, name)

    # 4. packaged resource under the prefix
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_string(__name__, packaged)

    return None
|
73
|
|
|
|
|
74
|
|
|
|
|
75
|
|
|
def resource_filename(name, pfx=None):
    """
    Attempt to find and return the filename of the resource named by the first argument
    in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    """
    # 1. plain filesystem path
    if os.path.exists(name):
        return name

    # 2. filesystem path under the prefix
    if pfx:
        prefixed = os.path.join(pfx, name)
        if os.path.exists(prefixed):
            return prefixed

    # 3. packaged resource
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)

    # 4. packaged resource under the prefix
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_filename(__name__, packaged)

    return None
|
102
|
|
|
|
|
103
|
|
|
|
|
104
|
|
|
def dmerge(a, b):
    """
    Deep merge of two isomorphically structured dictionaries.

    Values from *b* win on conflicts, except where both sides hold dicts,
    which are merged recursively (in place, into *a*).

    :param a: The dictionary to merge into (modified in place)
    :param b: The dictionary to merge from
    """
    # BUG FIX: the original recursed into nested dicts and then called
    # a.update(b), which replaced each merged sub-dict with b's unmerged
    # copy — so the deep merge never survived. Merge key-by-key instead.
    for k, bv in b.items():
        av = a.get(k)
        if isinstance(av, dict) and isinstance(bv, dict):
            dmerge(av, bv)
        else:
            a[k] = bv
|
116
|
|
|
|
|
117
|
|
|
|
|
118
|
|
|
def tdelta(input):
    """
    Parse a time delta from expressions like 1w 32d 4h 5s - i.e in weeks,
    days, hours, minutes and/or seconds.

    :param input: A human-friendly string representation of a timedelta
    :return: the corresponding datetime.timedelta (zero for an empty string)
    """
    # Each unit is matched by its first letter (w/d/h/m/s). The original
    # docstring promised seconds support but the code stopped at minutes;
    # the "seconds" group below restores the documented behavior.
    keys = ["weeks", "days", "hours", "minutes", "seconds"]
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    kwargs = {}
    # every group is optional, so this pattern always matches; missing
    # groups default to "0"
    for k, v in re.match(regex, input).groupdict(default="0").items():
        kwargs[k] = int(v)
    return timedelta(**kwargs)
|
130
|
|
|
|
|
131
|
|
|
|
|
132
|
|
|
def dumptree(t, pretty_print=False, xml_declaration=True):
    """
    Serialize an element tree to a UTF-8 string.

    :param t: An ElemenTree to serialize
    :param pretty_print: indent the output for readability (default False)
    :param xml_declaration: include the leading <?xml ...?> declaration (default True)
    """
    serialized = etree.tostring(t,
                                encoding='UTF-8',
                                pretty_print=pretty_print,
                                xml_declaration=xml_declaration)
    return serialized
|
139
|
|
|
|
|
140
|
|
|
|
|
141
|
|
|
def iso_now():
    """
    Current time in ISO format (UTC, 'Z' suffix).
    """
    now_utc = gmtime()
    return strftime("%Y-%m-%dT%H:%M:%SZ", now_utc)
|
146
|
|
|
|
|
147
|
|
|
|
|
148
|
|
|
class ResourceResolver(etree.Resolver):
    """lxml resolver that serves schema documents from package resources."""

    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API
        """
        log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
        # only the final path component is meaningful; look it up first at the
        # package root, then under schema/
        fn = system_url.split("/")[-1]
        if pkg_resources.resource_exists(__name__, fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, fn), context)
        if pkg_resources.resource_exists(__name__, "schema/%s" % fn):
            return self.resolve_file(pkg_resources.resource_stream(__name__, "schema/%s" % fn), context)
        raise ValueError("Unable to locate %s" % fn)
|
162
|
|
|
|
|
163
|
|
|
|
|
164
|
|
|
# Cached compiled XMLSchema instance; built lazily by schema().
_SCHEMA = None
|
165
|
|
|
|
|
166
|
|
|
|
|
167
|
|
|
def schema():
    """
    Return the compiled metadata XMLSchema, building and caching it on
    first use.

    :raises etree.XMLSchemaParseError: if the bundled schema fails to compile
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            # resolve schema includes/imports from package resources
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            # fix: py2-only "except X, ex" syntax; bare raise keeps the traceback
            log.error(_e(ex.error_log))
            raise
    return _SCHEMA
|
179
|
|
|
|
|
180
|
|
|
|
|
181
|
|
|
def safe_write(fn, data):
    """Safely write data to a file with name fn

    The data is first written to a temporary file in the target directory
    and then renamed into place, so readers never observe a partial write.

    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        # temp file must live in the same directory so the rename below
        # stays on a single filesystem (and is atomic on POSIX)
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            tmp.write(data)
            tmpn = tmp.name
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:  # fix: was py2-only "except Exception, ex"
        log.error(ex)
    finally:
        # best-effort cleanup of the temp file when the rename never happened
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception as ex:
                log.warn(ex)
    return False
|
207
|
|
|
|
|
208
|
|
|
|
|
209
|
|
|
# Directory holding the bundled web resources, next to this module.
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
# Mako template lookup rooted at the packaged site/templates directory.
templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])
|
211
|
|
|
|
|
212
|
|
|
|
|
213
|
|
|
def template(name):
    # Look up a Mako template by name from the site/templates directory.
    return templates.get_template(name)
|
215
|
|
|
|
|
216
|
|
|
|
|
217
|
|
|
class URLFetch(threading.Thread):
    """
    Background thread that fetches a single resource over http(s) or file://.

    The outcome is left in instance attributes after run() completes:
    ``result`` (content, or None on failure), ``ex`` (the exception, if any),
    ``status``, ``cached``, ``date`` and ``last_modified``.
    """

    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        self.url = url.strip()
        self.verify = verify
        self.id = id
        self.result = None
        self.ex = None
        self.cached = False
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        self.resp = None
        self.status = None  # fix: previously unset until an HTTP fetch ran
        self.start_time = 0
        self.end_time = 0
        self.tries = tries  # fix: was assigned twice (0, then tries)

        if self.id is None:
            self.id = self.url

        threading.Thread.__init__(self)

    def time(self):
        """
        Return the wall-clock duration of the completed fetch.

        :raises ValueError: if the fetcher thread is still running
        """
        if self.is_alive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    def run(self):

        def _parse_date(s):
            # fix: was datetime.new(), which raised AttributeError whenever
            # the response carried no usable date header
            if s is None:
                return datetime.now()
            return datetime(*parsedate(s)[:6])

        self.start_time = clock()
        try:
            requests_cache.install_cache('.cache')
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                requests_cache.get_cache().delete_url(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                # NOTE(review): self.verify is stored but never used here —
                # TLS verification is unconditionally disabled. Confirm intent.
                self.resp = requests.get(self.url, timeout=60, verify=False)
                self.last_modified = _parse_date(self.resp.headers.get('last-modified', self.resp.headers.get('date', None)))
                self.date = _parse_date(self.resp.headers['date'])
                self.cached = getattr(self.resp, 'from_cache', False)
                self.status = self.resp.status_code
                if self.resp.status_code != 200:
                    raise IOError(self.resp.reason)
                self.result = self.resp.content

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:  # fix: was py2-only "except Exception, ex"
            traceback.print_exc()
            log.warn("unable to fetch '%s': %s" % (self.url, ex))
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()
|
289
|
|
|
|
|
290
|
|
|
|
|
291
|
|
|
def root(t):
    """Return the root element of *t*: ElementTree objects are unwrapped via
    getroot(), plain elements are returned unchanged."""
    getroot = getattr(t, 'getroot', None)
    if getroot is not None and hasattr(getroot, '__call__'):
        return getroot()
    return t
|
296
|
|
|
|
|
297
|
|
|
|
|
298
|
|
|
def duration2timedelta(period):
    """
    Convert an ISO 8601 duration string (e.g. 'PT1H', '-P1D') to a timedelta.

    Months are approximated as 30 days and years as 365 days.

    :param period: an ISO 8601 duration string
    :return: a datetime.timedelta, or None if *period* does not parse
    """
    # fix: raw strings so \d is a regex escape, not a (deprecated) string escape
    regex = re.compile(
        r'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?'
        r'(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')

    m = regex.match(period)
    if not m:
        return None

    # Fetch the match groups with default value of 0 (not None)
    duration = m.groupdict(0)

    # Create the timedelta object from extracted groups
    delta = timedelta(days=int(duration['days']) + (int(duration['months']) * 30) + (int(duration['years']) * 365),
                      hours=int(duration['hours']),
                      minutes=int(duration['minutes']),
                      seconds=int(duration['seconds']))

    if duration['sign'] == "-":
        delta *= -1

    return delta
|
319
|
|
|
|
|
320
|
|
|
|
|
321
|
|
|
def filter_lang(elts, langs=None):
    """
    Filter a list of elements by their xml:lang attribute.

    :param elts: a list of elements (anything with an lxml-style .get())
    :param langs: acceptable language codes (default: ["en"])
    :return: the elements whose xml:lang is in *langs*; if none match,
        all of *elts* is returned as a fallback; [] for empty/None input
    """
    # fix: mutable default argument replaced with a None sentinel
    if langs is None:
        langs = ["en"]

    def _l(elt):
        return elt.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs

    if elts is None or len(elts) == 0:
        return []

    # fix: materialize as a list — under py3 a lazy filter object is always
    # truthy, which silently broke the fallback branch below
    lst = [e for e in elts if _l(e)]
    if lst:
        return lst
    return elts
|
333
|
|
|
|
|
334
|
|
|
|
|
335
|
|
|
def xslt_transform(t, stylesheet, params=None):
    """
    Apply a packaged XSLT stylesheet to an element tree.

    :param t: the element tree to transform
    :param stylesheet: resource name of the stylesheet (looked up under 'xslt')
    :param params: optional dict of stylesheet parameters
    :return: the transformed tree
    """
    # fix: mutable default argument replaced with a None sentinel
    if params is None:
        params = {}
    xsl = etree.fromstring(resource_string(stylesheet, "xslt"))
    transform = etree.XSLT(xsl)
    return transform(t, **params)
|
339
|
|
|
|
|
340
|
|
|
|
|
341
|
|
|
def total_seconds(dt):
    """Return the duration of timedelta *dt* in seconds, with a manual
    computation as fallback for timedelta implementations (pre-2.7) that
    lack total_seconds()."""
    if not hasattr(dt, "total_seconds"):
        return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6) / 10 ** 6
    return dt.total_seconds()
|
346
|
|
|
|