1
|
|
|
""" |
2
|
|
|
|
3
|
|
|
This module contains various utilities used by pyff: resource loading, time-interval parsing, XML serialization helpers, schema validation support, atomic file writing and threaded URL fetching.
4
|
|
|
|
5
|
|
|
""" |
6
|
|
|
from datetime import timedelta, datetime |
7
|
|
|
import tempfile |
8
|
|
|
import traceback |
9
|
|
|
from mako.lookup import TemplateLookup |
10
|
|
|
import os |
11
|
|
|
import pkg_resources |
12
|
|
|
import re |
13
|
|
|
from lxml import etree |
14
|
|
|
from time import gmtime, strftime, clock |
15
|
|
|
from pyff.logs import log |
16
|
|
|
import threading |
17
|
|
|
import requests |
18
|
|
|
import requests_cache |
19
|
|
|
from email.utils import parsedate |
20
|
|
|
|
21
|
|
|
__author__ = 'leifj' |
22
|
|
|
|
23
|
|
|
|
24
|
|
|
class PyffException(Exception):
    # Package-specific exception type.
    # NOTE(review): not raised anywhere in this module — presumably used by
    # other pyff modules; confirm before removing.
    pass
26
|
|
|
|
27
|
|
|
|
28
|
|
|
def _e(error_log, m=None): |
29
|
|
|
def _f(x): |
30
|
|
|
if ":WARNING:" in x: |
31
|
|
|
return False |
32
|
|
|
if m is not None and not m in x: |
33
|
|
|
return False |
34
|
|
|
return True |
35
|
|
|
|
36
|
|
|
return "\n".join(filter(_f, ["%s" % e for e in error_log])) |
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def debug_observer(e):
    """Log the repr() of *e* at error level (used as a debugging observer hook)."""
    log.error(repr(e))
41
|
|
|
|
42
|
|
|
|
43
|
|
|
def resource_string(name, pfx=None):
    """
    Attempt to load and return the contents (as a string) of the resource named by
    the first argument in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: The resource contents, or None if it was not found anywhere
    """
    name = os.path.expanduser(name)
    if os.path.exists(name):
        with open(name) as fd:
            return fd.read()
    if pfx:
        local = os.path.join(pfx, name)
        if os.path.exists(local):
            with open(local) as fd:
                return fd.read()
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_string(__name__, name)
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_string(__name__, packaged)
    return None
73
|
|
|
|
74
|
|
|
|
75
|
|
|
def resource_filename(name, pfx=None):
    """
    Attempt to find and return the filename of the resource named by the first argument
    in the first location of:

    # as name in the current directory
    # as name in the `pfx` subdirectory of the current directory if provided
    # as name relative to the package
    # as pfx/name relative to the package

    The last two alternatives is used to locate resources distributed in the package.
    This includes certain XSLT and XSD files.

    :param name: The string name of a resource
    :param pfx: An optional prefix to use in searching for name
    :return: The filename of the resource, or None if it was not found anywhere
    """
    # NOTE: unlike resource_string, this does NOT expand '~' in name.
    if os.path.exists(name):
        return name
    if pfx:
        local = os.path.join(pfx, name)
        if os.path.exists(local):
            return local
    if pkg_resources.resource_exists(__name__, name):
        return pkg_resources.resource_filename(__name__, name)
    if pfx:
        packaged = "%s/%s" % (pfx, name)
        if pkg_resources.resource_exists(__name__, packaged):
            return pkg_resources.resource_filename(__name__, packaged)
    return None
102
|
|
|
|
103
|
|
|
|
104
|
|
|
def dmerge(a, b):
    """
    Deep merge of two isomorphically structured dictionaries.

    Values from *b* override values in *a*, except where both values are
    dictionaries, in which case they are merged recursively.

    :param a: The dictionary to merge into (modified in place)
    :param b: The dictionary to merge from
    """
    merged = {}
    for k in a:
        v = a[k]
        # Guard on b[k] being a dict too: recursing into a non-dict would fail.
        if isinstance(v, dict) and isinstance(b.get(k), dict):
            dmerge(v, b[k])
            merged[k] = v
    a.update(b)
    # Restore the recursively merged sub-dicts: a.update(b) above would
    # otherwise have replaced them with b's (unmerged) sub-dicts — this was
    # the original bug that discarded the recursion's result.
    a.update(merged)
116
|
|
|
|
117
|
|
|
|
118
|
|
|
def tdelta(input):
    """
    Parse a time delta from expressions like 1w 32d 4h 15m 5s - i.e in weeks,
    days, hours, minutes and/or seconds.

    :param input: A human-friendly string representation of a timedelta
    :return: A datetime.timedelta (zero when nothing could be parsed)
    """
    # The docstring always promised seconds ("5s") but the original only
    # parsed weeks/days/hours/minutes; seconds are now supported too.
    keys = ["weeks", "days", "hours", "minutes", "seconds"]
    # One optional group per unit, keyed by the unit's first letter: w d h m s
    regex = "".join([r"((?P<%s>\d+)%s ?)?" % (k, k[0]) for k in keys])
    kwargs = {}
    for k, v in re.match(regex, input).groupdict(default="0").items():
        kwargs[k] = int(v)
    return timedelta(**kwargs)
130
|
|
|
|
131
|
|
|
|
132
|
|
|
def dumptree(t, pretty_print=False, xml_declaration=True):
    """
    Return a string representation of the tree, optionally pretty_print(ed) (default False)

    :param t: An ElementTree to serialize
    :param pretty_print: Whether to indent the serialized output (default False)
    :param xml_declaration: Whether to emit an '<?xml ...?>' declaration (default True)
    :return: The UTF-8 encoded serialization of the tree
    """
    return etree.tostring(t, encoding='UTF-8', xml_declaration=xml_declaration, pretty_print=pretty_print)
139
|
|
|
|
140
|
|
|
|
141
|
|
|
def iso_now():
    """
    Current UTC time in ISO format (e.g. 2012-01-01T12:00:00Z)
    """
    return datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
146
|
|
|
|
147
|
|
|
|
148
|
|
|
class ResourceResolver(etree.Resolver):
    """An lxml resolver that serves documents from package resources."""

    def resolve(self, system_url, public_id, context):
        """
        Resolves URIs using the resource API

        Looks for the final path component of *system_url* first directly in
        the package, then under schema/.

        :raises ValueError: when the resource cannot be located
        """
        log.debug("resolve SYSTEM URL' %s' for '%s'" % (system_url, public_id))
        fn = system_url.split("/")[-1]
        for candidate in (fn, "schema/%s" % fn):
            if pkg_resources.resource_exists(__name__, candidate):
                return self.resolve_file(pkg_resources.resource_stream(__name__, candidate), context)
        raise ValueError("Unable to locate %s" % fn)
162
|
|
|
|
163
|
|
|
|
164
|
|
|
_SCHEMA = None |
165
|
|
|
|
166
|
|
|
|
167
|
|
|
def schema():
    """
    Return the XML schema used to validate metadata, constructing and caching
    it on first use.

    The schema is parsed from the packaged schema/schema.xsd, with a
    ResourceResolver installed so includes/imports resolve to packaged files.

    :return: An etree.XMLSchema instance
    :raises etree.XMLSchemaParseError: if the packaged schema cannot be parsed
    """
    global _SCHEMA
    if _SCHEMA is None:
        try:
            parser = etree.XMLParser()
            parser.resolvers.add(ResourceResolver())
            st = etree.parse(pkg_resources.resource_stream(__name__, "schema/schema.xsd"), parser)
            _SCHEMA = etree.XMLSchema(st)
        except etree.XMLSchemaParseError as ex:
            # Log the parse errors (warnings filtered out by _e) before re-raising.
            log.error(_e(ex.error_log))
            # bare raise preserves the original traceback (raise ex did not)
            raise
    return _SCHEMA
179
|
|
|
|
180
|
|
|
|
181
|
|
|
def safe_write(fn, data):
    """Safely (atomically) write data to a file with name fn

    The data is first written to a temporary file in the target directory and
    then renamed into place, so readers never see a partially written file.

    :param fn: a filename
    :param data: some data to write
    :return: True or False depending on the outcome of the write
    """
    tmpn = None
    try:
        fn = os.path.expanduser(fn)
        dirname, basename = os.path.split(fn)
        # Create the temp file next to the target so os.rename stays on one filesystem.
        with tempfile.NamedTemporaryFile('w', delete=False, prefix=".%s" % basename, dir=dirname) as tmp:
            tmp.write(data)
            tmpn = tmp.name
        if os.path.exists(tmpn) and os.stat(tmpn).st_size > 0:
            os.rename(tmpn, fn)
            return True
    except Exception as ex:  # was Py2-only "except Exception, ex" syntax
        log.error(ex)
    finally:
        # Best-effort cleanup when the temp file was not renamed into place.
        if tmpn is not None and os.path.exists(tmpn):
            try:
                os.unlink(tmpn)
            except Exception as ex:
                log.warn(ex)
    return False
207
|
|
|
|
208
|
|
|
|
209
|
|
|
# Location of the bundled "site" assets, relative to this module's directory.
site_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "site")
# Mako template lookup rooted at the site's templates directory.
templates = TemplateLookup(directories=[os.path.join(site_dir, 'templates')])
211
|
|
|
|
212
|
|
|
|
213
|
|
|
def template(name):
    """Return the Mako template named *name* from the module's template lookup."""
    return templates.get_template(name)
215
|
|
|
|
216
|
|
|
|
217
|
|
|
class URLFetch(threading.Thread):
    """A thread that fetches a single URL (http(s) or file://) and records the result."""

    def __init__(self, url, verify, id=None, enable_cache=False, tries=0):
        """
        :param url: The URL to fetch (surrounding whitespace is stripped)
        :param verify: Verification data kept for the caller's use (see run())
        :param id: An optional identifier for this fetch (defaults to the URL)
        :param enable_cache: Whether a cached response may be used for this URL
        :param tries: Attempt counter supplied by the caller
        """
        self.url = url.strip()
        self.verify = verify
        self.id = id
        self.result = None        # response body on success, None on failure
        self.ex = None            # exception raised during the fetch, if any
        self.cached = False       # True when the response came from the cache
        self.enable_cache = enable_cache
        self.cache_ttl = 0
        self.last_modified = None
        self.date = None
        self.tries = tries        # was assigned twice (0 then tries); once now
        self.resp = None
        self.start_time = 0
        self.end_time = 0

        if self.id is None:
            self.id = self.url

        threading.Thread.__init__(self)

    def time(self):
        """
        Return the wall-clock duration of the completed fetch.

        :raises ValueError: if called while the fetch is still running
        """
        # is_alive() has been the canonical spelling since Python 2.6;
        # isAlive() was removed in Python 3.9.
        if self.is_alive():
            raise ValueError("caller attempted to obtain execution time while fetcher still active")
        return self.end_time - self.start_time

    def run(self):
        """Fetch self.url, populating result/date/last_modified/cached/status/ex."""

        def _parse_date(value):  # renamed from 'str' to avoid shadowing the builtin
            # Fall back to "now" when no usable date header was supplied.
            # (was datetime.new(), which does not exist and raised AttributeError)
            if value is None:
                return datetime.now()
            return datetime(*parsedate(value)[:6])

        self.start_time = clock()
        try:
            requests_cache.install_cache('.cache')
            if not self.enable_cache:
                log.debug("removing '%s' from cache" % self.url)
                requests_cache.get_cache().delete_url(self.url)

            log.debug("fetching '%s'" % self.url)

            if self.url.startswith('file://'):
                path = self.url[7:]
                if not os.path.exists(path):
                    raise IOError("file not found: %s" % path)

                with open(path, 'r') as fd:
                    self.result = fd.read()
                    self.cached = False
                    self.date = datetime.now()
                    self.last_modified = datetime.fromtimestamp(os.stat(path).st_mtime)
            else:
                # SECURITY NOTE: TLS certificate verification is disabled
                # (verify=False) even though a `verify` argument is accepted —
                # review whether self.verify should be passed through here.
                self.resp = requests.get(self.url, timeout=60, verify=False)
                self.last_modified = _parse_date(self.resp.headers.get('last-modified', self.resp.headers.get('date', None)))
                self.date = _parse_date(self.resp.headers['date'])
                self.cached = getattr(self.resp, 'from_cache', False)
                self.status = self.resp.status_code
                if self.resp.status_code != 200:
                    raise IOError(self.resp.reason)
                self.result = self.resp.content

            log.debug("got %d bytes from '%s'" % (len(self.result), self.url))
        except Exception as ex:  # was Py2-only "except Exception, ex" syntax
            traceback.print_exc()
            log.warn("unable to fetch '%s': %s" % (self.url, ex))
            self.ex = ex
            self.result = None
        finally:
            self.end_time = clock()
289
|
|
|
|
290
|
|
|
|
291
|
|
|
def root(t):
    """Return the root element of *t* when it is an ElementTree, else *t* itself."""
    getroot = getattr(t, 'getroot', None)
    if callable(getroot):
        return getroot()
    return t
296
|
|
|
|
297
|
|
|
|
298
|
|
|
def duration2timedelta(period):
    """
    Convert an ISO 8601 duration string (e.g. "PT1H", "-P1D") to a timedelta.

    Months are approximated as 30 days and years as 365 days.

    :param period: An ISO 8601 duration string
    :return: A datetime.timedelta, or None if *period* does not parse
    """
    # Raw string: the original used plain '...\d...' escapes, which emit
    # SyntaxWarning/DeprecationWarning on modern Python.
    regex = re.compile(
        r'(?P<sign>[-+]?)P(?:(?P<years>\d+)[Yy])?(?:(?P<months>\d+)[Mm])?(?:(?P<days>\d+)[Dd])?(?:T(?:(?P<hours>\d+)[Hh])?(?:(?P<minutes>\d+)[Mm])?(?:(?P<seconds>\d+)[Ss])?)?')

    m = regex.match(period)
    if not m:
        return None

    # Fetch the match groups with default value of 0 (not None)
    duration = m.groupdict(0)

    # Create the timedelta object from extracted groups
    delta = timedelta(days=int(duration['days']) + (int(duration['months']) * 30) + (int(duration['years']) * 365),
                      hours=int(duration['hours']),
                      minutes=int(duration['minutes']),
                      seconds=int(duration['seconds']))

    if duration['sign'] == "-":
        delta *= -1

    return delta
319
|
|
|
|
320
|
|
|
|
321
|
|
|
def filter_lang(elts, langs=["en"]):
    """
    Return the elements whose xml:lang attribute is in *langs*; if none match,
    return all of *elts* unchanged.

    :param elts: A list of elements (anything with a dict-style .get), or None
    :param langs: The acceptable language codes (default ["en"])
    :return: A list of matching elements, all of *elts*, or [] when empty/None
    """
    if elts is None or len(elts) == 0:
        return []

    # A list comprehension instead of filter(): under Python 3 the original's
    # filter object was always truthy, so the "no match -> return all"
    # fallback was unreachable and an iterator (not a list) was returned.
    matched = [e for e in elts
               if e.get("{http://www.w3.org/XML/1998/namespace}lang", None) in langs]
    if matched:
        return matched
    return elts
333
|
|
|
|
334
|
|
|
|
335
|
|
|
def xslt_transform(t, stylesheet, params={}):
    """
    Apply the packaged XSLT *stylesheet* to the element tree *t*.

    :param t: An element tree to transform
    :param stylesheet: The name of an XSLT resource (looked up under "xslt")
    :param params: Optional stylesheet parameters passed to the transform
    """
    transform = etree.XSLT(etree.fromstring(resource_string(stylesheet, "xslt")))
    return transform(t, **params)
339
|
|
|
|
340
|
|
|
|
341
|
|
|
def total_seconds(dt):
    """
    Return the timedelta *dt* expressed in seconds.

    Compatibility shim: falls back to the manual computation on interpreters
    whose timedelta lacks total_seconds() (Python < 2.7).
    """
    try:
        return dt.total_seconds()
    except AttributeError:
        return (dt.microseconds + (dt.seconds + dt.days * 24 * 3600) * 10 ** 6) / 10 ** 6
346
|
|
|
|