"""
This is the implementation of the active repository of SAML metadata. The 'local' and 'remote' pipes operate on this.
"""
from StringIO import StringIO
from datetime import datetime
import hashlib
import urllib
from UserDict import DictMixin, UserDict
from lxml import etree
from lxml.builder import ElementMaker
from lxml.etree import DocumentInvalid
import os
import re
from copy import deepcopy
from pyff import merge_strategies
import pyff.index
from pyff.logs import log
from pyff.utils import schema, URLFetch, filter_lang, root, duration2timedelta, template
import xmlsec
from pyff.constants import NS, NF_URI, DIGESTS, EVENT_DROP_ENTITY, EVENT_IMPORTED_METADATA, EVENT_IMPORT_FAIL
import traceback
import threading
from Queue import Queue


__author__ = 'leifj'


def _is_self_signed_err(ebuf):
    for e in ebuf:
        if e['func'] == 'xmlSecOpenSSLX509StoreVerify' and re.match('err=18', e['message']):
            return True
    return False


etree.set_default_parser(etree.XMLParser(resolve_entities=False))


def _e(error_log, m=None):
    def _f(x):
        if ":WARNING:" in x:
            return False
        if m is not None and m not in x:
            return False
        return True

    return "\n".join(filter(_f, ["%s" % e for e in error_log]))


class MetadataException(Exception):
    pass


class Event(UserDict):
    pass


class Observable(object):
    def __init__(self):
        self.callbacks = []

    def subscribe(self, callback):
        self.callbacks.append(callback)

    def fire(self, **attrs):
        e = Event(attrs)
        e['time'] = datetime.now()
        for fn in self.callbacks:
            fn(e)


class MDRepository(DictMixin, Observable):
    """A class representing a set of SAML Metadata. Instances present as dict-like objects where
    the keys are URIs and values are EntitiesDescriptor elements containing sets of metadata.
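
    Example (a sketch; the directory path is illustrative)::

        md = MDRepository()
        md.load_dir("/etc/metadata")    # also available as md["/etc/metadata"]
        for entity in md:               # iterate over all EntityDescriptor elements
            print md.display(entity)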
    """

    def __init__(self, index=None, metadata_cache_enabled=False, min_cache_ttl="PT5M"):
        self.md = {}
        # use None as the default to avoid a mutable default argument - a
        # single MemoryIndex would otherwise be shared between all instances
        if index is None:
            index = pyff.index.MemoryIndex()
        self.index = index
        self.metadata_cache_enabled = metadata_cache_enabled
        self.min_cache_ttl = min_cache_ttl
        self.respect_cache_duration = True
        self.default_cache_duration = "PT10M"
        self.retry_limit = 5

        super(MDRepository, self).__init__()

    def is_idp(self, entity):
        """Returns True if the supplied EntityDescriptor has an IDPSSODescriptor Role

        :param entity: An EntityDescriptor element
        """
        return bool(entity.find(".//{%s}IDPSSODescriptor" % NS['md']) is not None)

    def is_sp(self, entity):
        """Returns True if the supplied EntityDescriptor has an SPSSODescriptor Role

        :param entity: An EntityDescriptor element
        """
        return bool(entity.find(".//{%s}SPSSODescriptor" % NS['md']) is not None)

    def display(self, entity):
        """Utility method for computing a displayable string for a given entity.

        :param entity: An EntityDescriptor element
        """
        for displayName in filter_lang(entity.findall(".//{%s}DisplayName" % NS['mdui'])):
            return displayName.text

        for serviceName in filter_lang(entity.findall(".//{%s}ServiceName" % NS['md'])):
            return serviceName.text

        for organizationDisplayName in filter_lang(entity.findall(".//{%s}OrganizationDisplayName" % NS['md'])):
            return organizationDisplayName.text

        for organizationName in filter_lang(entity.findall(".//{%s}OrganizationName" % NS['md'])):
            return organizationName.text

        return entity.get('entityID')

    def __iter__(self):
        for t in [self.md[url] for url in self.md.keys()]:
            for entity in t.findall(".//{%s}EntityDescriptor" % NS['md']):
                yield entity

    def sha1_id(self, e):
        return pyff.index.hash_id(e, 'sha1')

    def search(self, query, path=None, page=None, page_limit=10, entity_filter=None):
        """
        :param query: A string to search for.
        :param path: The repository collection (@Name) to search in - None for search in all collections
        :param page: When using paged search, the page index
        :param page_limit: When using paged search, the maximum number of entries per page
        :param entity_filter: A lookup expression used to filter the entries before the search is done.

        Returns a list of dicts, one for each EntityDescriptor present in the metadata store such
        that any of the DisplayName, ServiceName, OrganizationName or OrganizationDisplayName
        elements match the query (i.e. contain the query as a substring).

        Each dict in the list contains three items:

        - label: A displayable string, useful as a UI label
        - value: The entityID of the EntityDescriptor
        - id: A sha1-ID of the entityID - on the form {sha1}<sha1-hash-of-entityID>
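
        Example (a sketch; the query string is illustrative)::

            hits = md.search("university")
            hits, more, total = md.search("university", page=1, page_limit=20)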
        """

        def _strings(e):
            lst = [e.get('entityID')]
            for attr in ['.//{%s}DisplayName' % NS['mdui'],
                         './/{%s}ServiceName' % NS['md'],
                         './/{%s}OrganizationDisplayName' % NS['md'],
                         './/{%s}OrganizationName' % NS['md']]:
                # guard against elements with empty text content
                lst.extend([x.text.lower() for x in e.findall(attr) if x.text is not None])
            return filter(lambda s: s is not None, lst)

        def _match(query, e):
            #log.debug("looking for %s in %s" % (query,",".join(_strings(e))))
            for qstr in _strings(e):
                if query in qstr:
                    return True
            return False

        f = []
        if path is not None:
            f.append(path)
        if entity_filter is not None:
            f.append(entity_filter)
        mexpr = None
        if f:
            mexpr = "+".join(f)

        log.debug("mexpr: %s" % mexpr)

        res = [{'label': self.display(e),
                'value': e.get('entityID'),
                'id': pyff.index.hash_id(e, 'sha1')}
               for e in pyff.index.EntitySet(filter(lambda ent: _match(query, ent), self.lookup(mexpr)))]

        res.sort(key=lambda i: i['label'])

        log.debug(res)

        if page is not None:
            total = len(res)
            begin = (page - 1) * page_limit
            end = begin + page_limit
            more = (end < total)
            return res[begin:end], more, total
        else:
            return res

    def sane(self):
        """A very basic test for sanity. An empty metadata set is probably not a sane output of any process.

        :return: True iff there is at least one EntityDescriptor in the active set.
        """
        return len(self.md) > 0

    def extensions(self, e):
        """Return the Extensions element of an EntityDescriptor, creating it if it is missing.

        :param e: an EntityDescriptor
        :return: the Extensions element
        """
        ext = e.find(".//{%s}Extensions" % NS['md'])
        if ext is None:
            ext = etree.Element("{%s}Extensions" % NS['md'])
            e.insert(0, ext)
        return ext

    def annotate(self, e, category, title, message, source=None):
        """Add an ATOM annotation to an EntityDescriptor or an EntitiesDescriptor. This is a simple way to
        add non-normative text annotations to metadata, e.g. for the purpose of generating reports.

        :param e: An EntityDescriptor or an EntitiesDescriptor element
        :param category: The ATOM category
        :param title: The ATOM title
        :param message: The ATOM content
        :param source: An optional source URL. It is added as a <link> element with @rel='saml-metadata-source'
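
        Example (a sketch; the source URL is illustrative)::

            md.annotate(e, "error", "fetch failed",
                        "timeout fetching metadata", source="http://md.example.com/metadata.xml")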
        """
        if e.tag != "{%s}EntityDescriptor" % NS['md'] and e.tag != "{%s}EntitiesDescriptor" % NS['md']:
            raise MetadataException(
                "I can only annotate EntityDescriptor or EntitiesDescriptor elements")
        subject = e.get('Name', e.get('entityID', None))
        atom = ElementMaker(nsmap={
            'atom': 'http://www.w3.org/2005/Atom'}, namespace='http://www.w3.org/2005/Atom')
        args = [atom.published("%s" % datetime.now().isoformat()),
                atom.link(href=subject, rel="saml-metadata-subject")]
        if source is not None:
            args.append(atom.link(href=source, rel="saml-metadata-source"))
        args.extend([atom.title(title),
                     atom.category(term=category),
                     atom.content(message, type="text/plain")])
        self.extensions(e).append(atom.entry(*args))

    def _entity_attributes(self, e):
        ext = self.extensions(e)
        # log.debug(ext)
        ea = ext.find(".//{%s}EntityAttributes" % NS['mdattr'])
        if ea is None:
            ea = etree.Element("{%s}EntityAttributes" % NS['mdattr'])
            ext.append(ea)
        return ea

    def _eattribute(self, e, attr, nf):
        ea = self._entity_attributes(e)
        # log.debug(ea)
        a = ea.xpath(
            ".//saml:Attribute[@NameFormat='%s' and @Name='%s']" % (nf, attr), namespaces=NS)
        if a is None or len(a) == 0:
            a = etree.Element("{%s}Attribute" % NS['saml'])
            a.set('NameFormat', nf)
            a.set('Name', attr)
            ea.append(a)
        else:
            a = a[0]
        # log.debug(etree.tostring(self.extensions(e)))
        return a

    def set_entity_attributes(self, e, d, nf=NF_URI):
        """Set an entity attribute on an EntityDescriptor

        :param e: The EntityDescriptor element
        :param d: A dict of attribute-value pairs that should be added as entity attributes
        :param nf: The nameFormat (by default "urn:oasis:names:tc:SAML:2.0:attrname-format:uri") to use.
        :raise: MetadataException unless e is an EntityDescriptor element
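
        Example (a sketch; the attribute name and value are illustrative)::

            md.set_entity_attributes(e, {'http://macedir.org/entity-category':
                                         'http://example.com/category'})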
        """
        if e.tag != "{%s}EntityDescriptor" % NS['md']:
            raise MetadataException(
                "I can only add EntityAttribute(s) to EntityDescriptor elements")

        #log.debug("set %s" % d)
        for attr, value in d.iteritems():
            #log.debug("set %s to %s" % (attr,value))
            a = self._eattribute(e, attr, nf)
            # log.debug(etree.tostring(a))
            velt = etree.Element("{%s}AttributeValue" % NS['saml'])
            velt.text = value
            a.append(velt)
            # log.debug(etree.tostring(a))

    def fetch_metadata(self, resources, qsize=5, timeout=120, stats=None, xrd=None):
        """Fetch a series of metadata URLs and optionally verify signatures.

        :param resources: A list of triples (url,cert-or-fingerprint,id)
        :param qsize: The number of parallel downloads to run
        :param timeout: The number of seconds to wait (120 by default) for each download
        :param stats: A dictionary used for storing statistics. Useful for cherrypy cpstats
        :param xrd: An optional file name; if given, an XRD document listing the resolved
            URLs and certificates is written to it using the 'trust.xrd' template

        The list of triples is processed by first downloading the URL. If a cert-or-fingerprint
        is supplied it is used to validate the signature on the received XML. Two forms of XML
        are supported: SAML Metadata and XRD.

        SAML metadata is (if valid and contains a valid signature) stored under the 'id'
        identifier (which defaults to the URL unless provided in the triple).

        XRD elements are processed thus: for all <Link> elements that contain a ds:KeyInfo
        element with an X509Certificate and where the <Rel> element contains the string
        'urn:oasis:names:tc:SAML:2.0:metadata', the corresponding <URL> element is downloaded
        and verified.
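
        Example (a sketch; the URL and fingerprint are illustrative)::

            md.fetch_metadata([("http://md.example.com/metadata.xml",
                                "AB:CD:EF:...",  # SHA1 fingerprint, or a path to a PEM certificate
                                "http://md.example.com/metadata.xml")])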
        """
        if stats is None:
            stats = {}

        def producer(q, resources, cache=self.metadata_cache_enabled):
            log.debug("fetching %s" % repr(resources))
            for url, verify, id, tries in resources:
                log.debug("starting fetcher for '%s'" % url)
                thread = URLFetch(
                    url, verify, id, enable_cache=cache, tries=tries)
                thread.start()
                q.put(thread, True)

        def consumer(q, njobs, stats, next_jobs=None, resolved=None):
            if next_jobs is None:
                next_jobs = []
            if resolved is None:
                resolved = set()
            nfinished = 0

            while nfinished < njobs:
                info = None
                try:
                    log.debug("waiting for next thread to finish...")
                    thread = q.get(True)
                    thread.join(timeout)

                    if thread.isAlive():
                        raise MetadataException(
                            "thread timeout fetching '%s'" % thread.url)

                    info = {
                        'Time Spent': thread.time()
                    }

                    if thread.ex is not None:
                        raise thread.ex
                    else:
                        if thread.result is not None:
                            info['Bytes'] = len(thread.result)
                        else:
                            raise MetadataException(
                                "empty response fetching '%s'" % thread.url)
                        info['Cached'] = thread.cached
                        info['Date'] = str(thread.date)
                        info['Last-Modified'] = str(thread.last_modified)
                        info['Tries'] = thread.tries

                    xml = thread.result.strip()

                    if thread.status is not None:
                        info['Status'] = thread.resp.status_code

                    t = self.parse_metadata(
                        StringIO(xml), key=thread.verify, base_url=thread.url)
                    if t is None:
                        self.fire(type=EVENT_IMPORT_FAIL, url=thread.url)
                        raise MetadataException(
                            "no valid metadata found at '%s'" % thread.url)

                    relt = root(t)
                    if relt.tag in ('{%s}XRD' % NS['xrd'], '{%s}XRDS' % NS['xrd']):
                        log.debug("%s looks like an xrd document" % thread.url)
                        for xrd in t.xpath("//xrd:XRD", namespaces=NS):
                            log.debug("xrd: %s" % xrd)
                            for link in xrd.findall(".//{%s}Link[@rel='%s']" % (NS['xrd'], NS['md'])):
                                url = link.get("href")
                                certs = xmlsec.CertDict(link)
                                fingerprints = certs.keys()
                                fp = None
                                if len(fingerprints) > 0:
                                    fp = fingerprints[0]
                                log.debug("fingerprint: %s" % fp)
                                next_jobs.append((url, fp, url, 0))

                    elif relt.tag in ('{%s}EntityDescriptor' % NS['md'], '{%s}EntitiesDescriptor' % NS['md']):
                        cacheDuration = self.default_cache_duration
                        if self.respect_cache_duration:
                            cacheDuration = root(t).get(
                                'cacheDuration', self.default_cache_duration)
                        offset = duration2timedelta(cacheDuration)

                        if thread.cached:
                            if thread.last_modified + offset < datetime.now() - duration2timedelta(self.min_cache_ttl):
                                raise MetadataException(
                                    "cached metadata expired")
                            else:
                                log.debug("found cached metadata for '%s' (last-modified: %s)" %
                                          (thread.url, thread.last_modified))
                                ne = self.import_metadata(t, url=thread.id)
                                info['Number of Entities'] = ne
                        else:
                            log.debug("got fresh metadata for '%s' (date: %s)" % (
                                thread.url, thread.date))
                            ne = self.import_metadata(t, url=thread.id)
                            info['Number of Entities'] = ne
                        info['Cache Expiration Time'] = str(
                            thread.last_modified + offset)
                        certs = xmlsec.CertDict(relt)
                        cert = None
                        if certs.values():
                            cert = certs.values()[0].strip()
                        resolved.add((thread.url, cert))
                    else:
                        raise MetadataException(
                            "unknown metadata type for '%s' (%s)" % (thread.url, relt.tag))
                except Exception, ex:
                    # traceback.print_exc(ex)
                    log.warn("problem fetching '%s' (will retry): %s" %
                             (thread.url, ex))
                    if info is not None:
                        info['Exception'] = ex
                    if thread.tries < self.retry_limit:
                        next_jobs.append(
                            (thread.url, thread.verify, thread.id, thread.tries + 1))
                    else:
                        # traceback.print_exc(ex)
                        log.error(
                            "retry limit exceeded for %s (last error was: %s)" % (thread.url, ex))
                finally:
                    nfinished += 1
                    if info is not None:
                        stats[thread.url] = info

        resources = [(url, verify, rid, 0) for url, verify, rid in resources]
        resolved = set()
        cache = True
        while len(resources) > 0:
            log.debug("fetching %d resources (%s)" %
                      (len(resources), repr(resources)))
            next_jobs = []
            q = Queue(qsize)
            prod_thread = threading.Thread(
                target=producer, args=(q, resources, cache))
            cons_thread = threading.Thread(target=consumer, args=(
                q, len(resources), stats, next_jobs, resolved))
            prod_thread.start()
            cons_thread.start()
            prod_thread.join()
            cons_thread.join()
            log.debug("after fetch: %d jobs to retry" % len(next_jobs))
            if len(next_jobs) > 0:
                resources = next_jobs
                cache = False
            else:
                resources = []

        if xrd is not None:
            with open(xrd, "w") as fd:
                fd.write(template("trust.xrd").render(links=resolved))

    def parse_metadata(self, fn, key=None, base_url=None, fail_on_error=False, filter_invalid=True):
        """Parse a piece of XML and split it up into EntityDescriptor elements. Each such element
        is stored in the MDRepository instance.

        :param fn: a file-like object containing SAML metadata
        :param key: a certificate (file) or a SHA1 fingerprint to use for signature verification
        :param base_url: use this base url to resolve relative URLs for XInclude processing
        :param fail_on_error: (re)raise parse errors instead of returning None
        :param filter_invalid: remove EntityDescriptor elements that fail schema validation
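
        Example (a sketch; the file name and URL are illustrative)::

            with open("metadata.xml") as fd:
                t = md.parse_metadata(fd, base_url="http://md.example.com/metadata.xml")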
        """
        try:
            t = etree.parse(fn, base_url=base_url,
                            parser=etree.XMLParser(resolve_entities=False))
            t.xinclude()
            if filter_invalid:
                for e in t.findall('{%s}EntityDescriptor' % NS['md']):
                    if not schema().validate(e):
                        error = _e(schema().error_log, m=base_url)
                        log.debug("removing '%s': schema validation failed (%s)" % (
                            e.get('entityID'), error))
                        e.getparent().remove(e)
                        self.fire(type=EVENT_DROP_ENTITY, url=base_url,
                                  entityID=e.get('entityID'), error=error)
            else:
                # Having removed the invalid entities this should now never
                # happen...
                schema().assertValid(t)
        except DocumentInvalid, ex:
            traceback.print_exc()
            log.debug("schema validation failed on '%s': %s" % (
                base_url, _e(ex.error_log, m=base_url)))
            raise MetadataException("schema validation failed")
        except Exception, ex:
            # log.debug(_e(schema().error_log))
            log.error(ex)
            if fail_on_error:
                raise ex
            return None
        if key is not None:
            try:
                log.debug("verifying signature using %s" % key)
                refs = xmlsec.verified(t, key)
                if len(refs) != 1:
                    raise MetadataException(
                        "XML metadata contains %d signatures - exactly 1 is required" % len(refs))
                t = refs[0]  # prevent wrapping attacks
            except Exception, ex:
                tb = traceback.format_exc()
                log.debug(tb)
                log.error(ex)
                return None

        return t

    def _index_entity(self, e):
        #log.debug("adding %s to index" % e.get('entityID'))
        if 'ID' in e.attrib:
            del e.attrib['ID']
        self.index.add(e)

    def import_metadata(self, t, url=None):
        """
        :param t: An EntitiesDescriptor element
        :param url: An optional URL used to identify the EntitiesDescriptor in the MDRepository

        Import an EntitiesDescriptor element using the @Name attribute (or the supplied url parameter). All
        EntityDescriptor elements are stripped of any @ID attribute and are then indexed before the collection
        is stored in the MDRepository object.
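
        Example (a sketch; 'xml' is assumed to hold SAML metadata as a string)::

            t = md.parse_metadata(StringIO(xml))
            ne = md.import_metadata(t, url="http://md.example.com/metadata.xml")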
        """
        if url is None:
            top = t.xpath("//md:EntitiesDescriptor", namespaces=NS)
            if top is not None and len(top) == 1:
                url = top[0].get("Name", None)
        if url is None:
            raise MetadataException("No collection name found")
        self[url] = t
        # we always clean incoming ID
        # add to the index
        ne = 0

        if t is not None:
            if root(t).tag == "{%s}EntityDescriptor" % NS['md']:
                self._index_entity(root(t))
                ne += 1
            else:
                for e in t.findall(".//{%s}EntityDescriptor" % NS['md']):
                    self._index_entity(e)
                    ne += 1

        self.fire(type=EVENT_IMPORTED_METADATA, size=ne, url=url)
        return ne

    def entities(self, t=None):
        """
        :param t: An EntitiesDescriptor element

        Returns the list of contained EntityDescriptor elements
        """
        if t is None:
            return []
        elif root(t).tag == "{%s}EntityDescriptor" % NS['md']:
            return [root(t)]
        else:
            return t.findall(".//{%s}EntityDescriptor" % NS['md'])

    def load_dir(self, directory, ext=".xml", url=None):
        """
        :param directory: A directory to walk.
        :param ext: Include files with this extension (default .xml)
        :param url: The name to store the resulting collection under (defaults to the directory path)

        Traverse a directory tree looking for metadata. Files ending in the specified extension are
        included. Directories starting with '.' are excluded.
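
        Example (a sketch; the path is illustrative)::

            t = md.load_dir("/etc/metadata")   # also available as md["/etc/metadata"]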
        """
        if url is None:
            url = directory
        log.debug("walking %s" % directory)
        if directory not in self.md:
            entities = []
            for top, dirs, files in os.walk(directory):
                # prune hidden directories in place so os.walk skips them
                # (removing from the list while iterating over it would skip entries)
                dirs[:] = [dn for dn in dirs if not dn.startswith(".")]
                for nm in files:
                    log.debug("found file %s" % nm)
                    if nm.endswith(ext):
                        fn = os.path.join(top, nm)
                        try:
                            t = self.parse_metadata(fn, fail_on_error=True)
                            # local metadata is assumed to be ok
                            entities.extend(self.entities(t))
                        except Exception, ex:
                            log.error(ex)
            self.import_metadata(self.entity_set(entities, url))
        return self.md[url]

    def _lookup(self, member, xp=None):
        """
        :param member: Either an entity, URL or a filter expression.

        Find a (set of) EntityDescriptor element(s) based on the specified 'member' expression.
        """

        def _hash(hn, strv):
            if hn == 'null':
                return strv
            if not hasattr(hashlib, hn):
                raise MetadataException("Unknown digest mechanism: '%s'" % hn)
            hash_m = getattr(hashlib, hn)
            h = hash_m()
            h.update(strv)
            return h.hexdigest()

        if xp is None:
            xp = "//md:EntityDescriptor"
        if member is None:
            lst = []
            for m in self.keys():
                log.debug("resolving %s filtered by %s" % (m, xp))
                lst.extend(self._lookup(m, xp))
            return lst
        elif hasattr(member, 'xpath'):
            log.debug("xpath filter %s <- %s" % (xp, member))
            return member.xpath(xp, namespaces=NS)
        elif type(member) is str or type(member) is unicode:
            log.debug("string lookup %s" % member)

            if '+' in member:
                member = member.strip('+')
                log.debug("lookup intersection of '%s'" %
                          ' and '.join(member.split('+')))
                hits = None
                for f in member.split("+"):
                    f = f.strip()
                    if hits is None:
                        hits = set(self._lookup(f, xp))
                    else:
                        other = self._lookup(f, xp)
                        hits.intersection_update(other)

                    if not hits:
                        log.debug("empty intersection")
                        return []

                if hits is not None and hits:
                    return list(hits)
                else:
                    return []

            if "!" in member:
                (src, xp) = member.split("!")
                if len(src) == 0:
                    src = None
                    log.debug("filtering using %s" % xp)
                else:
                    log.debug("selecting %s filtered by %s" % (src, xp))
                return self._lookup(src, xp)

            m = re.match(r"^\{(.+)\}(.+)$", member)
            if m is not None:
                log.debug("attribute-value match: %s='%s'" %
                          (m.group(1), m.group(2)))
                return self.index.get(m.group(1), m.group(2).rstrip("/"))

            m = re.match(r"^(.+)=(.+)$", member)
            if m is not None:
                log.debug("attribute-value match: %s='%s'" %
                          (m.group(1), m.group(2)))
                return self.index.get(m.group(1), m.group(2).rstrip("/"))

            log.debug("basic lookup %s" % member)
            for idx in DIGESTS:
                e = self.index.get(idx, member)
                if e:
                    log.debug("found %s in %s index" % (e, idx))
                    return e

            e = self.get(member, None)
            if e is not None:
                return self._lookup(e, xp)

            # hackish but helps save people from their mistakes
            e = self.get("%s.xml" % member, None)
            if e is not None:
                if "://" not in member:  # not an absolute URL
                    log.warn(
                        "Found %s.xml as an alias - AVOID extensions in 'select as' statements" % member)
                return self._lookup(e, xp)

            if "://" in member:  # looks like a URL and wasn't an entity or collection - recurse away!
                log.debug("recursively fetching members from '%s'" % member)
                # note that this supports remote lists which may be more rope
                # than is healthy
                return [self._lookup(line.strip(), xp) for line in urllib.urlopen(member).readlines()]

            return []
        elif hasattr(member, '__iter__') and type(member) is not dict:
            if not len(member):
                member = self.keys()
            return [self._lookup(m, xp) for m in member]
        else:
            raise MetadataException("What about %s ??" % member)

    def lookup(self, member, xp=None):
        """
        Lookup elements in the working metadata repository

        :param member: A selector (cf below)
        :type member: basestring
        :param xp: An optional xpath filter
        :type xp: basestring
        :return: An iterable of EntityDescriptor elements
        :rtype: etree.Element

        **Selector Syntax**

        - selector "+" selector
        - [sourceID] "!" xpath
        - attribute=value or {attribute}value
        - entityID
        - sourceID (@Name)
        - <URL containing one selector per line>

        The first form results in the intersection of the results of doing a lookup on the selectors. The second form
        results in the EntityDescriptor elements from the source (defaults to all EntityDescriptors) that match the
        xpath expression. The attribute-value forms result in the EntityDescriptors that contain the specified entity
        attribute pair. If none of these forms apply, the lookup is done using either source ID (normally @Name from
        the EntitiesDescriptor) or the entityID of single EntityDescriptors. If member is a URI but isn't part of
        the metadata repository then it is fetched and treated as a list of selectors (one per line). If all else
        fails an empty list is returned.
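
        Examples (sketches; all URLs and entityIDs are illustrative)::

            md.lookup("http://mdx.example.com/metadata")        # a collection by @Name
            md.lookup("https://idp.example.com/idp.xml")        # a single entity by entityID
            md.lookup("!//md:EntityDescriptor[md:IDPSSODescriptor]")  # all IdPs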
        """
        l = self._lookup(member, xp)
        return list(set(filter(lambda x: x is not None, l)))

    def entity_set(self, entities, name, cacheDuration=None, validUntil=None, validate=True):
        """
        :param entities: a set of entity specifiers (lookup is used to find entities from this set)
        :param name: the @Name attribute
        :param cacheDuration: an XML timedelta expression, e.g. PT1H for 1 hour
        :param validUntil: a relative time, e.g. 2w 4d 1h for 2 weeks, 4 days and 1 hour from now.
        :param validate: validate the resulting element against the SAML schema (default True)

        Produce an EntitiesDescriptor element from a list of entities. Optional Name, cacheDuration
        and validUntil attributes are affixed.
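
        Example (a sketch; the names are illustrative)::

            t = md.entity_set(["http://mdx.example.com/metadata"],
                              "http://md.example.com/all.xml", cacheDuration="PT1H")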
        """
        attrs = dict(Name=name, nsmap=NS)
        if cacheDuration is not None:
            attrs['cacheDuration'] = cacheDuration
        if validUntil is not None:
            attrs['validUntil'] = validUntil
        t = etree.Element("{%s}EntitiesDescriptor" % NS['md'], **attrs)
        nent = 0
        seen = {}  # TODO make better de-duplication
        for member in entities:
            for ent in self.lookup(member):
                entityID = ent.get('entityID', None)
                if (ent is not None) and (entityID is not None) and (not seen.get(entityID, False)):
                    t.append(deepcopy(ent))
                    seen[entityID] = True
                    nent += 1

        log.debug("selecting %d entities from %d entity set(s) before validation" % (
            nent, len(entities)))

        if not nent:
            return None

        if validate:
            try:
                schema().assertValid(t)
            except DocumentInvalid, ex:
                log.debug(_e(ex.error_log))
                #raise MetadataException(
                #    "XML schema validation failed: %s" % name)
        return t

    def error_set(self, url, title, ex):
        """
        Creates an "error" EntitiesDescriptor - empty but for an annotation about the error that occurred
        """
        t = etree.Element("{%s}EntitiesDescriptor" %
                          NS['md'], Name=url, nsmap=NS)
        self.annotate(t, "error", title, ex, source=url)
        return t

    def keys(self):
        return self.md.keys()

    def __getitem__(self, item):
        return self.md[item]

    def __setitem__(self, key, value):
        self.md[key] = value

    def __delitem__(self, key):
        del self.md[key]

    def summary(self, uri):
        """
        :param uri: An EntitiesDescriptor URI present in the MDRepository
        :return: an information dict

        Returns a dict object with basic information about the EntitiesDescriptor
        """
        seen = dict()
        info = dict()
        t = root(self[uri])
        info['Name'] = t.get('Name', uri)
        info['cacheDuration'] = t.get('cacheDuration', None)
        info['validUntil'] = t.get('validUntil', None)
        info['Duplicates'] = []
        info['Size'] = 0
        for e in self.entities(self[uri]):
            entityID = e.get('entityID')
            if seen.get(entityID, False):
                info['Duplicates'].append(entityID)
            else:
                seen[entityID] = True
                info['Size'] += 1

        return info

    def merge(self, t, nt, strategy=pyff.merge_strategies.replace_existing, strategy_name=None):
        """
        :param t: The EntitiesDescriptor element to merge *into*
        :param nt: The EntitiesDescriptor element to merge *from*
        :param strategy: A callable implementing the merge strategy pattern
        :param strategy_name: The name of a strategy to import. Overrides the callable if present.
        :return:

        Two EntitiesDescriptor elements are merged - the second into the first. For each element
        in the second collection that is present (using the @entityID attribute as key) in the
        first, the strategy callable is called with the old and new EntityDescriptor elements
        as parameters. The strategy callable thus must implement the following pattern:

        :param old_e: The EntityDescriptor from t
        :param e: The EntityDescriptor from nt
        :return: A merged EntityDescriptor element

        Before each call to strategy, old_e is removed from the MDRepository index and after
        the merge the resulting EntityDescriptor is added to the index before it is used to
        replace old_e in t.
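
        Example of a strategy callable (a sketch of a replace-style strategy; not
        necessarily identical to the built-in pyff.merge_strategies.replace_existing)::

            def my_replace(old_e, e):
                old_e.getparent().replace(old_e, deepcopy(e))

            md.merge(t, nt, strategy=my_replace)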
        """
        if strategy_name is not None:
            if '.' not in strategy_name:
                strategy_name = "pyff.merge_strategies.%s" % strategy_name
            (mn, sep, fn) = strategy_name.rpartition('.')
            #log.debug("import %s from %s" % (fn,mn))
            module = None
            if '.' in mn:
                (pn, sep, modn) = mn.rpartition('.')
                module = getattr(__import__(
                    pn, globals(), locals(), [modn], -1), modn)
            else:
                module = __import__(mn, globals(), locals(), [], -1)
            # we might as well let this fail early if the strategy is wrongly
            # named
            strategy = getattr(module, fn)

        if strategy is None:
            raise MetadataException("No merge strategy - refusing to merge")

        for e in nt.findall(".//{%s}EntityDescriptor" % NS['md']):
            entityID = e.get("entityID")
            # we assume a de-duplicated tree
            old_e = t.find(
                ".//{%s}EntityDescriptor[@entityID='%s']" % (NS['md'], entityID))
            #log.debug("merging %s into %s" % (e,old_e))
            # update the index!

            try:
                self.index.remove(old_e)
                #log.debug("removed old entity from index")
                strategy(old_e, e)
                new_e = t.find(
                    ".//{%s}EntityDescriptor[@entityID='%s']" % (NS['md'], entityID))
                # use an explicit None test - lxml elements without children
                # evaluate as False
                if new_e is not None:
                    # we don't know which strategy was employed
                    self.index.add(new_e)
            except Exception, ex:
                traceback.print_exc()
                self.index.add(old_e)
                raise ex