Completed
Push — master ( 1b52dc...01dbab )
by Koen
01:05
created

MemoryProvider.finder()   A

Complexity

Conditions 2

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 5
rs 9.4285
1
# -*- coding: utf-8 -*-
2
3
'''This module provides an abstraction of controlled vocabularies.
4
5
This abstraction allows our application to work with both local and remote
6
vocabs (be they SOAP, REST, XML-RPC or something else).
7
8
The basic idea is that we have skos providers. Each provider is an instance
9
of a :class:`VocabularyProvider`. The same class can thus be reused with
10
different configurations to handle different vocabs. Generally speaking, every
11
instance of a certain :class:`VocabularyProvider` will deal with concepts and
12
collections from a single conceptscheme.
13
'''
14
15
from __future__ import unicode_literals
16
17
import abc
18
import copy
19
import logging
20
from operator import methodcaller
21
22
from .skos import (
23
    Concept,
24
    Collection,
25
    ConceptScheme
26
)
27
28
from .uri import (
29
    DefaultUrnGenerator,
30
    DefaultConceptSchemeUrnGenerator
31
)
32
33
log = logging.getLogger(__name__)
34
35
36
class VocabularyProvider:
    '''The interface every vocabulary provider has to implement.

    A provider gives access to the concepts and collections of a single
    conceptscheme through a common set of lookup, listing and search methods.
    '''

    # NOTE: only Python 2 honours `__metaclass__`. Python 3 ignores this
    # attribute, so the abstract methods below are not enforced there.
    __metaclass__ = abc.ABCMeta

    concept_scheme = None
    '''The :class:`~skosprovider.skos.ConceptScheme` served by this
    provider.'''

    uri_generator = None
    '''The :class:`~skosprovider.uri.UriGenerator` that generates uris for
    this provider.'''

    def __init__(self, metadata, **kwargs):
        '''Set up the provider and remember its metadata.

        :param dict metadata: Metadata describing this provider. The dict is
            stored by reference and gets an empty `subject` list added when
            that key is missing. Recognised keys:

            * `id`: Unique identifier of the vocabulary. Required.
            * `default_language`: Language used for labels when the caller
              does not ask for one. Falls back to `en`.
            * `subject`: List of subjects or tags telling what the provider
              is about. Can be used when querying a
              :class:`~skosprovider.registry.Registry` for providers.
        :param uri_generator: Optional. An object implementing the
            :class:`skosprovider.uri.UriGenerator` interface. Without it, a
            :class:`~skosprovider.uri.DefaultUrnGenerator` built from the
            provider `id` is used.
        :param concept_scheme: Optional. A
            :class:`~skosprovider.skos.ConceptScheme`. Without it, a default
            one is created whose uri comes from the
            :class:`~skosprovider.uri.DefaultConceptSchemeUrnGenerator` and
            the provider `id`.
        '''
        metadata.setdefault('subject', [])
        self.metadata = metadata
        self.uri_generator = (
            kwargs['uri_generator'] if 'uri_generator' in kwargs
            else DefaultUrnGenerator(self.metadata.get('id'))
        )
        if 'concept_scheme' in kwargs:
            self.concept_scheme = kwargs['concept_scheme']
        else:
            scheme_uri = DefaultConceptSchemeUrnGenerator().generate(
                id=self.metadata.get('id')
            )
            self.concept_scheme = ConceptScheme(uri=scheme_uri)

    def _get_language(self, **kwargs):
        '''Work out the language labels should be rendered in.

        A `language` keyword wins. Otherwise the `default_language` of the
        provider metadata is used, and `en` when that is missing as well.

        :rtype: str
        '''
        fallback = self.metadata.get('default_language', 'en')
        return kwargs.get('language', fallback)

    def _get_sort(self, **kwargs):
        '''Return the attribute to sort on, or `None` when none was given.

        :rtype: str
        '''
        return kwargs.get('sort')

    def _get_sort_order(self, **kwargs):
        '''Return the requested sort order.

        :rtype: str
        :returns: The `sort_order` keyword, `asc` when it is absent.
        '''
        return kwargs['sort_order'] if 'sort_order' in kwargs else 'asc'

    def _sort(self, concepts, sort=None, language='any', reverse=False):
        '''
        Return a sorted shallow copy of `concepts`; the input list itself is
        never reordered.

        :param list concepts: Concepts and collections to sort.
        :param string sort: What to sort on: `id`, `label` or `sortlabel`.
            When falsy the copy keeps the original order.
        :param string language: Language used when sorting on `label` or
            `sortlabel`.
        :param boolean reverse: Sort in descending order?
        :rtype: list
        '''
        ordered = copy.copy(concepts)
        if not sort:
            return ordered
        # Every item supplies its own key through `_sortkey(sort, language)`.
        ordered.sort(
            key=methodcaller('_sortkey', sort, language),
            reverse=reverse
        )
        return ordered

    def get_vocabulary_id(self):
        '''Return the `id` entry of the provider metadata.

        :rtype: String or number.
        '''
        return self.metadata.get('id')

    def get_metadata(self):
        '''Return the metadata dict of the provider.

        :rtype: Dict.
        '''
        return self.metadata

    @abc.abstractmethod
    def get_by_id(self, id):
        '''Fetch a concept or collection by its id.

        Implementations should assume ids are handed over as strings. A
        provider that works with numeric ids internally is responsible for
        the typecasting, preferably by comparing ids in a type agnostic way
        rather than by converting the ids themselves.

        Concepts and collections share this lookup, so their ids are assumed
        not to collide.

        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`, or `False` when the
            provider does not know the id.
        '''

    @abc.abstractmethod
    def get_by_uri(self, uri):
        '''Fetch a concept or collection by its :term:`URI`.

        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`, or `False` when the
            provider does not know the uri.
        '''

    @abc.abstractmethod
    def get_all(self, **kwargs):
        '''List every concept and collection of this provider.

        :param string language: Optional :term:`language-tag` used to pick
            the label shown for each item.
        :param string sort: Optional. One of `id`, `label` or `sortlabel`.
            With `sortlabel` a `sortLabel` is used when present, a regular
            label otherwise.
        :param string sort_order: Optional. `asc` (default) or `desc`.

        :returns: A :class:`list` of dicts, one per concept or collection,
            with these keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: Label representing the item. Chosen using the
              `language` parameter, then the default language of the
              provider, then `en`.
        '''

    @abc.abstractmethod
    def get_top_concepts(self, **kwargs):
        '''
        List the top-level concepts of this provider.

        A top-level concept has no broader concepts. It may have narrower
        concepts, but does not need to.

        :param string language: Optional :term:`language-tag` used to pick
            the label shown for each concept.
        :param string sort: Optional. One of `id`, `label` or `sortlabel`.
            With `sortlabel` a `sortLabel` is used when present, a regular
            label otherwise.
        :param string sort_order: Optional. `asc` (default) or `desc`.

        :returns: A :class:`list` of dicts describing concepts, NOT
            collections, with these keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept
            * type: concept or collection
            * label: Label representing the concept. Chosen using the
              `language` parameter, then the default language of the
              provider, then `en`.
        '''

    @abc.abstractmethod
    def find(self, query, **kwargs):
        '''Search for concepts and collections matching a query.

        The query is a dict so that several criteria can be combined: a
        label, a type and membership of a collection.

        .. code-block:: python

            # Anything with a label of church.
            provider.find({'label': 'church'})

            # All concepts that are part of collection 5.
            provider.find({'type': 'concept', 'collection': {'id': 5}})

            # All concepts, collections or children of these
            # that belong to collection 5.
            provider.find({'collection': {'id': 5, 'depth': 'all'}})

            # Anything with a label of church, preferably shown with a
            # Dutch label.
            provider.find({'label': 'church'}, language='nl')

        :param query: A dict expressing the query. Permitted keys:

            * `label`: Search for something with this label value. An empty
              label equals searching for all concepts.
            * `type`: Restrict the search to certain SKOS elements. `all` is
              assumed when absent:

              * `concept`: Only :class:`skosprovider.skos.Concept`
                instances.
              * `collection`: Only :class:`skosprovider.skos.Collection`
                instances.
              * `all`: Both :class:`skosprovider.skos.Concept` and
                :class:`skosprovider.skos.Collection` instances.
            * `collection`: Restrict the search to a certain collection.
              A dict with two keys:

              * `id`: The id of a collection. Required.
              * `depth`: `members` or `all`. Optional, `members` is
                assumed. With `members` only direct members of the
                collection are considered. With `all` narrower concepts of
                a member are considered as well.

        :param string language: Optional :term:`language-tag` used to pick
            the label shown for each item.
        :param string sort: Optional. One of `id`, `label` or `sortlabel`.
            With `sortlabel` a `sortLabel` is used when present, a regular
            label otherwise.
        :param string sort_order: Optional. `asc` (default) or `desc`.

        :returns: A :class:`list` of dicts, one per concept or collection,
            with these keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: Label representing the item. Chosen using the
              `language` parameter, then the default language of the
              provider, then `en`.
        '''

    @abc.abstractmethod
    def expand(self, id):
        '''Expand a concept or collection to all its narrower concepts.

        Implementations should recurse, so narrower concepts of narrower
        concepts are returned too.

        For the id of a :class:`skosprovider.skos.Concept`, the id of the
        concept itself should be included in the return value.

        For the id of a :class:`skosprovider.skos.Collection`, the id of the
        collection itself must not be in the return value. It then holds
        all member concepts and their narrower concepts.

        :param id: A concept or collection id.
        :rtype: A list of ids, or `False` when the concept or collection
            doesn't exist.
        '''

    def get_top_display(self, **kwargs):
        '''
        List the concepts or collections at the top of a display hierarchy.

        Unlike :meth:`get_top_concepts`, this method may return collections
        as well as concepts.

        :param string language: Optional :term:`language-tag` used to pick
            the label shown for each item.
        :param string sort: Optional. One of `id`, `label` or `sortlabel`.
            With `sortlabel` a `sortLabel` is used when present, a regular
            label otherwise.
        :param string sort_order: Optional. `asc` (default) or `desc`.

        :returns: A :class:`list` of dicts, one per concept or collection,
            with these keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: Label representing the item. Chosen using the
              `language` parameter, then the default language of the
              provider, then `en`.
        '''

    def get_children_display(self, id, **kwargs):
        '''
        List the concepts or collections to display below a concept or
        collection.

        :param str id: A concept or collection id.
        :param string language: Optional :term:`language-tag` used to pick
            the label shown for each item.
        :param string sort: Optional. One of `id`, `label` or `sortlabel`.
            With `sortlabel` a `sortLabel` is used when present, a regular
            label otherwise.
        :param string sort_order: Optional. `asc` (default) or `desc`.

        :returns: A :class:`list` of dicts, one per concept or collection,
            with these keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: Label representing the item. Chosen using the
              `language` parameter, then the default language of the
              provider, then `en`.
        '''
390
391
392
class MemoryProvider(VocabularyProvider):
    '''
    A provider that keeps everything in memory.

    The data is passed in the constructor of this provider as a :class:`list`
    of :class:`skosprovider.skos.Concept` and
    :class:`skosprovider.skos.Collection` instances.
    '''

    case_insensitive = True
    '''
    Is searching for labels case insensitive?

    By default a search for a label is done case insensitive. Older versions of
    this provider were case sensitive. If this behaviour is desired, this can
    be triggered by providing a `case_insensitive` keyword to the constructor.
    '''

    def __init__(self, metadata, list, **kwargs):
        '''
        :param dict metadata: A dictionary with keywords like language.
        :param list list: A list of :class:`skosprovider.skos.Concept` and
            :class:`skosprovider.skos.Collection` instances.
        :param Boolean case_insensitive: Should searching for labels be done
            case-insensitive?
        '''
        super(MemoryProvider, self).__init__(metadata, **kwargs)
        self.list = list
        if 'case_insensitive' in kwargs:
            self.case_insensitive = kwargs['case_insensitive']

    def get_by_id(self, id):
        '''Return the first item whose id matches `id`, or `False`.

        Ids are compared through their string form, so `1` and `'1'` refer
        to the same item.
        '''
        wanted = str(id)
        for c in self.list:
            if str(c.id) == wanted:
                return c
        return False

    def get_by_uri(self, uri):
        '''Return the first item whose uri matches `uri`, or `False`.

        Uris are compared through their string form.
        '''
        wanted = str(uri)
        for c in self.list:
            if str(c.uri) == wanted:
                return c
        return False

    def find(self, query, **kwargs):
        '''Return the find dicts of all items that match `query`.

        :param query: A dict that can be used to express a query, with the
            optional keys `type`, `label` and `collection`.
        :raises ValueError: When the query names a collection this provider
            does not have.
        :rtype: list
        '''
        matches = [c for c in self.list if self._include_in_find(c, query)]
        return self._get_sorted_find_dicts(matches, **kwargs)

    def _include_in_find(self, c, query):
        '''
        Decide whether `c` satisfies every criterion present in `query`.

        :param c: A :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
        :param query: A dict that can be used to express a query.
        :raises ValueError: When `query['collection']['id']` does not
            identify a collection of this provider.
        :rtype: boolean
        '''
        if 'type' in query and query['type'] != 'all':
            if query['type'] != c.type:
                return False
        if 'label' in query:
            needle = query['label']
            if self.case_insensitive:
                needle = needle.upper()
                found = any(needle in l.label.upper() for l in c.labels)
            else:
                found = any(needle in l.label for l in c.labels)
            if not found:
                return False
        if 'collection' in query:
            spec = query['collection']
            coll = self.get_by_id(spec['id'])
            if not coll or not isinstance(coll, Collection):
                raise ValueError(
                    'You are searching for items in an unexisting collection.'
                )
            if spec.get('depth') == 'all':
                members = self.expand(coll.id)
            else:
                members = coll.members
            wanted = str(c.id)
            return any(str(member) == wanted for member in members)
        return True

    def _get_find_dict(self, c, **kwargs):
        '''
        Return a dict that can be used in the return list of the :meth:`find`
        method.

        :param c: A :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
        :rtype: dict
        '''
        # Look the label up once, in the language that is actually rendered,
        # so the None check guards the object that gets dereferenced.
        label = c.label(self._get_language(**kwargs))
        return {
            'id': c.id,
            'uri': c.uri,
            'type': c.type,
            'label': None if label is None else label.label
        }

    def _get_sorted_find_dicts(self, items, **kwargs):
        '''
        Sort `items` as requested by the `sort`, `sort_order` and `language`
        keywords and turn each of them into a find dict.

        :param list items: Concepts and collections.
        :rtype: list
        '''
        language = self._get_language(**kwargs)
        sort = self._get_sort(**kwargs)
        reverse = self._get_sort_order(**kwargs) == 'desc'
        ordered = self._sort(items, sort, language, reverse)
        return [self._get_find_dict(c, **kwargs) for c in ordered]

    def get_all(self, **kwargs):
        '''Return the find dicts of every item held by this provider.'''
        return self._get_sorted_find_dicts(self.list, **kwargs)

    def get_top_concepts(self, **kwargs):
        '''Return the find dicts of all concepts without broader concepts.'''
        tc = [
            c for c in self.list
            if isinstance(c, Concept) and len(c.broader) == 0
        ]
        return self._get_sorted_find_dicts(tc, **kwargs)

    def expand(self, id):
        '''Return the ids of all concepts reachable from `id`.

        A concept contributes its own id and is followed through its
        narrower concepts. A collection contributes nothing itself and is
        followed through its members. Ids that point to nothing in this
        provider are skipped, and every item is visited at most once so a
        cyclic hierarchy cannot cause endless recursion.

        :param id: A concept or collection id.
        :rtype: A list of ids (in no particular order) or `False` if the
            concept or collection doesn't exist.
        '''
        start = self.get_by_id(id)
        if not isinstance(start, (Concept, Collection)):
            return False
        found = set()
        visited = set()
        pending = [start]
        while pending:
            node = pending.pop()
            key = str(node.id)
            if key in visited:
                continue
            visited.add(key)
            if isinstance(node, Concept):
                found.add(node.id)
                child_ids = node.narrower
            else:
                child_ids = node.members
            for child_id in child_ids:
                child = self.get_by_id(child_id)
                # Dangling references resolve to False and are ignored.
                if isinstance(child, (Concept, Collection)):
                    pending.append(child)
        return list(found)

    def get_top_display(self, **kwargs):
        '''Return the find dicts of the items at the top of the display
        hierarchy.

        These are concepts without broader concepts and collections without
        superordinates, in both cases only when they are not a member of
        any collection.
        '''
        td = [
            c for c in self.list if
            (isinstance(c, Concept) and len(c.broader) == 0 and
             len(c.member_of) == 0) or
            (isinstance(c, Collection) and len(c.superordinates) == 0 and
             len(c.member_of) == 0)
        ]
        return self._get_sorted_find_dicts(td, **kwargs)

    def get_children_display(self, id, **kwargs):
        '''Return the find dicts of the items displayed below `id`.

        For a concept these are its subordinate arrays when it has any,
        its narrower concepts otherwise. For a collection these are its
        members. Child ids that point to nothing in this provider are
        left out.

        :param id: A concept or collection id.
        :rtype: A list of dicts, or `False` if `id` is unknown.
        '''
        c = self.get_by_id(id)
        if not c:
            return False
        if isinstance(c, Concept):
            if len(c.subordinate_arrays) == 0:
                display_children = c.narrower
            else:
                display_children = c.subordinate_arrays
        else:
            display_children = c.members
        resolved = (self.get_by_id(dcid) for dcid in display_children)
        dc = [child for child in resolved if child is not False]
        return self._get_sorted_find_dicts(dc, **kwargs)
557
558
559
class DictionaryProvider(MemoryProvider):
    '''A simple vocab provider that uses a python list of dicts.

    The provider expects a list with elements that are dicts that represent
    the concepts and collections.
    '''

    def __init__(self, metadata, list, **kwargs):
        '''
        :param dict metadata: A dictionary with keywords like language.
        :param list list: A list of dicts, each describing one concept or
            collection.
        '''
        # Initialise with an empty list first: converting the dicts needs the
        # uri generator and concept scheme the parent constructor sets up.
        super(DictionaryProvider, self).__init__(metadata, [], **kwargs)
        self.list = [self._from_dict(c) for c in list]

    def _from_dict(self, data):
        '''Build a Concept or Collection from its dict representation.

        A dict whose `type` is `collection` becomes a
        :class:`skosprovider.skos.Collection`; anything else becomes a
        :class:`skosprovider.skos.Concept`. When the dict has no `uri` (or
        it is `None`), one is generated for the matching type.

        :param dict data: Must contain `id`; all other keys are optional.
        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`
        '''
        is_collection = data.get('type') == 'collection'
        uri = data.get('uri')
        if uri is None:
            # Generate the uri for the type actually being built, so type
            # aware generators do not label concepts as collections.
            uri = self.uri_generator.generate(
                type='collection' if is_collection else 'concept',
                id=data['id']
            )
        if is_collection:
            return Collection(
                id=data['id'],
                uri=uri,
                concept_scheme=self.concept_scheme,
                labels=data.get('labels', []),
                notes=data.get('notes', []),
                sources=data.get('sources', []),
                members=data.get('members', []),
                member_of=data.get('member_of', []),
                superordinates=data.get('superordinates', [])
            )
        return Concept(
            id=data['id'],
            uri=uri,
            concept_scheme=self.concept_scheme,
            labels=data.get('labels', []),
            notes=data.get('notes', []),
            sources=data.get('sources', []),
            broader=data.get('broader', []),
            narrower=data.get('narrower', []),
            related=data.get('related', []),
            member_of=data.get('member_of', []),
            subordinate_arrays=data.get('subordinate_arrays', []),
            matches=data.get('matches', {})
        )
598
599
600
class SimpleCsvProvider(MemoryProvider):
    '''
    A provider that reads a simple csv format into memory.

    The supported csv format looks like this:
    <id>,<preflabel>,<note>,<source>

    This provider essentially provides a flat list of concepts. This is
    commonly associated with short lookup-lists.

    .. versionadded:: 0.2.0
    '''

    def __init__(self, metadata, reader, **kwargs):
        '''
        :param metadata: A metadata dictionary.
        :param reader: A csv reader, or any iterable of rows. Empty rows,
            such as those a csv reader yields for blank lines, are skipped.
        '''
        super(SimpleCsvProvider, self).__init__(metadata, [], **kwargs)
        self.list = [self._from_row(row) for row in reader if row]

    def _from_row(self, row):
        '''Turn one csv row into a :class:`skosprovider.skos.Concept`.

        :param row: A sequence holding the id, the preferred label and,
            optionally, a note and a source citation. An empty note or
            citation is treated as absent.
        :rtype: :class:`skosprovider.skos.Concept`
        '''
        id = row[0]
        labels = [{'label': row[1], 'type': 'prefLabel'}]
        notes = []
        if len(row) > 2 and row[2]:
            notes = [{'note': row[2], 'type': 'note'}]
        sources = []
        if len(row) > 3 and row[3]:
            # Use the citation from the row instead of a fixed placeholder.
            sources = [{'citation': row[3]}]
        return Concept(
            id=id,
            uri=self.uri_generator.generate(type='concept', id=id),
            labels=labels,
            notes=notes,
            sources=sources
        )
639