Completed
Push — master ( 48d743...cc73ec )
by Koen
01:02
created

DictionaryProvider._from_dict()   B

Complexity

Conditions 5

Size

Total Lines 27

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 5
dl 0
loc 27
rs 8.0894
1
# -*- coding: utf-8 -*-
2
3
'''This module provides an abstraction of controlled vocabularies.
4
5
This abstraction allows our application to work with both local and remote
6
vocabs (be they SOAP, REST, XML-RPC or something else).
7
8
The basic idea is that we have skos providers. Each provider is an instance
9
of a :class:`VocabularyProvider`. The same class can thus be reused with
10
different configurations to handle different vocabs. Generally speaking, every
11
instance of a certain :class:`VocabularyProvider` will deal with concepts and
12
collections from a single conceptscheme.
13
'''
14
15
from __future__ import unicode_literals
16
17
import abc
18
import copy
19
import logging
20
from operator import methodcaller
21
22
from .skos import (
23
    Concept,
24
    Collection,
25
    ConceptScheme
26
)
27
28
from .uri import (
29
    DefaultUrnGenerator,
30
    DefaultConceptSchemeUrnGenerator
31
)
32
33
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
34
35
36
class VocabularyProvider(
    abc.ABCMeta(str('_AbstractVocabularyProvider'), (object,), {})
):
    '''An interface that all vocabulary providers must follow.

    Concrete providers must implement :meth:`get_by_id`, :meth:`get_by_uri`,
    :meth:`get_all`, :meth:`get_top_concepts`, :meth:`find` and
    :meth:`expand`. A class that leaves one of these out can not be
    instantiated.
    '''

    # The abstract base class machinery is applied through a dynamically
    # created base class because that spelling works on both Python 2 and
    # Python 3. A ``__metaclass__`` class attribute is silently ignored by
    # Python 3, which leaves ``@abc.abstractmethod`` unenforced. ``str()``
    # keeps the generated class name a native string when
    # ``unicode_literals`` is active on Python 2.

    concept_scheme = None
    '''The :class:`~skosprovider.skos.ConceptScheme` this provider serves.'''

    uri_generator = None
    '''The :class:`~skosprovider.uri.UriGenerator` responsible for generating
    uris for this provider.'''

    def __init__(self, metadata, **kwargs):
        '''Create a new provider and register some metadata.

        Note that a missing `subject` key is added to the `metadata` dict
        that was passed in; the dict is stored as is, not copied.

        :param uri_generator: An object that implements the
            :class:`skosprovider.uri.UriGenerator` interface. If not present,
            a :class:`~skosprovider.uri.DefaultUrnGenerator` is created for
            the provider `id`.
        :param concept_scheme: A :class:`~skosprovider.skos.ConceptScheme`. If
            not present, a default :class:`~skosprovider.skos.ConceptScheme`
            will be created with a uri generated by the
            :class:`~skosprovider.uri.DefaultConceptSchemeUrnGenerator` in
            combination with the provider `id`.
        :param dict metadata: Metadata essential to this provider. Expected
            metadata:

                * `id`: A unique identifier for the vocabulary. Required.
                * `default_language`: Used to determine what language to use when \
                    returning labels if no language is specified. Will default \
                    to `en` if not specified.
                * `subject`: A list of subjects or tags that define what the \
                    provider is about or what the provider can handle. This \
                    information can then be used when querying a \
                    :class:`~skosprovider.registry.Registry` for providers.
        '''
        metadata.setdefault('subject', [])
        self.metadata = metadata
        vocabulary_id = self.metadata.get('id')
        if 'uri_generator' in kwargs:
            self.uri_generator = kwargs['uri_generator']
        else:
            self.uri_generator = DefaultUrnGenerator(vocabulary_id)
        if 'concept_scheme' in kwargs:
            self.concept_scheme = kwargs['concept_scheme']
        else:
            self.concept_scheme = ConceptScheme(
                uri=DefaultConceptSchemeUrnGenerator().generate(
                    id=vocabulary_id
                )
            )

    def _get_language(self, **kwargs):
        '''Determine what language to render labels in.

        Will first check if there's a language keyword specified in **kwargs.
        If not, will check the default language of the provider. If there's no
        default language, will fall back to 'en'.

        :rtype: str
        '''
        return kwargs.get(
            'language',
            self.metadata.get('default_language', 'en')
        )

    def _get_sort(self, **kwargs):
        '''Determine on what attribute to sort.

        :rtype: str or `None` if no `sort` keyword was passed.
        '''
        return kwargs.get('sort', None)

    def _get_sort_order(self, **kwargs):
        '''Determine the sort order.

        :rtype: str
        :returns: The `sort_order` keyword if passed (expected to be 'asc'
            or 'desc'), otherwise 'asc'.
        '''
        return kwargs.get('sort_order', 'asc')

    def _sort(self, concepts, sort=None, language='any', reverse=False):
        '''
        Returns a sorted version of a list of concepts. Will leave the original
        list unsorted.

        If `sort` is empty, a shallow copy in the original order is returned.

        :param list concepts: A list of concepts and collections. Every
            element must offer a `_sortkey(sort, language)` method.
        :param string sort: What to sort on: `id`, `label` or `sortlabel`
        :param string language: Language to use when sorting on `label` or
            `sortlabel`.
        :param boolean reverse: Reverse the sort order?
        :rtype: list
        '''
        ordered = copy.copy(concepts)
        if sort:
            ordered.sort(
                key=methodcaller('_sortkey', sort, language),
                reverse=reverse
            )
        return ordered

    def get_vocabulary_id(self):
        '''Get an identifier for the vocabulary.

        :rtype: String or number.
        '''
        return self.metadata.get('id')

    def get_metadata(self):
        '''Get some metadata on the provider or the vocab it represents.

        :rtype: Dict.
        '''
        return self.metadata

    @abc.abstractmethod
    def get_by_id(self, id):
        '''Get all information on a concept or collection, based on id.

        Providers should assume that all id's passed are strings. If a provider
        knows that internally it uses numeric identifiers, it's up to the
        provider to do the typecasting. Generally, this should not be done by
        changing the id's themselves (eg. from int to str), but by doing the
        id comparisons in a type agnostic way.

        Since this method could be used to find both concepts and collections,
        it's assumed that there are no id collisions between concepts and
        collections.

        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection` or `False` if the concept or
            collection is unknown to the provider.
        '''

    @abc.abstractmethod
    def get_by_uri(self, uri):
        '''Get all information on a concept or collection, based on a
        :term:`URI`.

        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection` or `False` if the concept or
            collection is unknown to the provider.
        '''

    @abc.abstractmethod
    def get_all(self, **kwargs):
        '''Returns all concepts and collections in this provider.

        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :param string sort: Optional. If present, it should either be `id`,
            `label` or `sortlabel`. The `sortlabel` option means the providers
            should take into account any `sortLabel` if present, if not it
            will fallback to a regular label to sort on.
        :param string sort_order: Optional. What order to sort in: `asc` or
            `desc`. Defaults to `asc`

        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is \
                determined by looking at the `language` parameter, the default \
                language of the provider and finally falls back to `en`.

        '''

    @abc.abstractmethod
    def get_top_concepts(self, **kwargs):
        '''
        Returns all top-level concepts in this provider.

        Top-level concepts are concepts that have no broader concepts
        themselves. They might have narrower concepts, but this is not
        mandatory.

        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :param string sort: Optional. If present, it should either be `id`,
            `label` or `sortlabel`. The `sortlabel` option means the providers
            should take into account any `sortLabel` if present, if not it
            will fallback to a regular label to sort on.
        :param string sort_order: Optional. What order to sort in: `asc` or
            `desc`. Defaults to `asc`

        :returns: A :class:`list` of concepts, NOT collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is \
                determined by looking at the `language` parameter, the default \
                language of the provider and finally falls back to `en`.

        '''

    @abc.abstractmethod
    def find(self, query, **kwargs):
        '''Find concepts that match a certain query.

        Currently query is expected to be a dict, so that complex queries can
        be passed. You can use this dict to search for concepts or collections
        with a certain label, with a certain type and for concepts that belong
        to a certain collection.

        .. code-block:: python

            # Find anything that has a label of church.
            provider.find({'label': 'church'})

            # Find all concepts that are a part of collection 5.
            provider.find({'type': 'concept', 'collection': {'id': 5}})

            # Find all concepts, collections or children of these
            # that belong to collection 5.
            provider.find({'collection': {'id': 5, 'depth': 'all'}})

            # Find anything that has a label of church.
            # Preferentially display a label in Dutch.
            provider.find({'label': 'church'}, language='nl')

        :param query: A dict that can be used to express a query. The following
            keys are permitted:

            * `label`: Search for something with this label value. An empty \
                label is equal to searching for all concepts.
            * `type`: Limit the search to certain SKOS elements. If not \
                present `all` is assumed:

                * `concept`: Only return :class:`skosprovider.skos.Concept` \
                    instances.
                * `collection`: Only return \
                    :class:`skosprovider.skos.Collection` instances.
                * `all`: Return both :class:`skosprovider.skos.Concept` and \
                    :class:`skosprovider.skos.Collection` instances.
            * `collection`: Search only for concepts belonging to a certain \
                collection. This argument should be a dict with two keys:

                * `id`: The id of a collection. Required.
                * `depth`: Can be `members` or `all`. Optional. If not \
                    present, `members` is assumed, meaning only concepts or \
                    collections that are a direct member of the collection \
                    should be considered. When set to `all`, this method \
                    should return concepts and collections that are a member \
                    of the collection or are a narrower concept of a member \
                    of the collection.

        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :param string sort: Optional. If present, it should either be `id`,
            `label` or `sortlabel`. The `sortlabel` option means the providers
            should take into account any `sortLabel` if present, if not it
            will fallback to a regular label to sort on.
        :param string sort_order: Optional. What order to sort in: `asc` or
            `desc`. Defaults to `asc`

        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is \
                determined by looking at the `language` parameter, the default \
                language of the provider and finally falls back to `en`.

        '''

    @abc.abstractmethod
    def expand(self, id):
        '''Expand a concept or collection to all its narrower
        concepts.

        This method should recurse and also return narrower concepts
        of narrower concepts.

        If the id passed belongs to a :class:`skosprovider.skos.Concept`,
        the id of the concept itself should be included in the return value.

        If the id passed belongs to a :class:`skosprovider.skos.Collection`,
        the id of the collection itself must not be present in the return
        value. In this case the return value includes all the member concepts
        and their narrower concepts.

        :param id: A concept or collection id.
        :rtype: A list of id's or `False` if the concept or collection doesn't
            exist.
        '''

    def get_top_display(self, **kwargs):
        '''
        Returns all concepts or collections that form the top-level of a
        display hierarchy.

        As opposed to the :meth:`get_top_concepts`, this method can possibly
        return both concepts and collections.

        This base implementation is empty; providers that support a display
        hierarchy are expected to override it.

        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :param string sort: Optional. If present, it should either be `id`,
            `label` or `sortlabel`. The `sortlabel` option means the providers
            should take into account any `sortLabel` if present, if not it
            will fallback to a regular label to sort on.
        :param string sort_order: Optional. What order to sort in: `asc` or
            `desc`. Defaults to `asc`

        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is \
                determined by looking at the `language` parameter, the default \
                language of the provider and finally falls back to `en`.

        '''

    def get_children_display(self, id, **kwargs):
        '''
        Return a list of concepts or collections that should be displayed
        under this concept or collection.

        This base implementation is empty; providers that support a display
        hierarchy are expected to override it.

        :param str id: A concept or collection id.
        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :param string sort: Optional. If present, it should either be `id`,
            `label` or `sortlabel`. The `sortlabel` option means the providers
            should take into account any `sortLabel` if present, if not it
            will fallback to a regular label to sort on.
        :param string sort_order: Optional. What order to sort in: `asc` or
            `desc`. Defaults to `asc`

        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is \
                determined by looking at the `language` parameter, the default \
                language of the provider and finally falls back to `en`.

        '''
390
391
392
class MemoryProvider(VocabularyProvider):
    '''
    A provider that keeps everything in memory.

    The data is passed in the constructor of this provider as a :class:`list`
    of :class:`skosprovider.skos.Concept` and
    :class:`skosprovider.skos.Collection` instances.
    '''

    case_insensitive = True
    '''
    Is searching for labels case insensitive?

    By default a search for a label is done case insensitive. Older versions of
    this provider were case sensitive. If this behaviour is desired, this can
    be triggered by providing a `case_insensitive` keyword to the constructor.
    '''

    def __init__(self, metadata, list, **kwargs):
        '''
        :param dict metadata: A dictionary with keywords like language.
        :param list list: A list of :class:`skosprovider.skos.Concept` and
            :class:`skosprovider.skos.Collection` instances.
        :param Boolean case_insensitive: Should searching for labels be done
            case-insensitive?
        '''
        super(MemoryProvider, self).__init__(metadata, **kwargs)
        self.list = list
        if 'case_insensitive' in kwargs:
            self.case_insensitive = kwargs['case_insensitive']

    def get_by_id(self, id):
        '''Return the first item whose id matches `id`, or `False`.

        Ids are compared by their string representation, so `5` and `'5'`
        refer to the same item.
        '''
        id = str(id)
        for c in self.list:
            if str(c.id) == id:
                return c
        return False

    def get_by_uri(self, uri):
        '''Return the first item whose uri matches `uri`, or `False`.

        Uris are compared by their string representation.
        '''
        uri = str(uri)
        for c in self.list:
            if str(c.uri) == uri:
                return c
        return False

    def find(self, query, **kwargs):
        '''Return the find dicts of all items that match `query`.

        See :meth:`_include_in_find` for the supported query keys. The
        `language`, `sort` and `sort_order` keywords are honoured.

        :raises ValueError: If the query references a collection that is not
            known to this provider.
        '''
        matches = [c for c in self.list if self._include_in_find(c, query)]
        return self._get_sorted_find_dicts(matches, **kwargs)

    def _matches_label(self, c, label):
        '''Does any label of `c` contain `label` as a substring?

        The comparison ignores case when `case_insensitive` is set.

        :rtype: boolean
        '''
        if self.case_insensitive:
            label = label.upper()
            return any(label in l.label.upper() for l in c.labels)
        return any(label in l.label for l in c.labels)

    def _include_in_find(self, c, query):
        '''
        Decide whether `c` satisfies every criterion present in `query`.

        The criteria are checked in the order `type`, `label`, `collection`;
        the first one that fails excludes `c`. The collection lookup is only
        reached by items that passed the earlier criteria.

        :param c: A :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
        :param query: A dict that can be used to express a query.
        :raises ValueError: If `query['collection']['id']` does not identify
            a :class:`skosprovider.skos.Collection` in this provider.
        :rtype: boolean
        '''
        # Any type value other than `concept` or `collection` (eg. `all`)
        # does not restrict the result.
        requested_type = query.get('type')
        if requested_type == 'concept' and not isinstance(c, Concept):
            return False
        if requested_type == 'collection' and not isinstance(c, Collection):
            return False
        if 'label' in query and not self._matches_label(c, query['label']):
            return False
        if 'collection' in query:
            coll = self.get_by_id(query['collection']['id'])
            if not coll or not isinstance(coll, Collection):
                raise ValueError(
                    'You are searching for items in an unexisting collection.'
                )
            if query['collection'].get('depth') == 'all':
                members = self.expand(coll.id)
            else:
                members = coll.members
            # Compare as strings so numeric and string ids match each other.
            if str(c.id) not in set(str(m) for m in members):
                return False
        return True

    def _get_find_dict(self, c, **kwargs):
        '''
        Return a dict that can be used in the return list of the :meth:`find`
        method.

        The label is `None` when `c.label()` returns `None`; otherwise the
        label for the requested (or default) language is used.

        :param c: A :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
        :rtype: dict
        '''
        language = self._get_language(**kwargs)
        return {
            'id': c.id,
            'uri': c.uri,
            'type': c.type,
            'label': None if c.label() is None else c.label(language).label
        }

    def _get_sorted_find_dicts(self, items, **kwargs):
        '''Sort `items` as requested in `kwargs` and render them as find dicts.

        :param list items: Concepts and collections to render.
        :rtype: list
        '''
        ordered = self._sort(
            items,
            self._get_sort(**kwargs),
            self._get_language(**kwargs),
            self._get_sort_order(**kwargs) == 'desc'
        )
        return [self._get_find_dict(c, **kwargs) for c in ordered]

    def get_all(self, **kwargs):
        '''Return the find dicts of every item held by this provider.'''
        return self._get_sorted_find_dicts(self.list, **kwargs)

    def get_top_concepts(self, **kwargs):
        '''Return the find dicts of all concepts without a broader concept.'''
        tc = [
            c for c in self.list
            if isinstance(c, Concept) and len(c.broader) == 0
        ]
        return self._get_sorted_find_dicts(tc, **kwargs)

    def expand(self, id):
        '''Return the ids of all concepts reachable from `id`.

        A concept contributes its own id and is followed through its
        `narrower` ids; a collection contributes nothing itself and is
        followed through its `members`. Every item is visited only once, so
        cyclic hierarchies terminate. Ids that can not be resolved within
        this provider are skipped.

        :param id: A concept or collection id.
        :rtype: A list of id's (in no particular order) or `False` if `id`
            is not a concept or collection in this provider.
        '''
        root = self.get_by_id(id)
        if not isinstance(root, (Concept, Collection)):
            return False
        found = set()
        visited = set()
        pending = [root]
        while pending:
            current = pending.pop()
            key = str(current.id)
            if key in visited:
                continue
            visited.add(key)
            if isinstance(current, Concept):
                found.add(current.id)
                child_ids = current.narrower
            elif isinstance(current, Collection):
                child_ids = current.members
            else:
                continue
            for child_id in child_ids:
                child = self.get_by_id(child_id)
                # get_by_id signals an unknown id with False.
                if child is not False:
                    pending.append(child)
        return list(found)

    def get_top_display(self, **kwargs):
        '''Return the find dicts of the top level of the display hierarchy.

        These are the concepts that have no broader concept and the
        collections that have no superordinate concept, as long as they are
        not a member of any collection.
        '''
        td = [
            c for c in self.list
            if len(c.member_of) == 0 and (
                (isinstance(c, Concept) and len(c.broader) == 0) or
                (isinstance(c, Collection) and len(c.superordinates) == 0)
            )
        ]
        return self._get_sorted_find_dicts(td, **kwargs)

    def get_children_display(self, id, **kwargs):
        '''Return the find dicts of the items to display under `id`.

        For a concept these are its subordinate arrays if it has any,
        otherwise its narrower concepts. For a collection these are its
        members. Child ids that can not be resolved within this provider are
        skipped.

        :param id: A concept or collection id.
        :rtype: list or `False` if `id` is unknown to this provider.
        '''
        c = self.get_by_id(id)
        if not c:
            return False
        if isinstance(c, Concept):
            if len(c.subordinate_arrays) == 0:
                display_children = c.narrower
            else:
                display_children = c.subordinate_arrays
        else:
            display_children = c.members
        resolved = [self.get_by_id(dcid) for dcid in display_children]
        # get_by_id signals an unknown id with False.
        dc = [child for child in resolved if child is not False]
        return self._get_sorted_find_dicts(dc, **kwargs)
563
564
565
class DictionaryProvider(MemoryProvider):
    '''A simple vocab provider that uses a python list of dicts.

    The provider expects a list with elements that are dicts that represent
    the concepts and collections.
    '''

    def __init__(self, metadata, list, **kwargs):
        '''
        :param dict metadata: A metadata dictionary.
        :param list list: A list of dicts, each describing a concept or a
            collection. See :meth:`_from_dict` for the keys that are read.
        '''
        # The parent is initialised with an empty list first so that
        # uri_generator and concept_scheme exist before the dicts are
        # converted.
        super(DictionaryProvider, self).__init__(metadata, [], **kwargs)
        self.list = [self._from_dict(c) for c in list]

    def _from_dict(self, data):
        '''Turn a dict into a concept or a collection.

        A dict with a `type` of `collection` becomes a
        :class:`skosprovider.skos.Collection`, anything else becomes a
        :class:`skosprovider.skos.Concept`. If the dict has no `uri` (or it
        is `None`), a uri is generated for the matching type.

        :param dict data: Must contain `id`. All other keys are optional.
        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`
        '''
        is_collection = data.get('type') == 'collection'
        uri = data.get('uri')
        if uri is None:
            # Generate the uri for the type that is actually being created.
            uri = self.uri_generator.generate(
                type='collection' if is_collection else 'concept',
                id=data['id']
            )
        if is_collection:
            return Collection(
                id=data['id'],
                uri=uri,
                concept_scheme=self.concept_scheme,
                labels=data.get('labels', []),
                notes=data.get('notes', []),
                sources=data.get('sources', []),
                members=data.get('members', []),
                member_of=data.get('member_of', []),
                superordinates=data.get('superordinates', [])
            )
        return Concept(
            id=data['id'],
            uri=uri,
            concept_scheme=self.concept_scheme,
            labels=data.get('labels', []),
            notes=data.get('notes', []),
            sources=data.get('sources', []),
            broader=data.get('broader', []),
            narrower=data.get('narrower', []),
            related=data.get('related', []),
            member_of=data.get('member_of', []),
            subordinate_arrays=data.get('subordinate_arrays', []),
            matches=data.get('matches', {})
        )
604
605
606
class SimpleCsvProvider(MemoryProvider):
    '''
    A provider that reads a simple csv format into memory.

    The supported csv format looks like this:
    <id>,<preflabel>,<note>,<source>

    This provider essentially provides a flat list of concepts. This is
    commonly associated with short lookup-lists.

    .. versionadded:: 0.2.0
    '''

    def __init__(self, metadata, reader, **kwargs):
        '''
        :param metadata: A metadata dictionary.
        :param reader: A csv reader.
        '''
        # The parent is initialised with an empty list first so that
        # uri_generator exists before the rows are converted.
        super(SimpleCsvProvider, self).__init__(metadata, [], **kwargs)
        self.list = [self._from_row(row) for row in reader]

    def _from_row(self, row):
        '''Turn one csv row into a concept.

        The first two columns (id and preflabel) are required. The third
        (note) and fourth (source) columns are optional and ignored when
        empty.

        :param row: A sequence of column values.
        :rtype: :class:`skosprovider.skos.Concept`
        '''
        id = row[0]
        labels = [{'label': row[1], 'type': 'prefLabel'}]
        notes = []
        if len(row) > 2 and row[2]:
            notes = [{'note': row[2], 'type': 'note'}]
        sources = []
        if len(row) > 3 and row[3]:
            # Use the citation found in the row, not a placeholder text.
            sources = [{'citation': row[3]}]
        return Concept(
            id=id,
            uri=self.uri_generator.generate(type='concept', id=id),
            labels=labels,
            notes=notes,
            sources=sources
        )
645