Passed
Pull Request — master (#461)
by
unknown
20:48
created

annif.vocab.AnnifVocabulary.skos_vocab()   A

Complexity

Conditions 3

Size

Total Lines 10
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nop 1
dl 0
loc 10
rs 9.95
c 0
b 0
f 0
1
"""Vocabulary management functionality for Annif"""
2
3
import os.path
4
import annif
5
import annif.corpus
6
import annif.util
7
from annif.datadir import DatadirMixin
8
from annif.exception import NotInitializedException
9
10
# Logger object exposed by the annif package; used for this module's
# debug/info messages.
logger = annif.logger
11
12
13
class AnnifVocabulary(DatadirMixin):
    """Class representing a subject vocabulary which can be used by multiple
    Annif projects.

    The subject index (file 'subjects') and the SKOS vocabulary (file
    'subjects.ttl') are read lazily from the vocabulary's data directory on
    first access and then cached on the instance.
    """

    # defaults for uninitialized instances
    _subjects = None
    _skos_vocab = None

    def __init__(self, vocab_id, datadir):
        """Set up the vocabulary identified by vocab_id under datadir.

        Nothing is loaded from disk here; see the `subjects` and
        `skos_vocab` properties.
        """
        DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
        self.vocab_id = vocab_id
        self._skos_vocab = None

    def _create_subject_index(self, subject_corpus):
        """Build a fresh subject index from subject_corpus, cache it and
        save it into the 'subjects' file of the data directory."""
        self._subjects = annif.corpus.SubjectIndex(subject_corpus)
        annif.util.atomic_save(self._subjects, self.datadir, 'subjects')

    def _update_subject_index(self, subject_corpus):
        """Merge subject_corpus into the existing subject index and save the
        result into the 'subjects' file of the data directory.

        Existing subjects keep their position in the index. Those that are
        still present in the new corpus get their label and notation
        refreshed; those that are gone are kept as placeholders with label
        and notation set to None. Subjects that only exist in the new corpus
        are appended at the end.

        Raises NotInitializedException (via the `subjects` property) if no
        existing subject index can be loaded.
        """
        old_subjects = self.subjects
        new_subjects = annif.corpus.SubjectIndex(subject_corpus)
        updated_subjects = annif.corpus.SubjectIndex()

        # first pass: walk the old index so that existing entries keep
        # their order
        for uri, label, notation in old_subjects:
            if new_subjects.contains_uri(uri):
                label, notation = new_subjects[new_subjects.by_uri(uri)][1:3]
            else:  # subject removed from new corpus
                label, notation = None, None
            updated_subjects.append(uri, label, notation)
        # second pass: add the subjects that were not in the old index
        for uri, label, notation in new_subjects:
            if not old_subjects.contains_uri(uri):
                updated_subjects.append(uri, label, notation)
        self._subjects = updated_subjects
        annif.util.atomic_save(self._subjects, self.datadir, 'subjects')

    @property
    def subjects(self):
        """The subject index of this vocabulary, loaded from the 'subjects'
        file on first access.

        Raises NotInitializedException if the file does not exist.
        """
        if self._subjects is None:
            path = os.path.join(self.datadir, 'subjects')
            if os.path.exists(path):
                logger.debug('loading subjects from %s', path)
                self._subjects = annif.corpus.SubjectIndex.load(path)
            else:
                raise NotInitializedException(
                    "subject file {} not found".format(path))
        return self._subjects

    @property
    def skos_vocab(self):
        """The vocabulary as a SubjectFileSKOS object, loaded from the
        'subjects.ttl' file on first access.

        Raises NotInitializedException if the file does not exist.
        """
        if self._skos_vocab is None:
            path = os.path.join(self.datadir, 'subjects.ttl')
            if os.path.exists(path):
                # lazy %-style arguments, consistent with `subjects` above
                logger.debug('loading graph from %s', path)
                self._skos_vocab = annif.corpus.SubjectFileSKOS(path, None)
            else:
                raise NotInitializedException(f'graph file {path} not found')
        return self._skos_vocab

    @property
    def skos_concepts(self):
        """The `skos_concepts` of the underlying SKOS vocabulary object."""
        return self.skos_vocab.skos_concepts

    def get_skos_concept_labels(self, concept, label_types, language):
        """Delegate to get_skos_concept_labels() of the underlying SKOS
        vocabulary object and return its result."""
        return self.skos_vocab.get_skos_concept_labels(concept, label_types,
                                                       language)

    def load_vocabulary(self, subject_corpus, language):
        """load subjects from a subject corpus and save them into a
        SKOS/Turtle file for later use

        If a 'subjects' file already exists in the data directory, the
        existing subject index is updated; otherwise a new one is created.
        """

        if os.path.exists(os.path.join(self.datadir, 'subjects')):
            logger.info('updating existing vocabulary')
            self._update_subject_index(subject_corpus)
        else:
            self._create_subject_index(subject_corpus)
        subject_corpus.save_skos(os.path.join(self.datadir, 'subjects.ttl'),
                                 language)
        # The Turtle file was just rewritten: drop any previously cached
        # graph so that skos_vocab/as_graph reload it instead of serving
        # stale data that no longer matches the subject index.
        self._skos_vocab = None

    def as_skos(self):
        """return the vocabulary as a file object, in SKOS/Turtle syntax

        The 'subjects.ttl' file is opened in binary mode; the caller is
        responsible for closing the returned file object.
        """
        return open(os.path.join(self.datadir, 'subjects.ttl'), 'rb')

    def as_graph(self):
        """return the vocabulary as an rdflib graph"""
        return self.skos_vocab.graph
96