Completed
Pull Request — master (#344)
by Osma
06:44
created

annif.vocab   A

Complexity

Total Complexity 7

Size/Duplication

Total Lines 51
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 7
eloc 32
dl 0
loc 51
rs 10
c 0
b 0
f 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A AnnifVocabulary.__init__() 0 3 1
A AnnifVocabulary._create_subject_index() 0 3 1
A AnnifVocabulary.subjects() 0 11 3
A AnnifVocabulary.as_skos() 0 3 1
A AnnifVocabulary.load_vocabulary() 0 7 1
1
"""Vocabulary management functionality for Annif"""
2
3
import os.path
4
import annif
5
import annif.corpus
6
import annif.util
7
from annif.datadir import DatadirMixin
8
from annif.exception import NotInitializedException
9
10
# module-level alias for the logger object exposed by the annif package
logger = annif.logger
11
12
13
class AnnifVocabulary(DatadirMixin):
    """A subject vocabulary, identified by a vocabulary ID, that may be
    shared between several Annif projects."""

    # class-level default: a fresh instance has no subject index cached yet
    _subjects = None

    def __init__(self, vocab_id, datadir):
        """Initialize the data directory mixin for this vocabulary (under
        'vocabs') and remember the vocabulary ID."""
        DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
        self.vocab_id = vocab_id

    def _create_subject_index(self, subject_corpus):
        """Build a subject index from the given corpus, cache it on this
        instance and store it as 'subjects' in the data directory."""
        index = annif.corpus.SubjectIndex(subject_corpus)
        self._subjects = index
        annif.util.atomic_save(index, self.datadir, 'subjects')

    @property
    def subjects(self):
        """The subject index of this vocabulary.

        On first access the index is loaded from the 'subjects' file in the
        data directory and cached; later accesses return the cached object.
        Raises NotInitializedException if that file does not exist."""
        if self._subjects is not None:
            return self._subjects

        index_path = os.path.join(self.datadir, 'subjects')
        if not os.path.exists(index_path):
            raise NotInitializedException(
                "subject file {} not found".format(index_path))

        logger.debug('loading subjects from %s', index_path)
        self._subjects = annif.corpus.SubjectIndex.load(index_path)
        return self._subjects

    def load_vocabulary(self, subject_corpus, language):
        """Create the subject index from a subject corpus, then have the
        corpus write itself as 'subjects.ttl' (SKOS/Turtle) in the data
        directory, using the given language, for later use."""
        self._create_subject_index(subject_corpus)
        skos_path = os.path.join(self.datadir, 'subjects.ttl')
        subject_corpus.save_skos(skos_path, language)

    def as_skos(self):
        """Open the stored 'subjects.ttl' SKOS/Turtle file in binary read
        mode and return the file object; the caller gets the open handle."""
        skos_path = os.path.join(self.datadir, 'subjects.ttl')
        return open(skos_path, 'rb')
51