Completed
Push — master ( 131b99...8a6527 )
by Osma
28s queued 10s
created

annif.vocab   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 44
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 6
eloc 28
dl 0
loc 44
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A AnnifVocabulary.__init__() 0 3 1
A AnnifVocabulary._create_subject_index() 0 3 1
A AnnifVocabulary.subjects() 0 11 3
A AnnifVocabulary.load_vocabulary() 0 4 1
1
"""Vocabulary management functionality for Annif"""
2
3
import os.path
4
import annif
5
import annif.corpus
6
import annif.util
7
from annif.datadir import DatadirMixin
8
from annif.exception import NotInitializedException
9
10
logger = annif.logger
11
12
13
class AnnifVocabulary(DatadirMixin):
    """A subject vocabulary that several Annif projects can share.

    The subject index is held in memory once created or loaded; it is
    saved under, and lazily read back from, the name 'subjects' in this
    vocabulary's data directory.
    """

    # Class-level default so that instances which have not yet created or
    # loaded a subject index still have the attribute.
    _subjects = None

    def __init__(self, vocab_id, datadir):
        """Initialize the data directory mixin and store the vocabulary id.

        vocab_id -- identifier of this vocabulary
        datadir -- base data directory handed on to DatadirMixin
        """
        DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
        self.vocab_id = vocab_id

    def _create_subject_index(self, subject_corpus):
        """Build a subject index from the corpus, keep it and save it."""
        index = annif.corpus.SubjectIndex(subject_corpus)
        self._subjects = index
        annif.util.atomic_save(index, self.datadir, 'subjects')

    @property
    def subjects(self):
        """The subject index, loaded from the data directory on first use.

        Raises NotInitializedException when no index is held in memory and
        the subject file does not exist.
        """
        # Fast path: an index has already been created or loaded.
        if self._subjects is not None:
            return self._subjects

        path = os.path.join(self.datadir, 'subjects')
        if not os.path.exists(path):
            raise NotInitializedException(
                "subject file {} not found".format(path))

        logger.debug('loading subjects from %s', path)
        self._subjects = annif.corpus.SubjectIndex.load(path)
        return self._subjects

    def load_vocabulary(self, subject_corpus):
        """Create and save the subject index from the given subject corpus."""
        self._create_subject_index(subject_corpus)
44