| Total Complexity | 6 |
| Total Lines | 44 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Vocabulary management functionality for Annif""" |
||
| 2 | |||
| 3 | import os.path |
||
| 4 | import annif |
||
| 5 | import annif.corpus |
||
| 6 | import annif.util |
||
| 7 | from annif.datadir import DatadirMixin |
||
| 8 | from annif.exception import NotInitializedException |
||
| 9 | |||
# Reuse the logger object exposed as an attribute of the annif package, so
# this module logs through the same logger as the rest of the package.
logger = annif.logger
||
| 11 | |||
| 12 | |||
class AnnifVocabulary(DatadirMixin):
    """A subject vocabulary that several Annif projects may share.

    The subject index is kept in memory once it has been created or loaded,
    and is stored in a file called 'subjects' inside this vocabulary's data
    directory.
    """

    # Class-level fallback: an instance has no subject index until one is
    # either created via load_vocabulary() or lazily read from disk.
    _subjects = None

    def __init__(self, vocab_id, datadir):
        """Set up the vocabulary identified by vocab_id.

        The data directory handling is delegated to DatadirMixin, which
        receives the base datadir, the 'vocabs' type name and the id.
        """
        DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
        self.vocab_id = vocab_id

    def _create_subject_index(self, subject_corpus):
        """Build a subject index from the corpus, cache it and save it."""
        index = annif.corpus.SubjectIndex(subject_corpus)
        # Cache first, then persist, mirroring the in-memory state on disk.
        self._subjects = index
        annif.util.atomic_save(index, self.datadir, 'subjects')

    @property
    def subjects(self):
        """The subject index of this vocabulary, loaded on first access.

        Raises NotInitializedException when no index is cached and the
        'subjects' file does not exist in the data directory.
        """
        if self._subjects is not None:
            return self._subjects

        subjects_path = os.path.join(self.datadir, 'subjects')
        if not os.path.exists(subjects_path):
            message = "subject file {} not found".format(subjects_path)
            raise NotInitializedException(message)

        logger.debug('loading subjects from %s', subjects_path)
        self._subjects = annif.corpus.SubjectIndex.load(subjects_path)
        return self._subjects

    def load_vocabulary(self, subject_corpus):
        """Load subjects from a subject corpus into this vocabulary."""
        self._create_subject_index(subject_corpus)
||
| 44 |