annif.vocab - Code Metrics - Inspection of "Lexical STWFSAPY Backend" - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#438)

unknown

created 2021-01-25 16:57 UTC

annif.vocab A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	82
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	14
eloc	57
dl	0
loc	82
rs	10
c	0
b	0
f	0

7 Methods

Rating	Name	Size	Complexity
A	AnnifVocabulary.as_skos()	3	1
A	AnnifVocabulary.subjects()	11	3
A	AnnifVocabulary.load_vocabulary()	11	2
A	AnnifVocabulary.as_graph()	8	1
A	AnnifVocabulary.__init__()	3	1
A	AnnifVocabulary._create_subject_index()	3	1
A	AnnifVocabulary._update_subject_index()	16	5

"""Vocabulary management functionality for Annif"""

import os.path
import rdflib.graph
import annif
import annif.corpus
import annif.util
from annif.datadir import DatadirMixin
from annif.exception import NotInitializedException

# module-level alias for the logger object exposed by the annif package,
# so the code below can log without repeating the package prefix
logger = annif.logger


class AnnifVocabulary(DatadirMixin):
    """Class representing a subject vocabulary which can be used by multiple
    Annif projects.

    Two files are kept in the vocabulary's data directory: 'subjects'
    (the saved subject index) and 'subjects.ttl' (a SKOS/Turtle dump
    written by the subject corpus)."""

    # defaults for uninitialized instances
    _subjects = None

    def __init__(self, vocab_id, datadir):
        """initialize the vocabulary with the given identifier; the
        identifier and the base data directory are passed on to
        DatadirMixin together with the 'vocabs' type name"""
        DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
        self.vocab_id = vocab_id

    def _create_subject_index(self, subject_corpus):
        """build a new subject index from the subject corpus, cache it on
        this instance and save it as 'subjects' in the data directory"""
        self._subjects = annif.corpus.SubjectIndex(subject_corpus)
        annif.util.atomic_save(self._subjects, self.datadir, 'subjects')

    def _update_subject_index(self, subject_corpus):
        """merge the subject corpus into the already stored subject index,
        cache the result on this instance and save it as 'subjects'

        Subjects of the old index keep their position in the merged index.
        NOTE(review): presumably this keeps numeric subject ids stable for
        already trained models - confirm."""
        old_subjects = self.subjects
        new_subjects = annif.corpus.SubjectIndex(subject_corpus)
        updated_subjects = annif.corpus.SubjectIndex()

        # first pass: walk the old index in its original order
        for uri, label, notation in old_subjects:
            if new_subjects.contains_uri(uri):
                # still present: take label and notation from the new corpus
                label, notation = new_subjects[new_subjects.by_uri(uri)][1:3]
            else:  # subject removed from new corpus
                # keep the slot (URI) but blank out label and notation
                label, notation = None, None
            updated_subjects.append(uri, label, notation)
        # second pass: subjects unknown to the old index go to the end
        for uri, label, notation in new_subjects:
            if not old_subjects.contains_uri(uri):
                updated_subjects.append(uri, label, notation)
        self._subjects = updated_subjects
        annif.util.atomic_save(self._subjects, self.datadir, 'subjects')

    @property
    def subjects(self):
        """the subject index of this vocabulary, loaded from the 'subjects'
        file on first access and cached on the instance afterwards

        Raises NotInitializedException if the 'subjects' file does not
        exist in the data directory."""
        if self._subjects is None:
            path = os.path.join(self.datadir, 'subjects')
            if os.path.exists(path):
                logger.debug('loading subjects from %s', path)
                self._subjects = annif.corpus.SubjectIndex.load(path)
            else:
                raise NotInitializedException(
                    "subject file {} not found".format(path))
        return self._subjects

    def load_vocabulary(self, subject_corpus, language):
        """load subjects from a subject corpus and save them into a
        SKOS/Turtle file for later use

        If a 'subjects' file already exists the stored subject index is
        updated, otherwise a new one is created. In both cases the corpus
        is then asked to write 'subjects.ttl', with the given language
        passed through to save_skos()."""

        if os.path.exists(os.path.join(self.datadir, 'subjects')):
            logger.info('updating existing vocabulary')
            self._update_subject_index(subject_corpus)
        else:
            self._create_subject_index(subject_corpus)
        subject_corpus.save_skos(os.path.join(self.datadir, 'subjects.ttl'),
                                 language)

    def as_skos(self):
        """return the vocabulary as a file object, in SKOS/Turtle syntax

        The file is opened in binary mode and is not closed here; closing
        it is left to the caller."""
        return open(os.path.join(self.datadir, 'subjects.ttl'), 'rb')

    def as_graph(self):
        """return the vocabulary as an rdflib graph, parsed from the
        'subjects.ttl' file in the data directory"""
        g = rdflib.graph.Graph()
        # Graph.load() was only a deprecated wrapper around parse() and is
        # gone in rdflib 6; parse() takes the same arguments and works with
        # both old and new rdflib releases.
        g.parse(
            os.path.join(self.datadir, 'subjects.ttl'),
            format='ttl'
        )
        return g


1			"""Vocabulary management functionality for Annif"""
2
3			import os.path
4			import rdflib.graph
5			import annif
6			import annif.corpus
7			import annif.util
8			from annif.datadir import DatadirMixin
9			from annif.exception import NotInitializedException
10
11			logger = annif.logger
12
13
14			class AnnifVocabulary(DatadirMixin):
15			"""Class representing a subject vocabulary which can be used by multiple
16			Annif projects."""
17
18			# defaults for uninitialized instances
19			_subjects = None
20
21			def __init__(self, vocab_id, datadir):
22			DatadirMixin.__init__(self, datadir, 'vocabs', vocab_id)
23			self.vocab_id = vocab_id
24
25			def _create_subject_index(self, subject_corpus):
26			self._subjects = annif.corpus.SubjectIndex(subject_corpus)
27			annif.util.atomic_save(self._subjects, self.datadir, 'subjects')
28
29			def _update_subject_index(self, subject_corpus):
30			old_subjects = self.subjects
31			new_subjects = annif.corpus.SubjectIndex(subject_corpus)
32			updated_subjects = annif.corpus.SubjectIndex()
33
34			for uri, label, notation in old_subjects:
35			if new_subjects.contains_uri(uri):
36			label, notation = new_subjects[new_subjects.by_uri(uri)][1:3]
37			else: # subject removed from new corpus
38			label, notation = None, None
39			updated_subjects.append(uri, label, notation)
40			for uri, label, notation in new_subjects:
41			if not old_subjects.contains_uri(uri):
42			updated_subjects.append(uri, label, notation)
43			self._subjects = updated_subjects
44			annif.util.atomic_save(self._subjects, self.datadir, 'subjects')
45
46			@property
47			def subjects(self):
48			if self._subjects is None:
49			path = os.path.join(self.datadir, 'subjects')
50			if os.path.exists(path):
51			logger.debug('loading subjects from %s', path)
52			self._subjects = annif.corpus.SubjectIndex.load(path)
53			else:
54			raise NotInitializedException(
55			"subject file {} not found".format(path))
56			return self._subjects
57
58			def load_vocabulary(self, subject_corpus, language):
59			"""load subjects from a subject corpus and save them into a
60			SKOS/Turtle file for later use"""
61
62			if os.path.exists(os.path.join(self.datadir, 'subjects')):
63			logger.info('updating existing vocabulary')
64			self._update_subject_index(subject_corpus)
65			else:
66			self._create_subject_index(subject_corpus)
67			subject_corpus.save_skos(os.path.join(self.datadir, 'subjects.ttl'),
68			language)
69
70			def as_skos(self):
71			"""return the vocabulary as a file object, in SKOS/Turtle syntax"""
72			return open(os.path.join(self.datadir, 'subjects.ttl'), 'rb')
73
74			def as_graph(self):
75			"""return the vocabulary as an rdflib graph"""
76			g = rdflib.graph.Graph()
77			g.load(
78			os.path.join(self.datadir, 'subjects.ttl'),
79			format='ttl'
80			)
81			return g
82

NatLibFi / Annif

Pull Request — master (#438)

annif.vocab A

Complexity

Size/Duplication

Importance

7 Methods

Duplication Side-by-Side

Filter issues like