Passed
Pull Request — master (#614)
by Osma
02:50
created

annif.registry.get_vocab()   A

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 2
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
"""Registry that keeps track of Annif projects"""
2
3
import collections
4
import re
5
from flask import current_app
6
import annif
7
from annif.config import parse_config
8
from annif.exception import ConfigurationException
9
from annif.project import Access, AnnifProject
10
from annif.vocab import AnnifVocabulary
11
from annif.util import parse_args
12
13
# Module-level alias for the logger object exposed by the annif package.
logger = annif.logger
14
15
16 View Code Duplication
class AnnifRegistry:
    """Class that keeps track of the Annif projects and vocabularies"""

    # Note: The individual projects and vocabularies are stored in shared
    # static variables, keyed by the "registry ID" which is unique to the
    # registry instance. This is done to make it possible to serialize
    # AnnifRegistry instances without including the potentially huge objects
    # (which contain backends with large models, vocabularies with lots of
    # concepts etc). Serialized AnnifRegistry instances can then be passed
    # between processes when using the multiprocessing module.
    _projects = {}
    _vocabs = {}

    def __init__(self, projects_config_path, datadir, init_projects):
        """Create the projects described by the configuration found at
        projects_config_path, using datadir as the data directory. If
        init_projects is true, initialize() is called on every project
        right away."""

        self._rid = id(self)
        self._datadir = datadir
        self._projects[self._rid] = \
            self._create_projects(projects_config_path)
        # vocabularies are created lazily by get_vocab()
        self._vocabs[self._rid] = {}
        if init_projects:
            for project in self._projects[self._rid].values():
                project.initialize()

    def _create_projects(self, projects_config_path):
        """Parse the configuration and return a mapping of project_id ->
        AnnifProject, in the order given by the configuration. Returns an
        empty dict when there is no configuration."""

        # parse the configuration
        config = parse_config(projects_config_path)

        # handle the case where the config file doesn't exist
        if config is None:
            return {}

        # create AnnifProject objects from the configuration file
        projects = collections.OrderedDict()
        for project_id in config.project_ids:
            projects[project_id] = AnnifProject(project_id,
                                                config[project_id],
                                                self._datadir,
                                                self)
        return projects

    def get_projects(self, min_access=Access.private):
        """Return the available projects as a dict of project_id ->
        AnnifProject. The min_access parameter may be used to set the minimum
        access level required for the returned projects."""

        return {project_id: project
                for project_id, project in self._projects[self._rid].items()
                if project.access >= min_access}

    def get_project(self, project_id, min_access=Access.private):
        """Return a single AnnifProject by project_id. Raises ValueError if
        no project with that id is available at the given access level."""

        projects = self.get_projects(min_access)
        try:
            return projects[project_id]
        except KeyError:
            raise ValueError(f"No such project {project_id}")

    def get_vocab(self, vocab_spec, default_language):
        """Return an (AnnifVocabulary, language) pair corresponding to the
        vocab_spec. If no language information is specified, use the given
        default language.

        The specification has the form "vocab_id" or "vocab_id(args)", where
        the first positional argument, if any, is the language. Raises
        ValueError if vocab_spec does not have this form."""

        # The whole specification has to match: an unanchored match would
        # silently cut "yso-en" or "yso(fi" down to the vocabulary id "yso"
        # and load the wrong vocabulary instead of reporting the problem.
        match = re.fullmatch(r'([\w-]+)(\((.*)\))?', vocab_spec)
        if match is None:
            raise ValueError(
                f"Invalid vocabulary specification: {vocab_spec}")
        vocab_id = match.group(1)
        # group(3) is None when the specification has no parenthesized part
        posargs, _kwargs = parse_args(match.group(3))
        language = posargs[0] if posargs else default_language
        vocab_key = (vocab_id, language)

        # create each (vocab_id, language) combination only once per registry
        if vocab_key not in self._vocabs[self._rid]:
            self._vocabs[self._rid][vocab_key] = AnnifVocabulary(
                vocab_id, self._datadir)
        return self._vocabs[self._rid][vocab_key], language
92
93
94
def initialize_projects(app):
    """Build an AnnifRegistry from the application configuration and store
    it on the application object as the annif_registry attribute."""

    settings = app.config
    app.annif_registry = AnnifRegistry(
        settings['PROJECTS_CONFIG_PATH'],
        settings['DATADIR'],
        settings['INITIALIZE_PROJECTS'])
100
101
102
def get_projects(min_access=Access.private):
    """Return a dict of project_id -> AnnifProject for the projects of the
    current application whose access level is at least min_access. The
    registry is set up on first use if the application has none yet."""

    app = current_app
    registry_missing = not hasattr(app, 'annif_registry')
    if registry_missing:
        initialize_projects(app)

    registry = app.annif_registry
    return registry.get_projects(min_access)
110
111
112
def get_project(project_id, min_access=Access.private):
    """Look up one AnnifProject by its project_id among the projects
    available at the given access level; raise ValueError if not found."""

    available = get_projects(min_access)
    try:
        project = available[project_id]
    except KeyError:
        raise ValueError(f"No such project '{project_id}'")
    return project
120
121
122 View Code Duplication
def get_vocabs(min_access=Access.private):
    """Collect the vocabularies of the available projects into a dict of
    vocab_id -> AnnifVocabulary. Only projects whose access level is at
    least min_access are considered."""

    found = {}
    accessible_projects = get_projects(min_access)
    for project in accessible_projects.values():
        try:
            found[project.vocab.vocab_id] = project.vocab
        except ConfigurationException:
            # projects whose vocabulary raises a configuration error are
            # left out of the result
            continue

    return found
135
136
137
def get_vocab(vocab_id, min_access=Access.private):
    """Look up one AnnifVocabulary by its vocabulary id among the
    vocabularies available at the given access level; raise ValueError if
    not found."""

    available = get_vocabs(min_access)
    try:
        vocab = available[vocab_id]
    except KeyError:
        raise ValueError(f"No such vocabulary '{vocab_id}'")
    return vocab
145