Passed
Pull Request — master (#638)
by Osma
02:48
created

annif.registry.AnnifRegistry._init_vars()   A

Complexity

Conditions 2

Size

Total Lines 6
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nop 1
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""Registry that keeps track of Annif projects"""
2
3
import collections
4
import re
5
from flask import current_app
6
import annif
7
from annif.config import parse_config
8
from annif.exception import ConfigurationException
9
from annif.project import Access, AnnifProject
10
from annif.vocab import AnnifVocabulary
11
from annif.util import parse_args
12
13
logger = annif.logger
14
15
16
class AnnifRegistry:
    """Class that keeps track of the Annif projects and vocabularies"""

    # Note: The individual projects and vocabularies are stored in shared
    # static variables, keyed by the "registry ID" which is unique to the
    # registry instance. This is done to make it possible to serialize
    # AnnifRegistry instances without including the potentially huge objects
    # (which contain backends with large models, vocabularies with lots of
    # concepts etc). Serialized AnnifRegistry instances can then be passed
    # between processes when using the multiprocessing module.
    _projects = {}
    _vocabs = {}

    def __init__(self, projects_config_path, datadir, init_projects):
        """Create a registry for the projects defined in a configuration.

        projects_config_path is handed to parse_config() when the projects
        are created; datadir is handed to the AnnifProject and
        AnnifVocabulary constructors. If init_projects is true, initialize()
        is called on every project right away."""

        self._rid = id(self)
        self._projects_config_path = projects_config_path
        self._datadir = datadir
        # id() values may be reused once an earlier registry object has been
        # garbage collected, and entries in the shared static variables are
        # never removed. Discard anything left over under this ID so that a
        # newly constructed registry never inherits the projects and
        # vocabularies of an unrelated, already destroyed registry.
        self._projects.pop(self._rid, None)
        self._vocabs.pop(self._rid, None)
        self._init_vars()
        if init_projects:
            for project in self._projects[self._rid].values():
                project.initialize()

    def _init_vars(self):
        """Make sure the shared static variables have entries for this
        registry ID, creating the projects if they are missing."""

        # initialize the static variables, if necessary
        if self._rid not in self._projects:
            self._projects[self._rid] = self._create_projects()
            self._vocabs[self._rid] = {}

    def _create_projects(self):
        """Parse the projects configuration and return a mapping of
        project_id -> AnnifProject (empty if there is no configuration)."""

        # parse the configuration
        config = parse_config(self._projects_config_path)

        # handle the case where the config file doesn't exist
        if config is None:
            return {}

        # create AnnifProject objects from the configuration file
        projects = collections.OrderedDict()
        for project_id in config.project_ids:
            projects[project_id] = AnnifProject(project_id,
                                                config[project_id],
                                                self._datadir,
                                                self)
        return projects

    def get_projects(self, min_access=Access.private):
        """Return the available projects as a dict of project_id ->
        AnnifProject. The min_access parameter may be used to set the minimum
        access level required for the returned projects."""

        self._init_vars()
        return {project_id: project
                for project_id, project in self._projects[self._rid].items()
                if project.access >= min_access}

    def get_project(self, project_id, min_access=Access.private):
        """Return the definition of a single Project by project_id.

        Raises ValueError if no project with that ID is available at the
        given minimum access level."""

        projects = self.get_projects(min_access)
        try:
            return projects[project_id]
        except KeyError:
            raise ValueError(f"No such project {project_id}")

    def get_vocab(self, vocab_spec, default_language):
        """Return an (AnnifVocabulary, language) pair corresponding to the
        vocab_spec. If no language information is specified, use the given
        default language.

        Raises ValueError if vocab_spec does not have the form
        "vocab_id" or "vocab_id(arguments)"."""

        match = re.match(r'([\w-]+)(\((.*)\))?$', vocab_spec)
        if match is None:
            raise ValueError(
                f"Invalid vocabulary specification: {vocab_spec}")
        vocab_id = match.group(1)
        # the first positional argument inside the parentheses, if any,
        # selects the language; keyword arguments are not used here
        posargs, kwargs = parse_args(match.group(3))
        language = posargs[0] if posargs else default_language
        vocab_key = (vocab_id, language)

        self._init_vars()
        # vocabulary objects are created on first request and then reused
        # for later requests with the same (vocab_id, language) key
        if vocab_key not in self._vocabs[self._rid]:
            self._vocabs[self._rid][vocab_key] = AnnifVocabulary(
                vocab_id, self._datadir)
        return self._vocabs[self._rid][vocab_key], language
100
101
102
def initialize_projects(app):
    """Create an AnnifRegistry from the settings in app.config and attach it
    to the application as app.annif_registry."""
    settings = app.config
    app.annif_registry = AnnifRegistry(
        settings['PROJECTS_CONFIG_PATH'],
        settings['DATADIR'],
        settings['INITIALIZE_PROJECTS'])
108
109
110
def get_projects(min_access=Access.private):
    """Return a dict of project_id -> AnnifProject for the current
    application, limited to projects whose access level is at least
    min_access. The registry is set up first if the application does not
    have one yet."""
    app = current_app
    # create the registry on first use
    if not hasattr(app, 'annif_registry'):
        initialize_projects(app)

    registry = app.annif_registry
    return registry.get_projects(min_access)
118
119
120
def get_project(project_id, min_access=Access.private):
    """Look up a single project by its project_id among the projects
    available at the given minimum access level. Raises ValueError if
    there is no such project."""

    available = get_projects(min_access)
    try:
        project = available[project_id]
    except KeyError:
        raise ValueError(f"No such project '{project_id}'")
    return project
128
129
130
def get_vocabs(min_access=Access.private):
    """Collect the vocabularies of the available projects into a dict of
    vocab_id -> AnnifVocabulary. Only projects with at least the min_access
    access level are considered."""

    found = {}
    for project in get_projects(min_access).values():
        try:
            found[project.vocab.vocab_id] = project.vocab
        except ConfigurationException:
            # leave out projects whose vocabulary access raises
            # ConfigurationException
            continue

    return found
143
144
145
def get_vocab(vocab_id, min_access=Access.private):
    """Look up a single AnnifVocabulary by its vocabulary id among the
    vocabularies available at the given minimum access level. Raises
    ValueError if there is no such vocabulary."""

    available = get_vocabs(min_access)
    try:
        vocab = available[vocab_id]
    except KeyError:
        raise ValueError(f"No such vocabulary '{vocab_id}'")
    return vocab
153