annif.registry.AnnifRegistry._create_projects()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
nop 1
dl 0
loc 15
rs 9.95
c 0
b 0
f 0
1
"""Registry that keeps track of Annif projects"""
2
3
from __future__ import annotations
4
5
import os
6
import re
7
8
from flask import Flask, current_app
9
10
import annif
11
from annif.config import parse_config
12
from annif.exception import ConfigurationException
13
from annif.project import Access, AnnifProject
14
from annif.vocab import AnnifVocabulary
15
16
# Module-level alias for the logger object exposed by the annif package.
logger = annif.logger
17
18
19
class AnnifRegistry:
    """Class that keeps track of the Annif projects and vocabularies"""

    # Note: The individual projects and vocabularies are stored in shared
    # static variables, keyed by the "registry ID" which is unique to the
    # registry instance. This is done to make it possible to serialize
    # AnnifRegistry instances without including the potentially huge objects
    # (which contain backends with large models, vocabularies with lots of
    # concepts etc). Serialized AnnifRegistry instances can then be passed
    # between processes when using the multiprocessing module.
    #
    # NOTE(review): the registry ID is id(self). Python only guarantees that
    # id() is unique among objects with overlapping lifetimes, so a registry
    # created after another one was garbage-collected could receive the same
    # ID and find that earlier registry's entries still present in these
    # dicts. Confirm whether registries are ever discarded and recreated
    # within a single process.
    _projects = {}
    _vocabs = {}

    def __init__(
        self, projects_config_path: str, datadir: str, init_projects: bool
    ) -> None:
        """Set up the registry and optionally initialize its projects.

        projects_config_path is the path handed to parse_config, datadir is
        passed on to every AnnifProject and AnnifVocabulary created by this
        registry. When init_projects is true, initialize() is called on each
        project whose ID matches the regular expression given in the
        ANNIF_PROJECTS_INIT environment variable (default: ".*", i.e. all)."""
        self._rid = id(self)
        self._projects_config_path = projects_config_path
        self._datadir = datadir
        self._init_vars()
        projects_pattern = os.getenv("ANNIF_PROJECTS_INIT", ".*")
        if init_projects:
            for project in self._projects[self._rid].values():
                if re.search(projects_pattern, project.project_id) is not None:
                    project.initialize()

    def _init_vars(self) -> None:
        """Populate the shared project and vocabulary stores for this
        registry ID unless an entry for it already exists."""
        # initialize the static variables, if necessary
        if self._rid not in self._projects:
            self._projects[self._rid] = self._create_projects()
            self._vocabs[self._rid] = {}

    def _create_projects(self) -> dict:
        """Parse the projects configuration and return a dict of
        project_id -> AnnifProject. Returns an empty dict when parse_config
        returns None."""
        # parse the configuration
        config = parse_config(self._projects_config_path)

        # handle the case where the config file doesn't exist
        if config is None:
            return {}

        # create AnnifProject objects from the configuration file
        return {
            project_id: AnnifProject(
                project_id, config[project_id], self._datadir, self
            )
            for project_id in config.project_ids
        }

    def get_projects(
        self, min_access: Access = Access.private
    ) -> dict[str, AnnifProject]:
        """Return the available projects as a dict of project_id ->
        AnnifProject. The min_access parameter may be used to set the minimum
        access level required for the returned projects."""

        self._init_vars()
        return {
            project_id: project
            for project_id, project in self._projects[self._rid].items()
            if project.access >= min_access
        }

    def get_project(
        self, project_id: str, min_access: Access = Access.private
    ) -> AnnifProject:
        """Return the definition of a single Project by project_id.

        Raises ValueError if no project with that ID is available at the
        given min_access level."""

        projects = self.get_projects(min_access)
        try:
            return projects[project_id]
        except KeyError:
            # The KeyError is only an artefact of the dict lookup; suppress
            # it so the traceback shows just the ValueError.
            raise ValueError("No such project {}".format(project_id)) from None

    def get_vocab(self, vocab_id: str) -> AnnifVocabulary:
        """Return an AnnifVocabulary by vocab_id, creating and caching it on
        first use.

        Raises ValueError if vocab_id is empty or None."""

        if not vocab_id:
            raise ValueError(f"Invalid vocabulary ID: '{vocab_id}'")

        self._init_vars()
        if vocab_id not in self._vocabs[self._rid]:
            self._vocabs[self._rid][vocab_id] = AnnifVocabulary(vocab_id, self._datadir)
        return self._vocabs[self._rid][vocab_id]
102
103
104
def initialize_projects(app: Flask) -> None:
    """Create an AnnifRegistry from the application configuration and attach
    it to the application as app.annif_registry.

    Reads the PROJECTS_CONFIG_PATH, DATADIR and INITIALIZE_PROJECTS settings
    from app.config."""
    settings = app.config
    app.annif_registry = AnnifRegistry(
        settings["PROJECTS_CONFIG_PATH"],
        settings["DATADIR"],
        settings["INITIALIZE_PROJECTS"],
    )
109
110
111
def get_projects(min_access: Access = Access.private) -> dict[str, AnnifProject]:
    """Return the projects of the current application's registry as a dict of
    project_id -> AnnifProject, restricted to those whose access level is at
    least min_access. The registry is created on first use if the current
    application does not have one yet."""
    app = current_app
    registry_missing = not hasattr(app, "annif_registry")
    if registry_missing:
        initialize_projects(app)

    registry = app.annif_registry
    return registry.get_projects(min_access)
119
120
121
def get_project(project_id: str, min_access: Access = Access.private) -> AnnifProject:
    """Return the definition of a single Project by project_id.

    Only projects whose access level is at least min_access are considered.
    Raises ValueError if no such project is available."""

    projects = get_projects(min_access)
    try:
        return projects[project_id]
    except KeyError:
        # The KeyError is only an artefact of the dict lookup; suppress it so
        # the traceback shows just the ValueError.
        raise ValueError(f"No such project '{project_id}'") from None
129
130
131
def get_vocabs(min_access: Access = Access.private) -> dict[str, AnnifVocabulary]:
    """Return the available vocabularies as a dict of vocab_id ->
    AnnifVocabulary. The min_access parameter may be used to set the minimum
    access level required for the returned vocabularies.

    Projects for which accessing the vocabulary raises
    ConfigurationException are skipped."""

    vocabs = {}
    for proj in get_projects(min_access).values():
        try:
            vocabs[proj.vocab.vocab_id] = proj.vocab
        except ConfigurationException as err:
            # Best effort: one misconfigured project must not prevent the
            # other vocabularies from being returned, but leave a trace of
            # what was skipped instead of swallowing the error silently.
            logger.debug(
                "Skipping vocabulary of project '%s': %s", proj.project_id, err
            )

    return vocabs
144
145
146
def get_vocab(vocab_id: str, min_access: Access = Access.private) -> AnnifVocabulary:
    """Return a single AnnifVocabulary by vocabulary id.

    Only vocabularies of projects whose access level is at least min_access
    are considered. Raises ValueError if no such vocabulary is available."""

    vocabs = get_vocabs(min_access)
    try:
        return vocabs[vocab_id]
    except KeyError:
        # The KeyError is only an artefact of the dict lookup; suppress it so
        # the traceback shows just the ValueError.
        raise ValueError(f"No such vocabulary '{vocab_id}'") from None
154