Passed
Push — default-disable-learn-function ( 19b582 )
by Juho
03:57
created

annif.backend.backend.AnnifBackend.params()   A

Complexity

Conditions 1

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""Common functionality for backends."""
2
3
from __future__ import annotations
4
5
import abc
6
import os.path
7
from datetime import datetime, timezone
8
from glob import glob
9
from typing import TYPE_CHECKING, Any
10
11
from annif import logger
12
from annif.exception import ConfigurationException
13
from annif.suggestion import SuggestionBatch
14
15
if TYPE_CHECKING:
16
    from configparser import SectionProxy
17
18
    from annif.corpus.document import DocumentCorpus
19
    from annif.project import AnnifProject
20
21
22
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis. The
    non-implemented methods should be overridden in subclasses."""

    # Backend type name; None in this base class
    # (presumably set by concrete backends -- confirm against subclasses).
    name = None

    # Parameter defaults shared by every backend. A subclass may define its
    # own DEFAULT_PARAMETERS; default_params() layers those on top of these.
    DEFAULT_PARAMETERS = {"limit": 100}

    def __init__(
        self,
        backend_id: str,
        config_params: dict[str, Any] | SectionProxy,
        project: AnnifProject,
    ) -> None:
        """Initialize backend with specific parameters. The
        parameters are a dict. Keys and values depend on the specific
        backend type.

        The data directory of the backend is taken from project.datadir."""
        self.backend_id = backend_id
        self.config_params = config_params
        self.project = project
        self.datadir = project.datadir

    def default_params(self) -> dict[str, Any]:
        """Return a new dict with the base class defaults overlaid by the
        DEFAULT_PARAMETERS of the concrete class."""
        params = AnnifBackend.DEFAULT_PARAMETERS.copy()
        params.update(self.DEFAULT_PARAMETERS)  # Optional backend specific parameters
        return params

    @property
    def params(self) -> dict[str, Any]:
        """Effective parameters: the defaults overridden by the configured
        values. A new dict is built on every access."""
        params = {}
        params.update(self.default_params())
        params.update(self.config_params)
        return params

    @property
    def _model_file_paths(self) -> list[str]:
        """Paths of the files under the data directory that count as model
        files, in no particular order.

        The data directory is searched recursively for regular files. The
        ignore patterns are matched only against entries directly inside the
        data directory, not inside its subdirectories."""
        all_paths = glob(os.path.join(self.datadir, "**"), recursive=True)
        file_paths = [p for p in all_paths if os.path.isfile(p)]
        ignore_patterns = ("*-train*", "tmp-*", "vectorizer")
        ignore_paths = [
            path
            for igp in ignore_patterns
            for path in glob(os.path.join(self.datadir, igp))
        ]
        return list(set(file_paths) - set(ignore_paths))

    @property
    def is_trained(self) -> bool:
        """True when at least one model file exists in the data directory."""
        return bool(self._model_file_paths)

    @property
    def modification_time(self) -> datetime | None:
        """Most recent modification time of the model files as a
        timezone-aware UTC datetime, or None when there are no model files."""
        latest_mtime = max(
            (os.path.getmtime(p) for p in self._model_file_paths),
            default=None,
        )
        if latest_mtime is None:
            return None
        # Build the aware datetime directly; datetime.utcfromtimestamp() is
        # deprecated and yields a naive value that would need patching.
        return datetime.fromtimestamp(latest_mtime, tz=timezone.utc)

    def _get_backend_params(
        self,
        params: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Return a new dict with the effective backend parameters, overridden
        by the given call-specific params (if any)."""
        backend_params = dict(self.params)
        if params is not None:
            backend_params.update(params)
        return backend_params

    def _train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any],
        jobs: int = 0,
    ) -> None:
        """This method can be overridden by backends. It implements
        the train functionality, with pre-processed parameters."""
        pass  # default is to do nothing, subclasses may override

    def train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
        jobs: int = 0,
    ) -> None:
        """Train the model on the given document or subject corpus.

        The given params are merged over the backend parameters before
        being handed to _train together with corpus and jobs."""
        beparams = self._get_backend_params(params)
        return self._train(corpus, params=beparams, jobs=jobs)

    def initialize(self, parallel: bool = False) -> None:
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation.
        If parallel is True, the backend should expect to be used for
        parallel operation."""
        pass

    def _suggest(self, text, params):
        """Either this method or _suggest_batch should be implemented by
        backends.  It implements the suggest functionality for a single
        document, with pre-processed parameters."""
        pass  # pragma: no cover

    def _suggest_batch(
        self, texts: list[str], params: dict[str, Any]
    ) -> SuggestionBatch:
        """This method can be implemented by backends to use batching of documents in
        their operations. This default implementation uses the regular suggest
        functionality.

        The "limit" parameter is converted to int before use, so it may be
        given as a string."""
        return SuggestionBatch.from_sequence(
            [self._suggest(text, params) for text in texts],
            self.project.subjects,
            limit=int(params.get("limit")),
        )

    def suggest(
        self,
        texts: list[str],
        params: dict[str, Any] | None = None,
    ) -> SuggestionBatch:
        """Suggest subjects for the input documents and return a list of subject sets
        represented as a list of SubjectSuggestion objects.

        The backend is initialized (via initialize()) before suggesting."""
        beparams = self._get_backend_params(params)
        self.initialize()
        return self._suggest_batch(texts, params=beparams)

    def debug(self, message: str) -> None:
        """Log a debug message from this backend"""
        logger.debug(f"Backend {self.backend_id}: {message}")

    def info(self, message: str) -> None:
        """Log an info message from this backend"""
        logger.info(f"Backend {self.backend_id}: {message}")

    def warning(self, message: str) -> None:
        """Log a warning message from this backend"""
        logger.warning(f"Backend {self.backend_id}: {message}")
159
160
161
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that can perform online learning"""

    # Learning is disabled unless explicitly enabled through the parameters.
    DEFAULT_PARAMETERS = {
        "allow_learn": False,
    }

    # String spellings accepted for "allow_learn" (compared case-insensitively).
    # These are the same spellings configparser's getboolean() recognizes.
    _TRUE_STRINGS = frozenset({"1", "yes", "true", "on"})
    _FALSE_STRINGS = frozenset({"0", "no", "false", "off"})

    def default_params(self) -> dict[str, Any]:
        """Return a new dict with the AnnifBackend defaults, overlaid by the
        AnnifLearningBackend defaults and then by the DEFAULT_PARAMETERS of
        the concrete class."""
        params = AnnifBackend.DEFAULT_PARAMETERS.copy()
        params.update(AnnifLearningBackend.DEFAULT_PARAMETERS)
        params.update(self.DEFAULT_PARAMETERS)  # Optional backend specific parameters
        return params

    def _learning_allowed(self, value: Any) -> bool:
        """Interpret the "allow_learn" parameter value as a boolean.

        Parameter values that originate from a configuration section are
        strings, and any non-empty string (including "false") is truthy, so
        string values are parsed instead of being tested for truthiness.
        Raises ConfigurationException for a string that is not a recognized
        boolean spelling."""
        if not isinstance(value, str):
            return bool(value)
        normalized = value.strip().lower()
        if normalized in self._TRUE_STRINGS:
            return True
        if normalized in self._FALSE_STRINGS:
            return False
        raise ConfigurationException(
            f"Invalid boolean value {value!r} for parameter allow_learn",
            backend_id=self.backend_id,
        )

    @abc.abstractmethod
    def _learn(self, corpus, params):
        """This method should be implemented by backends. It implements the learn
        functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def learn(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
    ) -> None:
        """Further train the model on the given document or subject corpus.

        The given params are merged over the backend parameters. Raises
        ConfigurationException unless the resulting "allow_learn" parameter
        is enabled."""
        beparams = self._get_backend_params(params)
        if self._learning_allowed(beparams["allow_learn"]):
            return self._learn(corpus, params=beparams)
        else:
            raise ConfigurationException(
                "Learning not enabled for backend", backend_id=self.backend_id
            )
193