annif.backend.backend.AnnifBackend.is_trained()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""Common functionality for backends."""
2
3
from __future__ import annotations
4
5
import abc
6
import os.path
7
from datetime import datetime, timezone
8
from glob import glob
9
from typing import TYPE_CHECKING, Any
10
11
from annif import logger
12
from annif.suggestion import SuggestionBatch
13
14
if TYPE_CHECKING:
15
    from configparser import SectionProxy
16
17
    from annif.corpus.document import Document, DocumentCorpus
18
    from annif.project import AnnifProject
19
20
21
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis.

    The non-implemented methods should be overridden in subclasses.
    """

    # Overridden by concrete backends (presumably the backend type
    # identifier -- confirm against the code that registers backends).
    name = None

    # Parameters shared by all backends. A subclass may define its own
    # DEFAULT_PARAMETERS; default_params() layers those on top of these.
    DEFAULT_PARAMETERS = {"limit": 100}

    def __init__(
        self,
        backend_id: str,
        config_params: dict[str, Any] | SectionProxy,
        project: AnnifProject,
    ) -> None:
        """Initialize backend with specific parameters.

        The parameters are a dict (or a configparser section). Keys and
        values depend on the specific backend type. The data directory is
        taken from the project.
        """
        self.backend_id = backend_id
        self.config_params = config_params
        self.project = project
        self.datadir = project.datadir

    def default_params(self) -> dict[str, Any]:
        """Return the base defaults overlaid with this class's
        DEFAULT_PARAMETERS, as a new dict."""
        params = AnnifBackend.DEFAULT_PARAMETERS.copy()
        params.update(self.DEFAULT_PARAMETERS)  # Optional backend specific parameters
        return params

    @property
    def params(self) -> dict[str, Any]:
        """Return the default parameters overridden by the configured
        parameters, as a new dict."""
        params = {}
        params.update(self.default_params())
        params.update(self.config_params)
        return params

    @property
    def _model_file_paths(self) -> list[str]:
        """Return the sorted paths of all regular files found recursively
        under the data directory, minus the entries that match one of the
        ignore patterns directly inside the data directory."""
        # Local import: only ``glob`` itself is imported at file level.
        from glob import escape

        # Escape the directory so that glob metacharacters in its path
        # ("[", "*", "?") are matched literally instead of as wildcards;
        # unescaped, such a directory would look empty (i.e. untrained).
        base = escape(os.fspath(self.datadir))
        all_paths = glob(os.path.join(base, "**"), recursive=True)
        file_paths = {p for p in all_paths if os.path.isfile(p)}
        # Presumably training leftovers and temporary files that do not
        # count as a trained model -- confirm with the backends that
        # write them.
        ignore_patterns = ("*-train*", "tmp-*", "vectorizer")
        ignore_paths = {
            path
            for igp in ignore_patterns
            for path in glob(os.path.join(base, igp))
        }
        # Sorted so that the result does not depend on set iteration order.
        return sorted(file_paths - ignore_paths)

    @property
    def is_trained(self) -> bool:
        """Return True if the data directory holds at least one model file
        (see _model_file_paths)."""
        return bool(self._model_file_paths)

    @property
    def modification_time(self) -> datetime | None:
        """Return the most recent modification time (timezone-aware, UTC)
        among the model files, or None if there are no model files."""
        mtimes = [
            datetime.fromtimestamp(os.path.getmtime(p), tz=timezone.utc)
            for p in self._model_file_paths
        ]
        return max(mtimes, default=None)

    def _get_backend_params(
        self,
        params: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Return a copy of self.params updated with the given call-specific
        params, if any. The call-specific values take precedence."""
        backend_params = dict(self.params)
        if params is not None:
            backend_params.update(params)
        return backend_params

    def _train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any],
        jobs: int = 0,
    ) -> None:
        """This method can be overridden by backends. It implements
        the train functionality, with pre-processed parameters."""
        pass  # default is to do nothing, subclasses may override

    def train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
        jobs: int = 0,
    ) -> None:
        """Train the model on the given document or subject corpus.

        The given params are merged over the backend's own parameters
        before being handed to _train together with jobs.
        """
        beparams = self._get_backend_params(params)
        return self._train(corpus, params=beparams, jobs=jobs)

    def initialize(self, parallel: bool = False) -> None:
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation.
        If parallel is True, the backend should expect to be used for
        parallel operation."""
        pass

    def _suggest(self, doc: Document, params: dict[str, Any]):
        """Either this method or _suggest_batch should be implemented by
        backends.  It implements the suggest functionality for a single
        document, with pre-processed parameters."""
        pass  # pragma: no cover

    def _suggest_batch(
        self, documents: list[Document], params: dict[str, Any]
    ) -> SuggestionBatch:
        """This method can be implemented by backends to use batching of documents in
        their operations. This default implementation uses the regular suggest
        functionality, calling _suggest once per document."""
        # Fall back to the base default so that a params dict without a
        # "limit" key does not end up as int(None), which raises TypeError.
        limit = int(params.get("limit", AnnifBackend.DEFAULT_PARAMETERS["limit"]))
        return SuggestionBatch.from_sequence(
            [self._suggest(doc, params) for doc in documents],
            self.project.subjects,
            limit=limit,
        )

    def suggest(
        self,
        documents: list[Document],
        params: dict[str, Any] | None = None,
    ) -> SuggestionBatch:
        """Suggest subjects for the input documents and return the result
        as a SuggestionBatch.

        The given params are merged over the backend's own parameters, and
        initialize() is called before the suggestions are computed.
        """
        beparams = self._get_backend_params(params)
        self.initialize()
        return self._suggest_batch(documents, params=beparams)

    def debug(self, message: str) -> None:
        """Log a debug message from this backend"""
        # Lazy %-style arguments: the text is only built if the record is
        # actually emitted; the rendered message is unchanged.
        logger.debug("Backend %s: %s", self.backend_id, message)

    def info(self, message: str) -> None:
        """Log an info message from this backend"""
        logger.info("Backend %s: %s", self.backend_id, message)

    def warning(self, message: str) -> None:
        """Log a warning message from this backend"""
        logger.warning("Backend %s: %s", self.backend_id, message)
156
157
158
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that can perform online learning"""

    @abc.abstractmethod
    def _learn(self, corpus, params):
        """This method should be implemented by backends. It implements the
        learn functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def learn(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
    ) -> None:
        """Further train the model on the given document or subject corpus.

        The given params are merged over the backend's own parameters
        before being handed to _learn.
        """
        return self._learn(corpus, params=self._get_backend_params(params))
175