Passed
Push — testing-on-windows-and-macos ( 782857...ea99ad )
by Juho
04:06
created

annif.backend.backend.AnnifBackend.info()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 2
1
"""Common functionality for backends."""
2
3
from __future__ import annotations
4
5
import abc
6
import os.path
7
from datetime import datetime, timezone
8
from glob import glob
9
from typing import TYPE_CHECKING, Any
10
11
from annif import logger
12
from annif.suggestion import SuggestionBatch
13
14
if TYPE_CHECKING:
15
    from configparser import SectionProxy
16
17
    from annif.corpus.document import DocumentCorpus
18
    from annif.project import AnnifProject
19
20
21
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis. The
    non-implemented methods should be overridden in subclasses."""

    # Backend type name; None in the base class (presumably set by
    # concrete subclasses -- confirm against the backend registry).
    name = None

    # Defaults shared by all backends; subclasses may define their own
    # DEFAULT_PARAMETERS, which are layered on top in default_params().
    DEFAULT_PARAMETERS = {"limit": 100}

    def __init__(
        self,
        backend_id: str,
        config_params: dict[str, Any] | SectionProxy,
        project: AnnifProject,
    ) -> None:
        """Initialize backend with specific parameters. The
        parameters are a dict. Keys and values depend on the specific
        backend type."""
        self.backend_id = backend_id
        self.config_params = config_params
        self.project = project
        self.datadir = project.datadir

    def default_params(self) -> dict[str, Any]:
        """Return the base class defaults overlaid with the defaults
        declared by this backend's own DEFAULT_PARAMETERS."""
        params = AnnifBackend.DEFAULT_PARAMETERS.copy()
        params.update(self.DEFAULT_PARAMETERS)  # Optional backend specific parameters
        return params

    @property
    def params(self) -> dict[str, Any]:
        """Effective parameters: the defaults overridden by the configured
        parameters. A new dict is built on every access."""
        params = dict(self.default_params())
        params.update(self.config_params)
        return params

    @property
    def _model_file_paths(self) -> list[str]:
        """Paths of the regular files found (recursively) under the data
        directory, excluding top-level entries that match the ignore
        patterns. The order of the returned list is unspecified."""
        all_paths = glob(os.path.join(self.datadir, "**"), recursive=True)
        file_paths = {p for p in all_paths if os.path.isfile(p)}
        ignore_patterns = ("*-train*", "tmp-*", "vectorizer")
        # The ignore patterns are matched directly inside the data
        # directory only, not in its subdirectories.
        ignore_paths = {
            path
            for igp in ignore_patterns
            for path in glob(os.path.join(self.datadir, igp))
        }
        return list(file_paths - ignore_paths)

    @property
    def is_trained(self) -> bool:
        """True if at least one (non-ignored) model file exists."""
        return bool(self._model_file_paths)

    @property
    def modification_time(self) -> datetime | None:
        """Modification time of the most recently modified model file as a
        timezone-aware UTC datetime, or None if there are no model files."""
        mtimes = [os.path.getmtime(p) for p in self._model_file_paths]
        if not mtimes:
            return None
        # Build the aware datetime directly; datetime.utcfromtimestamp() is
        # deprecated since Python 3.12.
        return datetime.fromtimestamp(max(mtimes), tz=timezone.utc)

    def _get_backend_params(
        self,
        params: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Return a copy of the effective parameters, updated with the
        given per-call parameters (if any)."""
        backend_params = dict(self.params)
        if params is not None:
            backend_params.update(params)
        return backend_params

    def _train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any],
        jobs: int = 0,
    ) -> None:
        """This method can be overridden by backends. It implements
        the train functionality, with pre-processed parameters."""
        pass  # default is to do nothing, subclasses may override

    def train(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
        jobs: int = 0,
    ) -> None:
        """Train the model on the given document or subject corpus."""
        beparams = self._get_backend_params(params)
        return self._train(corpus, params=beparams, jobs=jobs)

    def initialize(self, parallel: bool = False) -> None:
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation.
        If parallel is True, the backend should expect to be used for
        parallel operation."""
        pass

    def _suggest(self, text: str, params: dict[str, Any]):
        """Either this method or _suggest_batch should be implemented by
        backends.  It implements the suggest functionality for a single
        document, with pre-processed parameters."""
        pass  # pragma: no cover

    def _suggest_batch(
        self, texts: list[str], params: dict[str, Any]
    ) -> SuggestionBatch:
        """This method can be implemented by backends to use batching of documents in
        their operations. This default implementation uses the regular suggest
        functionality."""
        return SuggestionBatch.from_sequence(
            [self._suggest(text, params) for text in texts],
            self.project.subjects,
            limit=int(params.get("limit")),
        )

    def suggest(
        self,
        texts: list[str],
        params: dict[str, Any] | None = None,
    ) -> SuggestionBatch:
        """Suggest subjects for the input documents and return the results
        as a SuggestionBatch. The backend is initialized before use."""
        beparams = self._get_backend_params(params)
        self.initialize()
        return self._suggest_batch(texts, params=beparams)

    def debug(self, message: str) -> None:
        """Log a debug message from this backend"""
        logger.debug(f"Backend {self.backend_id}: {message}")

    def info(self, message: str) -> None:
        """Log an info message from this backend"""
        logger.info(f"Backend {self.backend_id}: {message}")

    def warning(self, message: str) -> None:
        """Log a warning message from this backend"""
        logger.warning(f"Backend {self.backend_id}: {message}")
159
160
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that can perform online learning"""

    @abc.abstractmethod
    def _learn(self, corpus, params):
        """This method should be implemented by backends. It implements the
        learn functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def learn(
        self,
        corpus: DocumentCorpus,
        params: dict[str, Any] | None = None,
    ) -> None:
        """Further train the model on the given document or subject corpus.

        The given parameters (if any) are merged over the backend's own
        parameters before being handed to _learn."""
        return self._learn(corpus, params=self._get_backend_params(params))
177