Passed
Pull Request — master (#663)
by Juho
03:15
created

annif.backend.backend   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 131
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 74
dl 0
loc 131
rs 10
c 0
b 0
f 0
wmc 20

18 Methods

Rating   Name   Duplication   Size   Complexity  
A AnnifBackend.__init__() 0 8 1
A AnnifBackend.default_params() 0 2 1
A AnnifBackend.is_trained() 0 3 1
A AnnifBackend.params() 0 6 1
A AnnifBackend._get_backend_params() 0 5 2
A AnnifBackend._suggest() 0 5 1
A AnnifBackend.train() 0 4 1
A AnnifBackend.initialize() 0 6 1
A AnnifBackend._train() 0 4 1
A AnnifBackend.modification_time() 0 10 2
A AnnifBackend.suggest() 0 6 1
A AnnifBackend.suggest_batch() 0 6 1
A AnnifBackend._suggest_batch() 0 7 1
A AnnifLearningBackend.learn() 0 4 1
A AnnifBackend.debug() 0 3 1
A AnnifBackend.info() 0 3 1
A AnnifBackend.warning() 0 3 1
A AnnifLearningBackend._learn() 0 5 1
1
"""Common functionality for backends."""
2
3
import abc
4
import os.path
5
from datetime import datetime, timezone
6
from glob import glob
7
8
from annif import logger
9
10
11
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis. The
    non-implemented methods should be overridden in subclasses."""

    # Backend type name; None in this base class.
    name = None

    # Parameter defaults; values given in config_params or passed per call
    # take precedence over these (see params and _get_backend_params).
    DEFAULT_PARAMETERS = {"limit": 100}

    def __init__(self, backend_id, config_params, project):
        """Initialize backend with specific parameters. The
        parameters are a dict. Keys and values depend on the specific
        backend type."""
        self.backend_id = backend_id
        self.config_params = config_params
        self.project = project
        # The backend's data directory is the one exposed by the project.
        self.datadir = project.datadir

    def default_params(self):
        """Return the default parameters of this backend.

        Note that this returns the shared class-level dict itself, not a
        copy, so callers should not mutate the result."""
        return self.DEFAULT_PARAMETERS

    @property
    def params(self):
        """A new dict with the default parameters overlaid by the
        configured parameters."""
        params = {}
        params.update(self.default_params())
        params.update(self.config_params)
        return params

    @property
    def is_trained(self):
        """True if the data directory contains at least one entry
        matching the glob pattern "*", otherwise False."""
        return bool(glob(os.path.join(self.datadir, "*")))

    @property
    def modification_time(self):
        """The most recent modification time among the entries in the
        data directory as a timezone-aware UTC datetime, or None if the
        data directory has no matching entries."""
        mtimes = [os.path.getmtime(p) for p in glob(os.path.join(self.datadir, "*"))]
        if not mtimes:
            return None
        # Build an aware UTC datetime directly from the newest timestamp;
        # datetime.utcfromtimestamp() is deprecated since Python 3.12.
        return datetime.fromtimestamp(max(mtimes), tz=timezone.utc)

    def _get_backend_params(self, params):
        """Return a new dict of the effective parameters: the backend
        parameters overlaid by the given params, if they are not None."""
        backend_params = dict(self.params)
        if params is not None:
            backend_params.update(params)
        return backend_params

    def _train(self, corpus, params, jobs=0):
        """This method can be overridden by backends. It implements
        the train functionality, with pre-processed parameters."""
        pass  # default is to do nothing, subclasses may override

    def train(self, corpus, params=None, jobs=0):
        """Train the model on the given document or subject corpus."""
        beparams = self._get_backend_params(params)
        return self._train(corpus, params=beparams, jobs=jobs)

    def initialize(self, parallel=False):
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation.
        If parallel is True, the backend should expect to be used for
        parallel operation."""
        pass

    @abc.abstractmethod
    def _suggest(self, text, params):
        """This method should be implemented by backends. It implements
        the suggest functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def _suggest_batch(self, corpus, transform, params):
        """This method can be implemented by backends to use batching of documents in
        their operations. This default implementation uses the regular suggest
        functionality."""
        return [
            self._suggest(transform.transform_text(doc.text), params)
            for doc in corpus.documents
        ]

    def suggest(self, text, params=None):
        """Suggest subjects for the input text and return a list of subjects
        represented as a list of SubjectSuggestion objects."""
        beparams = self._get_backend_params(params)
        # Make sure the backend has loaded its data before suggesting.
        self.initialize()
        return self._suggest(text, params=beparams)

    def suggest_batch(self, corpus, transform, params=None):
        """Suggest subjects for the input documents and return a list of subject sets
        represented as a list of SubjectSuggestion objects."""
        beparams = self._get_backend_params(params)
        # Make sure the backend has loaded its data before suggesting.
        self.initialize()
        return self._suggest_batch(corpus, transform, params=beparams)

    def debug(self, message):
        """Log a debug message from this backend"""
        logger.debug("Backend {}: {}".format(self.backend_id, message))

    def info(self, message):
        """Log an info message from this backend"""
        logger.info("Backend {}: {}".format(self.backend_id, message))

    def warning(self, message):
        """Log a warning message from this backend"""
        logger.warning("Backend {}: {}".format(self.backend_id, message))
116
117
118
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that can perform online learning"""

    @abc.abstractmethod
    def _learn(self, corpus, params):
        """This method should be implemented by backends. It implements the learn
        functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def learn(self, corpus, params=None):
        """Further train the model on the given document or subject corpus.

        The given params, if not None, are overlaid on the backend
        parameters before being passed on to _learn, whose result is
        returned."""
        beparams = self._get_backend_params(params)
        return self._learn(corpus, params=beparams)
131