Passed
Pull Request — master (#452)
by
unknown
01:52
created

annif.backend.backend.AnnifBackend.suggest()   A

Complexity

Conditions 2

Size

Total Lines 9
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 9
rs 10
c 0
b 0
f 0
cc 2
nop 3
1
"""Common functionality for backends."""
2
3
import abc
4
import os.path
5
from datetime import datetime, timezone
6
from glob import glob
7
from annif import logger
8
from annif.corpus import TruncatingDocumentCorpus
9
from annif.exception import ConfigurationException
10
11
12
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis.

    Subclasses must implement ``_suggest`` and may override ``_train``,
    ``initialize`` and ``default_params``.
    """

    # Identifier of the backend type; None in this base class.
    name = None
    # Whether the backend requires a subject index; False in this base class.
    needs_subject_index = False

    # Parameter values used when the configuration does not override them.
    DEFAULT_PARAMETERS = {'limit': 100,
                          'input_limit': 0}

    def __init__(self, backend_id, config_params, project):
        """Initialize the backend with specific parameters.

        ``config_params`` is a dict whose keys and values depend on the
        specific backend type. ``project`` must expose a ``datadir``
        attribute, which is stored as the backend's data directory.
        """
        self.backend_id = backend_id
        self.config_params = config_params
        self.project = project
        self.datadir = project.datadir

    def default_params(self):
        """Return the default parameters of this backend.

        Note that this is the shared class-level dict, not a copy.
        """
        return self.DEFAULT_PARAMETERS

    @property
    def params(self):
        """Return a new dict of the default parameters overlaid with the
        configured parameters (configured values win)."""
        params = {}
        params.update(self.default_params())
        params.update(self.config_params)
        return params

    @property
    def is_trained(self):
        """Return True if the data directory contains at least one entry
        matching the ``*`` glob pattern."""
        return bool(glob(os.path.join(self.datadir, '*')))

    @property
    def modification_time(self):
        """Return the most recent modification time among the entries of
        the data directory as a timezone-aware UTC datetime, or None when
        no entry matches the ``*`` glob pattern."""
        # Build aware datetimes directly; datetime.utcfromtimestamp() is
        # deprecated since Python 3.12 and returns naive values.
        mtimes = (datetime.fromtimestamp(os.path.getmtime(p), tz=timezone.utc)
                  for p in glob(os.path.join(self.datadir, '*')))
        return max(mtimes, default=None)

    def _get_backend_params(self, params):
        """Return a new dict of the backend parameters, updated with the
        given ``params`` overrides unless they are None."""
        backend_params = dict(self.params)
        if params is not None:
            backend_params.update(params)
        return backend_params

    def _validate_input_limit(self, input_limit):
        """Convert ``input_limit`` to int and return it.

        Raises ConfigurationException if the value is negative. A value
        that int() cannot convert propagates the error raised by int().
        """
        input_limit = int(input_limit)
        if input_limit < 0:
            raise ConfigurationException(
                'input_limit can not be negative', backend_id=self.backend_id)
        return input_limit

    def _train(self, corpus, params):
        """This method can be overridden by backends. It implements
        the train functionality, with pre-processed parameters."""
        pass  # default is to do nothing, subclasses may override

    def train(self, corpus, params=None):
        """Train the model on the given document or subject corpus.

        ``params`` may be a dict of overrides for the backend parameters.
        When the effective ``input_limit`` is non-zero, the corpus is
        wrapped in a TruncatingDocumentCorpus before being passed on.
        Returns whatever ``_train`` returns.
        """
        beparams = self._get_backend_params(params)
        input_limit = self._validate_input_limit(beparams['input_limit'])
        if input_limit != 0:
            corpus = TruncatingDocumentCorpus(corpus, input_limit)
        return self._train(corpus, params=beparams)

    def initialize(self):
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation."""
        pass

    @abc.abstractmethod
    def _suggest(self, text, params):
        """This method should be implemented by backends. It implements
        the suggest functionality, with pre-processed parameters."""
        pass  # pragma: no cover

    def suggest(self, text, params=None):
        """Suggest subjects for the input text and return a list of subjects
        represented as a list of SubjectSuggestion objects.

        ``params`` may be a dict of overrides for the backend parameters.
        The backend is initialized first; then, when the effective
        ``input_limit`` is non-zero, only the first ``input_limit``
        characters of the text are passed on to ``_suggest``.
        """
        beparams = self._get_backend_params(params)
        self.initialize()
        input_limit = self._validate_input_limit(beparams['input_limit'])
        if input_limit != 0:
            text = text[:input_limit]
        return self._suggest(text, params=beparams)

    def debug(self, message):
        """Log a debug message from this backend"""
        logger.debug("Backend {}: {}".format(self.backend_id, message))

    def info(self, message):
        """Log an info message from this backend"""
        logger.info("Backend {}: {}".format(self.backend_id, message))

    def warning(self, message):
        """Log a warning message from this backend"""
        logger.warning("Backend {}: {}".format(self.backend_id, message))
113
114
115
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that support online learning, i.e.
    further training of an existing model."""

    @abc.abstractmethod
    def _learn(self, corpus, params):
        """Backends must implement this method. It performs the actual
        learning and receives the already merged backend parameters."""
        pass  # pragma: no cover

    def learn(self, corpus, params=None):
        """Continue training the model on the given document or subject
        corpus and return whatever ``_learn`` returns.

        ``params`` may be a dict of overrides for the backend parameters.
        When the effective ``input_limit`` is non-zero, the corpus is
        wrapped in a TruncatingDocumentCorpus before being passed on.
        """
        merged = self._get_backend_params(params)
        limit = self._validate_input_limit(merged['input_limit'])
        # A limit of zero means the corpus is passed through unwrapped.
        if limit:
            corpus = TruncatingDocumentCorpus(corpus, limit)
        return self._learn(corpus, params=merged)
131