|
1
|
|
|
"""Common functionality for backends.""" |
|
2
|
|
|
|
|
3
|
|
|
import abc |
|
4
|
|
|
import os |
|
5
|
|
|
import os.path |
|
6
|
|
|
from annif import logger |
|
7
|
|
|
|
|
8
|
|
|
|
|
9
|
|
|
class AnnifBackend(metaclass=abc.ABCMeta):
    """Base class for Annif backends that perform analysis.

    Subclasses must implement `_analyze`; `train` and `initialize` are
    optional hooks that do nothing by default.
    """

    # Class-level defaults; presumably overridden by concrete backends
    # (how these flags are consumed is not visible here -- verify against
    # the callers).
    name = None
    needs_subject_index = False
    needs_subject_vectorizer = False

    def __init__(self, backend_id, params, datadir):
        """Initialize backend with specific parameters.

        backend_id -- identifier used as a prefix in log messages
        params -- dict of parameters; keys and values depend on the
            specific backend type
        datadir -- path of the directory where this backend may store its
            data files (created lazily by `_get_datadir`)
        """
        self.backend_id = backend_id
        self.params = params
        self._datadir = datadir

    def _get_datadir(self):
        """Return the path of the directory where this backend can store
        its data files, creating the directory first if necessary.

        Raises OSError (FileExistsError) if the path exists but is not a
        directory.
        """
        # exist_ok=True avoids the check-then-create race: another process
        # or thread may create the directory between an existence test and
        # the makedirs() call.
        os.makedirs(self._datadir, exist_ok=True)
        return self._datadir

    def train(self, corpus, project):
        """Train the model on the given document or subject corpus."""
        pass  # default is to do nothing, subclasses may override

    def initialize(self):
        """This method can be overridden by backends. It should cause the
        backend to pre-load all data it needs during operation."""
        pass

    @abc.abstractmethod
    def _analyze(self, text, project, params):
        """This method should be implemented by backends. It implements
        the analyze functionality, with pre-processed parameters."""
        pass

    def analyze(self, text, project, params=None):
        """Analyze some input text and return a list of subjects represented
        as a list of AnalysisHit objects.

        The backend's own parameters are copied and then overlaid with the
        optional `params` dict, so per-call values take precedence and
        neither `self.params` nor the caller's dict is modified.
        """
        beparams = dict(self.params)
        if params is not None:
            beparams.update(params)
        return self._analyze(text, project, params=beparams)

    def debug(self, message):
        """Log a debug message from this backend"""
        logger.debug("Backend {}: {}".format(self.backend_id, message))

    def info(self, message):
        """Log an info message from this backend"""
        logger.info("Backend {}: {}".format(self.backend_id, message))

    def warning(self, message):
        """Log a warning message from this backend"""
        logger.warning("Backend {}: {}".format(self.backend_id, message))
|
66
|
|
|
|