Passed
Pull Request — master (#418)
by Osma
01:28
created

annif.backend.ensemble.EnsembleBackend._suggest()   A

Complexity

Conditions 1

Size

Total Lines 6
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 6
dl 0
loc 6
rs 10
c 0
b 0
f 0
cc 1
nop 3
1
"""Ensemble backend that combines results from multiple projects"""
2
3
4
import annif.suggestion
5
import annif.util
6
from . import backend
7
from annif.exception import NotSupportedException
8
9
10
class EnsembleBackend(backend.AnnifBackend):
    """Ensemble backend that combines results from multiple projects"""

    name = "ensemble"

    @property
    def is_trained(self):
        """Return True when every source project reports ``is_trained``.

        Note that ``all()`` of an empty list is True, so an ensemble whose
        sources parse to nothing is reported as trained.
        """
        sources_trained = self._get_sources_attribute('is_trained')
        return all(sources_trained)

    @property
    def modification_time(self):
        """Return the largest ``modification_time`` among the source projects.

        Falsy values (such as None) are skipped; if nothing remains, None is
        returned.
        """
        mtimes = self._get_sources_attribute('modification_time')
        return max(filter(None, mtimes), default=None)

    def _get_sources_attribute(self, attr):
        """Return a list with the value of attribute ``attr`` read from each
        source project, in the order given by the ``sources`` parameter."""
        params = self._get_backend_params(None)
        sources = annif.util.parse_sources(params['sources'])
        # the weight in each (project_id, weight) pair is not needed here
        return [getattr(self.project.registry.get_project(project_id), attr)
                for project_id, _ in sources]

    def initialize(self):
        """Initialize the ensemble by initializing each source project."""
        # initialize all the source projects
        params = self._get_backend_params(None)
        for project_id, _ in annif.util.parse_sources(params['sources']):
            project = self.project.registry.get_project(project_id)
            project.initialize()

    def _normalize_hits(self, hits, source_project):
        """Hook for processing hits from backends. Intended to be overridden
        by subclasses."""
        # default behaviour: pass the hits through unchanged
        return hits

    def _suggest_with_sources(self, text, sources):
        """Ask every source project for suggestions on ``text``.

        ``sources`` is an iterable of (project_id, weight) pairs. Returns a
        list with one ``annif.suggestion.WeightedSuggestion`` per source,
        holding the (normalized) hits, the weight and the subjects of the
        source project.
        """
        hits_from_sources = []
        for project_id, weight in sources:
            source_project = self.project.registry.get_project(project_id)
            hits = source_project.suggest(text)
            self.debug(
                'Got {} hits from project {}'.format(
                    len(hits), source_project.project_id))
            # give subclasses a chance to post-process the hits of this source
            norm_hits = self._normalize_hits(hits, source_project)
            hits_from_sources.append(
                annif.suggestion.WeightedSuggestion(
                    hits=norm_hits,
                    weight=weight,
                    subjects=source_project.subjects))
        return hits_from_sources

    def _merge_hits_from_sources(self, hits_from_sources, params):
        """Hook for merging hits from sources. Can be overridden by
        subclasses."""
        # params is not used by this default implementation
        return annif.util.merge_hits(hits_from_sources, self.project.subjects)

    def _suggest(self, text, params):
        """Return the merged suggestions for ``text``.

        The projects listed in ``params['sources']`` are queried and their
        hits are combined with ``_merge_hits_from_sources``.
        """
        sources = annif.util.parse_sources(params['sources'])
        hits_from_sources = self._suggest_with_sources(text, sources)
        merged_hits = self._merge_hits_from_sources(hits_from_sources, params)
        self.debug('{} hits after merging'.format(len(merged_hits)))
        return merged_hits

    def _train(self, corpus, params):
        """Always raise NotSupportedException: this backend has no training
        step of its own."""
        raise NotSupportedException('Training ensemble model is not possible.')
72