Passed
Push — master ( c8c370...dee89b )
by Osma
03:14
created

annif.backend.mixins.ChunkingBackend._analyze()   A

Complexity

Conditions 3

Size

Total Lines 14
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 14
rs 9.7
c 0
b 0
f 0
cc 3
nop 4
1
"""Annif backend mixins that can be used to implement features"""
2
3
4
import abc
5
from annif.suggestion import ListSuggestionResult
6
7
8
class ChunkingBackend(metaclass=abc.ABCMeta):
    """Annif backend mixin that implements chunking of input"""

    @abc.abstractmethod
    def _suggest_chunks(self, chunktexts, project):
        """Suggest subjects for the chunked text; should be implemented by
        the subclass inheriting this mixin"""

        pass  # pragma: no cover

    def _suggest(self, text, project, params):
        """Split the text into chunks of sentences and suggest subjects
        for them.

        The text is tokenized into sentences with
        project.analyzer.tokenize_sentences(); every run of
        params['chunksize'] consecutive sentences is joined with a single
        space into one chunk, and the list of chunks is passed on to
        _suggest_chunks().

        Relies on initialize() and debug() being provided by the class
        this mixin is combined with.

        Returns an empty ListSuggestionResult when the text contains no
        sentences, otherwise whatever _suggest_chunks() returns.

        Raises ValueError if params['chunksize'] is not a positive
        integer.
        """
        self.initialize()
        self.debug('Suggesting subjects for text "{}..." (len={})'.format(
            text[:20], len(text)))
        sentences = project.analyzer.tokenize_sentences(text)
        self.debug('Found {} sentences'.format(len(sentences)))
        chunksize = int(params['chunksize'])
        # A zero step makes range() fail with an unrelated-looking message
        # and a negative step silently yields no chunks at all, so reject
        # both explicitly.
        if chunksize < 1:
            raise ValueError(
                'chunksize must be a positive integer, got {}'.format(
                    chunksize))
        chunktexts = [' '.join(sentences[start:start + chunksize])
                      for start in range(0, len(sentences), chunksize)]
        self.debug('Split sentences into {} chunks'.format(len(chunktexts)))
        if not chunktexts:  # no input, empty result
            return ListSuggestionResult(
                hits=[], subject_index=project.subjects)
        return self._suggest_chunks(chunktexts, project)
33