| Total Complexity | 4 | 
| Total Lines | 33 | 
| Duplicated Lines | 0 % | 
| Changes | 0 | ||
| 1 | """Annif backend mixins that can be used to implement features"""  | 
            ||
| 2 | |||
| 3 | |||
| 4 | import abc  | 
            ||
| 5 | from annif.suggestion import ListSuggestionResult  | 
            ||
| 6 | |||
| 7 | |||
class ChunkingBackend(metaclass=abc.ABCMeta):
    """Annif backend mixin that implements chunking of input

    The mixin supplies ``_suggest``, which splits the input text into
    sentences, groups them into chunks and hands the chunks over to
    ``_suggest_chunks``, which the inheriting class has to implement.
    The inheriting class is also expected to provide ``initialize()`` and
    ``debug()``, both of which are called from ``_suggest``."""

    @abc.abstractmethod
    def _suggest_chunks(self, chunktexts, project):
        """Suggest subjects for the chunked text; should be implemented by
        the subclass inheriting this mixin

        chunktexts is a non-empty list of strings, each one made of up to
        ``chunksize`` sentences joined with a single space."""

        pass  # pragma: no cover

    def _suggest(self, text, project, params):
        """Split text into chunks of sentences and suggest subjects for them

        The text is tokenized into sentences with
        ``project.analyzer.tokenize_sentences`` and every ``chunksize``
        consecutive sentences are joined with a space to form one chunk.

        params must contain the key 'chunksize'; its value is converted
        with ``int()`` (presumably it arrives as a string from the project
        configuration -- verify against the caller).

        Returns the result of ``_suggest_chunks``, or an empty
        ``ListSuggestionResult`` when the text contains no sentences.

        Raises KeyError if 'chunksize' is missing from params and
        ValueError if it is not an integer or is smaller than 1."""

        self.initialize()
        self.debug('Suggesting subjects for text "{}..." (len={})'.format(
            text[:20], len(text)))
        sentences = project.analyzer.tokenize_sentences(text)
        self.debug('Found {} sentences'.format(len(sentences)))
        chunksize = int(params['chunksize'])
        if chunksize < 1:
            # A zero step makes range() fail with an unrelated-looking
            # message, and a negative step yields an empty range that
            # would silently discard all of the input.
            raise ValueError(
                'chunksize must be a positive integer, got {}'.format(
                    chunksize))
        chunktexts = [' '.join(sentences[start:start + chunksize])
                      for start in range(0, len(sentences), chunksize)]
        self.debug('Split sentences into {} chunks'.format(len(chunktexts)))
        if not chunktexts:  # no input, empty result
            return ListSuggestionResult(
                hits=[], subject_index=project.subjects)
        return self._suggest_chunks(chunktexts, project)
            ||
| 33 |