annif.backend.mixins.ChunkingBackend._analyze() - Code Metrics - Inspection of "Merge pull request #249 from NatLibFi/vw-backend" - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( d8a4d2...b3163f )

by Osma

created 2019-01-29 14:21 UTC

annif.backend.mixins.ChunkingBackend._analyze() A

↳ Parent: annif.backend.mixins

Complexity

Conditions

Size

Total Lines	17
Code Lines	17

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	17
dl	0
loc	17
rs	9.55
c	0
b	0
f	0
cc	4
nop	4

"""Annif backend mixins that can be used to implement features"""


import abc
from annif.hit import ListAnalysisResult


class ChunkingBackend(metaclass=abc.ABCMeta):
    """Mixin for Annif backends that analyze their input chunk by chunk.

    The mixin supplies `_analyze`, which cuts the input text into groups
    of sentences, and leaves the analysis of those groups to the
    `_analyze_chunks` method of the inheriting class. It calls
    `self.initialize`, `self.debug` and `self._normalize_text`, none of
    which are defined here, so the class it is mixed into has to provide
    them."""

    @abc.abstractmethod
    def _analyze_chunks(self, chunktexts, project):
        """Analyze the list of normalized chunk texts for the project.

        Abstract hook: the subclass inheriting this mixin must implement
        it. Whatever it returns is handed back unchanged to the caller of
        `_analyze`."""

        pass  # pragma: no cover

    def _analyze(self, text, project, params):
        """Split the text into sentence chunks and analyze those chunks.

        The sentences come from `project.analyzer.tokenize_sentences`.
        Every run of `int(params['chunksize'])` consecutive sentences is
        joined with single spaces and passed through
        `self._normalize_text`; chunks that normalize to the empty string
        are dropped.

        Returns the result of `_analyze_chunks` for the remaining chunks,
        or an empty `ListAnalysisResult` tied to `project.subjects` when
        no chunk is left."""

        self.initialize()
        preview = text[:20]
        self.debug(
            'Analyzing text "{}..." (len={})'.format(preview, len(text)))

        sentences = project.analyzer.tokenize_sentences(text)
        sentence_count = len(sentences)
        self.debug('Found {} sentences'.format(sentence_count))

        # The chunk size is read only after the sentence count has been
        # logged, so a missing or invalid value fails at this point.
        step = int(params['chunksize'])
        candidates = (
            self._normalize_text(
                project, ' '.join(sentences[start:start + step]))
            for start in range(0, sentence_count, step))
        # Compare against '' explicitly: only chunks that normalize to
        # exactly the empty string are discarded.
        chunktexts = [chunk for chunk in candidates if chunk != '']
        self.debug('Split sentences into {} chunks'.format(len(chunktexts)))

        if chunktexts:
            return self._analyze_chunks(chunktexts, project)
        # nothing to analyze, empty result
        return ListAnalysisResult(hits=[], subject_index=project.subjects)


1			"""Annif backend mixins that can be used to implement features"""
2
3
4			import abc
5			from annif.hit import ListAnalysisResult
6
7
8			class ChunkingBackend(metaclass=abc.ABCMeta):
9			"""Annif backend mixin that implements chunking of input"""
10
11			@abc.abstractmethod
12			def _analyze_chunks(self, chunktexts, project):
13			"""Analyze the chunked text; should be implemented by the subclass
14			inheriting this mixin"""
15
16			pass # pragma: no cover
17
18			def _analyze(self, text, project, params):
19			self.initialize()
20			self.debug('Analyzing text "{}..." (len={})'.format(
21			text[:20], len(text)))
22			sentences = project.analyzer.tokenize_sentences(text)
23			self.debug('Found {} sentences'.format(len(sentences)))
24			chunksize = int(params['chunksize'])
25			chunktexts = []
26			for i in range(0, len(sentences), chunksize):
27			chunktext = ' '.join(sentences[i:i + chunksize])
28			normalized = self._normalize_text(project, chunktext)
29			if normalized != '':
30			chunktexts.append(normalized)
31			self.debug('Split sentences into {} chunks'.format(len(chunktexts)))
32			if len(chunktexts) == 0: # nothing to analyze, empty result
33			return ListAnalysisResult(hits=[], subject_index=project.subjects)
34			return self._analyze_chunks(chunktexts, project)
35

NatLibFi / Annif

Push — master ( d8a4d2...b3163f )

annif.backend.mixins.ChunkingBackend._analyze() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like