| Metric | Value |
| --- | --- |
| Total Complexity | 5 |
| Total Lines | 35 |
| Duplicated Lines | 0 % |
| Changes | 0 |
| 1 | """Annif backend mixins that can be used to implement features""" |
||
| 2 | |||
| 3 | |||
| 4 | import abc |
||
| 5 | from annif.hit import ListAnalysisResult |
||
| 6 | |||
| 7 | |||
| 8 | class ChunkingBackend(metaclass=abc.ABCMeta): |
||
| 9 | """Annif backend mixin that implements chunking of input""" |
||
| 10 | |||
| 11 | @abc.abstractmethod |
||
| 12 | def _analyze_chunks(self, chunktexts, project): |
||
| 13 | """Analyze the chunked text; should be implemented by the subclass |
||
| 14 | inheriting this mixin""" |
||
| 15 | |||
| 16 | pass # pragma: no cover |
||
| 17 | |||
| 18 | def _analyze(self, text, project, params): |
||
| 19 | self.initialize() |
||
| 20 | self.debug('Analyzing text "{}..." (len={})'.format( |
||
| 21 | text[:20], len(text))) |
||
| 22 | sentences = project.analyzer.tokenize_sentences(text) |
||
| 23 | self.debug('Found {} sentences'.format(len(sentences))) |
||
| 24 | chunksize = int(params['chunksize']) |
||
| 25 | chunktexts = [] |
||
| 26 | for i in range(0, len(sentences), chunksize): |
||
| 27 | chunktext = ' '.join(sentences[i:i + chunksize]) |
||
| 28 | normalized = self._normalize_text(project, chunktext) |
||
| 29 | if normalized != '': |
||
| 30 | chunktexts.append(normalized) |
||
| 31 | self.debug('Split sentences into {} chunks'.format(len(chunktexts))) |
||
| 32 | if len(chunktexts) == 0: # nothing to analyze, empty result |
||
| 33 | return ListAnalysisResult(hits=[], subject_index=project.subjects) |
||
| 34 | return self._analyze_chunks(chunktexts, project) |
||
| 35 |
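The `_analyze()` template method does all the generic work here (sentence tokenization, grouping into fixed-size chunks, normalization, the empty-result short circuit), so a concrete backend inheriting this mixin only has to implement `_analyze_chunks()`. To make the chunking arithmetic itself concrete, below is a minimal standalone sketch; it is hypothetical and has no Annif dependencies, with `str.strip()` standing in for the backend's `_normalize_text()` helper:

```python
# Hypothetical standalone sketch of the chunking step in _analyze():
# sentences are grouped into chunks of at most `chunksize` sentences,
# and chunks that are empty after normalization are dropped.

def chunk_sentences(sentences, chunksize):
    """Group sentences into chunk texts of at most `chunksize` sentences."""
    chunktexts = []
    for i in range(0, len(sentences), chunksize):
        # Join the next `chunksize` sentences into one chunk text
        chunktext = ' '.join(sentences[i:i + chunksize])
        # str.strip() stands in for the backend's _normalize_text()
        normalized = chunktext.strip()
        if normalized != '':
            chunktexts.append(normalized)
    return chunktexts


sentences = ['First sentence.', 'Second one.', 'Third.', 'Fourth.', 'Fifth.']
print(chunk_sentences(sentences, chunksize=2))
# ['First sentence. Second one.', 'Third. Fourth.', 'Fifth.']
```

Note that the stride of `range(0, len(sentences), chunksize)` yields slices `[0:2]`, `[2:4]`, `[4:6]` for `chunksize=2`, so the final chunk may contain fewer than `chunksize` sentences.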