annif.analyzer.estnltk.EstNLTKAnalyzer.tokenize_words() - Code Metrics - Inspection of "Merge pull request #818 from NatLibFi/feature-estn..." - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — main ( 2ffd82...8f13d7 )

by Osma

created 2024-12-20 10:31 UTC

EstNLTKAnalyzer.tokenize_words() A

↳ Parent: annif.analyzer.estnltk

Complexity

Conditions

Size

Total Lines	9
Code Lines	8

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	8
nop	3
dl	0
loc	9
rs	10
c	0
b	0
f	0

"""EstNLTK analyzer for Annif which uses EstNLTK for lemmatization"""

from __future__ import annotations

import importlib

from . import analyzer


class EstNLTKAnalyzer(analyzer.Analyzer):
    """Analyzer that produces lemmas using the EstNLTK library.

    EstNLTK is treated as an optional dependency: it is imported only when
    ``tokenize_words()`` is called, and ``is_available()`` reports whether
    the package can be found at all.
    """

    name = "estnltk"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``estnltk`` package is installed."""
        # A bare ``import importlib`` does not guarantee that the ``util``
        # submodule is bound as an attribute of the package; import it
        # explicitly so this check cannot fail with AttributeError.
        import importlib.util

        return importlib.util.find_spec("estnltk") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Store ``param`` and forward remaining keyword args to the base."""
        self.param = param
        super().__init__(**kwargs)

    def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
        """Return the lemmas of the words in ``text``.

        Args:
            text: Input text; surrounding whitespace is stripped before
                it is handed to EstNLTK.
            filter: When true, keep only lemmas accepted by
                ``self.is_valid_token()`` (not defined in this class, so
                expected to come from the base analyzer). When false,
                every lemma is returned.

        Returns:
            One lemma per entry of ``txt.lemma``, in order, optionally
            filtered.
        """
        # Imported lazily so the module can be loaded without EstNLTK.
        import estnltk

        txt = estnltk.Text(text.strip())
        # Presumably runs EstNLTK's default tagging so that ``txt.lemma``
        # becomes available -- confirm against the EstNLTK documentation.
        txt.tag_layer()
        # Each entry of ``txt.lemma`` is indexable; only its first element
        # is used as the lemma for that entry.
        first_lemmas = (lemmas[0] for lemmas in txt.lemma)
        return [
            lemma
            for lemma in first_lemmas
            if not filter or self.is_valid_token(lemma)
        ]


1			"""EstNLTK analyzer for Annif which uses EstNLTK for lemmatization"""
2
3			from __future__ import annotations
4
5			import importlib
6
7			from . import analyzer
8
9
10			class EstNLTKAnalyzer(analyzer.Analyzer):
11			name = "estnltk"
12
13			@staticmethod
14			def is_available() -> bool:
15			# return True iff EstNLTK is installed
16			return importlib.util.find_spec("estnltk") is not None
17
18			def __init__(self, param: str, **kwargs) -> None:
19			self.param = param
20			super().__init__(**kwargs)
21
22			def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
23			import estnltk
24
25			txt = estnltk.Text(text.strip())
26			txt.tag_layer()
27			return [
28			lemma
29			for lemma in [lemmas[0] for lemmas in txt.lemma]
30			if (not filter or self.is_valid_token(lemma))
31			]
32

NatLibFi / Annif

Push — main ( 2ffd82...8f13d7 )

EstNLTKAnalyzer.tokenize_words() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like