Passed
Pull Request — main (#818)
by Osma
04:04 queued 58s
created

annif.analyzer.estnltk.EstNLTKAnalyzer.__init__()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
"""EstNLTK analyzer for Annif which uses EstNLTK for lemmatization"""
2
3
from __future__ import annotations
4
5
import annif.util
6
from annif.exception import OperationFailedException
7
8
from . import analyzer
9
10
11
class EstNLTKAnalyzer(analyzer.Analyzer):
    """Annif analyzer that lemmatizes text using EstNLTK."""

    name = "estnltk"

    def __init__(self, param: str, **kwargs) -> None:
        """Store the analyzer parameter, then initialize the base analyzer.

        Args:
            param: Analyzer parameter string; kept on the instance as
                ``self.param`` (not otherwise used within this class).
            **kwargs: Forwarded unchanged to the base class initializer.
        """
        self.param = param
        super().__init__(**kwargs)

    def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
        """Return the lemmas of the words in *text*.

        Args:
            text: Input text; surrounding whitespace is stripped before it
                is handed to EstNLTK.
            filter: When true, keep only lemmas accepted by
                ``self.is_valid_token``; when false, return every lemma.

        Returns:
            One lemma per analyzed word, in document order.
        """
        # Imported at call time rather than module level
        # (presumably because estnltk is an optional dependency -- confirm).
        import estnltk

        doc = estnltk.Text(text.strip())
        doc.tag_layer()

        # Each entry of ``doc.lemma`` is indexable; only its first element
        # is used as the lemma for that word.
        first_lemmas = [candidates[0] for candidates in doc.lemma]

        if not filter:
            return first_lemmas
        return [lemma for lemma in first_lemmas if self.is_valid_token(lemma)]
29