Passed
Pull Request — main (#818)
by Osma
06:57 queued 03:35
created

annif.analyzer.estnltk   A

Complexity

Total Complexity 3

Size/Duplication

Total Lines 31
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 20
dl 0
loc 31
rs 10
c 0
b 0
f 0
wmc 3

3 Methods

Rating   Name   Duplication   Size   Complexity  
A EstNLTKAnalyzer.is_available() 0 4 1
A EstNLTKAnalyzer.__init__() 0 3 1
A EstNLTKAnalyzer.tokenize_words() 0 9 1
1
"""EstNLTK analyzer for Annif which uses EstNLTK for lemmatization"""
2
3
from __future__ import annotations
4
5
import importlib
6
7
from . import analyzer
8
9
10
class EstNLTKAnalyzer(analyzer.Analyzer):
    """Annif analyzer that uses EstNLTK for lemmatization."""

    name = "estnltk"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``estnltk`` package is installed.

        Only the import spec is looked up; ``estnltk`` itself is not imported.
        """
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is bound on the package, so import it explicitly here
        # instead of relying on some other module having loaded it already.
        from importlib.util import find_spec

        return find_spec("estnltk") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Store the analyzer parameter and initialize the base analyzer.

        Args:
            param: analyzer parameter, kept as-is on ``self.param``.
            **kwargs: forwarded unchanged to the base class initializer.
        """
        self.param = param
        super().__init__(**kwargs)

    def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
        """Return the lemmas of the words in *text*.

        Args:
            text: input text; leading/trailing whitespace is stripped first.
            filter: when true, keep only lemmas accepted by
                ``self.is_valid_token``; when false, keep every lemma.

        Returns:
            One lemma per entry of EstNLTK's ``lemma`` attribute, in order.
        """
        # Imported lazily so this module can be loaded (and is_available()
        # queried) even when EstNLTK is not installed.
        import estnltk

        txt = estnltk.Text(text.strip())
        # NOTE(review): presumably runs EstNLTK's default tagging so that
        # ``txt.lemma`` is populated -- confirm against the EstNLTK docs.
        txt.tag_layer()

        # Each entry of ``txt.lemma`` is a sequence of candidate lemmas;
        # only the first candidate is used.
        first_lemmas = (lemmas[0] for lemmas in txt.lemma)
        return [
            lemma
            for lemma in first_lemmas
            if (not filter or self.is_valid_token(lemma))
        ]
32