Conditions | 1 |
Total Lines | 11 |
Code Lines | 9 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | """EstNLTK analyzer for Annif which uses EstNLTK for lemmatization""" |
||
def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
    """Return the lemmas EstNLTK produces for *text*.

    The text is stripped of surrounding whitespace, wrapped in an
    ``estnltk.Text`` and tagged with ``tag_layer()``. The first element
    of every entry in the resulting ``lemma`` attribute is collected.

    Args:
        text: Raw input text to lemmatize.
        filter: When true, keep only lemmas accepted by
            ``self.is_valid_token``; when false, return every lemma.

    Returns:
        The selected lemmas, in the order EstNLTK reports them.
    """
    # Imported inside the method, as in the original; presumably so the
    # module can be loaded when EstNLTK is not installed -- TODO confirm.
    import estnltk

    tagged = estnltk.Text(text.strip())
    tagged.tag_layer()

    # Each entry of ``tagged.lemma`` is indexable; only its first element
    # is used (presumably the first candidate lemma for an ambiguous
    # word -- verify against the EstNLTK documentation).
    first_lemmas = [candidates[0] for candidates in tagged.lemma]

    if not filter:
        return first_lemmas
    return [lemma for lemma in first_lemmas if self.is_valid_token(lemma)]
||
29 |