| Conditions | 1 |
| Total Lines | 9 |
| Code Lines | 8 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
| 1 | """EstNLTK analyzer for Annif which uses EstNLTK for lemmatization""" |
||
def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
    """Split text into words and return their lemmas as produced by EstNLTK.

    The text is stripped of surrounding whitespace, wrapped in an
    ``estnltk.Text`` object and tagged with ``tag_layer()``. For every
    entry of the resulting ``lemma`` attribute only the first element is
    kept (presumably the first of several alternative analyses -- confirm
    against the EstNLTK documentation).

    Args:
        text: The raw text to lemmatize.
        filter: When true, keep only the lemmas accepted by
            ``self.is_valid_token``; when false, return every lemma.

    Returns:
        The list of lemmas, in the order EstNLTK reports them.
    """
    # Imported inside the method, so estnltk is only loaded when
    # tokenization is actually requested.
    import estnltk

    document = estnltk.Text(text.strip())
    document.tag_layer()

    # One lemma per entry: always the first element of that entry.
    first_lemmas = [candidates[0] for candidates in document.lemma]

    if not filter:
        return first_lemmas
    return [lemma for lemma in first_lemmas if self.is_valid_token(lemma)]
||
| 32 |