Conditions | 2 |
Total Lines | 10 |
Code Lines | 8 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | """spaCy analyzer for Annif which uses spaCy for lemmatization""" |
||
def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
    """Lemmatize *text* and return the resulting lemmas as a list.

    The text is stripped of surrounding whitespace and passed to
    ``self.nlp``; the ``lemma_`` attribute of every token it yields is
    collected in order.

    Args:
        text: The input text to lemmatize.
        filter: When true, keep only lemmas for which
            ``self.is_valid_token`` returns a truthy value. When false,
            every lemma is kept.

    Returns:
        The collected lemmas, lowercased if ``self.lowercase`` is truthy.
    """
    kept = []
    for token in self.nlp(text.strip()):
        word = token.lemma_
        # Validation is only consulted when filtering is requested.
        if filter and not self.is_valid_token(word):
            continue
        kept.append(word)

    return [word.lower() for word in kept] if self.lowercase else kept
||
50 |