| Conditions | 2 |
| Total Lines | 10 |
| Code Lines | 8 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 |
| 1 | """spaCy analyzer for Annif which uses spaCy for lemmatization""" |
||
def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
    """Split *text* into a list of lemmas using the spaCy pipeline.

    When *filter* is true, lemmas rejected by ``self.is_valid_token``
    are dropped.  When ``self.lowercase`` is set, every returned lemma
    is lowercased.
    """
    doc = self.nlp(text.strip())
    result: list[str] = []
    for token in doc:
        lemma = token.lemma_
        # skip invalid tokens only when filtering is requested
        if filter and not self.is_valid_token(lemma):
            continue
        result.append(lemma.lower() if self.lowercase else lemma)
    return result
||
| 50 |