| Total Complexity | 2 |
| Total Lines | 23 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Snowball analyzer for Annif, based on nltk Snowball stemmer.""" |
||
| 2 | |||
| 3 | from __future__ import annotations |
||
| 4 | |||
| 5 | import functools |
||
| 6 | |||
| 7 | from . import analyzer |
||
| 8 | |||
| 9 | |||
class SnowballAnalyzer(analyzer.Analyzer):
    """Analyzer whose word normalization is NLTK Snowball stemming."""

    name = "snowball"

    def __init__(self, param: str, **kwargs) -> None:
        """Build the Snowball stemmer and initialize the base analyzer.

        Args:
            param: Value handed unchanged to ``SnowballStemmer`` and kept
                on the instance as ``self.param``.
            **kwargs: Forwarded as-is to ``analyzer.Analyzer.__init__``.
        """
        # The nltk import runs at construction time rather than at module
        # import time, since it lives inside ``__init__``.
        from nltk.stem.snowball import SnowballStemmer

        self.param = param
        self.stemmer = SnowballStemmer(param)
        super().__init__(**kwargs)

    # NOTE(review): lru_cache on a method includes ``self`` in the cache key
    # and keeps instances referenced for the cache's lifetime (ruff B019);
    # left as-is to preserve behavior -- confirm this is acceptable.
    @functools.lru_cache(maxsize=500_000)
    def _normalize_word(self, word: str) -> str:
        """Return the stem of ``word`` after lowercasing it.

        Results are memoized in an LRU cache of up to 500000 entries.
        """
        lowered = word.lower()
        return self.stemmer.stem(lowered)
||
| 23 |