Total Complexity | 2 |
Total Lines | 23 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | """Snowball analyzer for Annif, based on nltk Snowball stemmer.""" |
||
2 | |||
3 | from __future__ import annotations |
||
4 | |||
5 | import functools |
||
6 | |||
7 | from . import analyzer |
||
8 | |||
9 | |||
class SnowballAnalyzer(analyzer.Analyzer):
    """Analyzer that normalizes words using an NLTK Snowball stemmer."""

    name = "snowball"

    def __init__(self, param: str, **kwargs) -> None:
        """Set up the stemmer and initialize the parent analyzer.

        Args:
            param: Value handed to nltk's ``SnowballStemmer`` constructor;
                also stored on the instance as ``self.param``.
            **kwargs: Forwarded unchanged to the parent class initializer.
        """
        # nltk is imported at construction time, not at module import time.
        from nltk.stem.snowball import SnowballStemmer

        self.param = param
        self.stemmer = SnowballStemmer(param)
        super().__init__(**kwargs)

    # NOTE: the decorator runs once, in the class body, so a single cache is
    # shared by every instance; entries are keyed on (self, word) and the
    # cache therefore holds references to the instances it has seen.
    @functools.lru_cache(maxsize=500000)
    def _normalize_word(self, word: str) -> str:
        """Return the stem of ``word`` after lowercasing it."""
        lowered = word.lower()
        return self.stemmer.stem(lowered)
||
23 |