Total Complexity | 7 |
Total Lines | 41 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | """Voikko analyzer for Annif, based on libvoikko library.""" |
||
2 | |||
3 | from __future__ import annotations |
||
4 | |||
5 | import functools |
||
6 | import importlib |
||
7 | |||
8 | from . import analyzer |
||
9 | |||
10 | |||
class VoikkoAnalyzer(analyzer.Analyzer):
    """Analyzer that reduces words to their base form using libvoikko.

    The underlying ``voikko.libvoikko.Voikko`` object is created lazily on
    the first call to :meth:`_normalize_word` and is never pickled.
    """

    # Identifier of this analyzer (presumably the key used to select it in
    # configuration -- confirm against the analyzer registry).
    name = "voikko"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``voikko`` Python module can be found."""
        # Import the submodule explicitly: a bare ``import importlib`` does
        # not guarantee that ``importlib.util`` is bound as an attribute, so
        # relying on it can raise AttributeError depending on what else has
        # already been imported in the process.
        import importlib.util

        return importlib.util.find_spec("voikko") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Store the Voikko parameter and defer creating the Voikko object.

        Args:
            param: Argument passed unchanged to ``voikko.libvoikko.Voikko``
                when the instance is first needed.
            **kwargs: Forwarded to the base class initializer.
        """
        self.param = param
        # Created on demand in _normalize_word().
        self.voikko = None
        super().__init__(**kwargs)

    def __getstate__(self) -> dict[str, str | None]:
        """Return the state of the object for pickling purposes.

        The Voikko instance is set to None because as a ctypes object it
        cannot be pickled.
        """
        return {"param": self.param, "voikko": None}

    # NOTE(review): lru_cache is applied to the plain function at class
    # definition time, so a single cache keyed on (self, word) is shared by
    # all instances and keeps cached instances referenced (ruff B019).
    @functools.lru_cache(maxsize=500000)
    def _normalize_word(self, word: str) -> str:
        """Return the base form of ``word``, or ``word`` itself if unknown.

        The base form is the ``"BASEFORM"`` entry of the first analysis
        returned by Voikko; when there is no analysis, or the first one has
        no such entry, the word is returned unchanged.
        """
        # Imported here so the module can be loaded without voikko installed.
        import voikko.libvoikko

        if self.voikko is None:
            self.voikko = voikko.libvoikko.Voikko(self.param)

        analyses = self.voikko.analyze(word)
        if analyses and "BASEFORM" in analyses[0]:
            return analyses[0]["BASEFORM"]
        return word
||
41 |