| Total Complexity | 7 |
| Total Lines | 41 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Voikko analyzer for Annif, based on libvoikko library.""" |
||
| 2 | |||
| 3 | from __future__ import annotations |
||
| 4 | |||
| 5 | import functools |
||
| 6 | import importlib |
||
| 7 | |||
| 8 | from . import analyzer |
||
| 9 | |||
| 10 | |||
class VoikkoAnalyzer(analyzer.Analyzer):
    """Analyzer that normalizes words to their base form using Voikko.

    The Voikko instance is created lazily on the first call to
    ``_normalize_word`` and is never included in the pickled state.
    """

    name = "voikko"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``voikko`` package can be found."""
        # Import the submodule explicitly: a plain ``import importlib`` does
        # not guarantee that ``importlib.util`` is bound as an attribute.
        from importlib.util import find_spec

        # return True iff Voikko is installed
        return find_spec("voikko") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Store the Voikko parameter and defer creating the Voikko instance.

        Args:
            param: Argument forwarded to the ``Voikko`` constructor
                (presumably a language code -- confirm against callers).
            **kwargs: Passed through unchanged to the base class initializer.
        """
        self.param = param
        self.voikko = None  # created on first use in _normalize_word
        super().__init__(**kwargs)

    def __getstate__(self) -> dict[str, str | None]:
        """Return the state of the object for pickling purposes.

        The Voikko instance is set to None because as a ctypes object it
        cannot be pickled.
        """
        # NOTE(review): only these two keys are returned, so any attribute
        # set by the base class initializer is not part of the pickled
        # state -- confirm that this is intended.
        return {"param": self.param, "voikko": None}

    # NOTE(review): lru_cache on an instance method keys on ``self`` as well
    # as ``word``, so the cache is shared between instances and keeps them
    # alive while their entries remain cached (ruff B019). Left unchanged to
    # preserve the existing caching behavior.
    @functools.lru_cache(maxsize=500000)
    def _normalize_word(self, word: str) -> str:
        """Return the base form of ``word``, or ``word`` itself if unknown.

        The first analysis returned by Voikko is used; when there is no
        analysis, or it has no ``BASEFORM`` entry, the word is returned
        unchanged.
        """
        # Imported here so the module can be loaded without voikko installed.
        import voikko.libvoikko

        if self.voikko is None:
            self.voikko = voikko.libvoikko.Voikko(self.param)
        result = self.voikko.analyze(word)
        if result and "BASEFORM" in result[0]:
            return result[0]["BASEFORM"]
        return word
||
| 41 |