annif.analyzer.voikko   A
last analyzed

Complexity

Total Complexity 7

Size/Duplication

Total Lines 41
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 41
rs 10
c 0
b 0
f 0
wmc 7

4 Methods

Rating   Name   Duplication   Size   Complexity  
A VoikkoAnalyzer.is_available() 0 4 1
A VoikkoAnalyzer.__init__() 0 4 1
A VoikkoAnalyzer._normalize_word() 0 10 4
A VoikkoAnalyzer.__getstate__() 0 6 1
1
"""Voikko analyzer for Annif, based on libvoikko library."""
2
3
from __future__ import annotations
4
5
import functools
6
import importlib
7
8
from . import analyzer
9
10
11
class VoikkoAnalyzer(analyzer.Analyzer):
    """Analyzer that normalizes words to their base form using Voikko.

    The underlying ``voikko.libvoikko.Voikko`` object is created lazily on
    the first call to ``_normalize_word`` and is never pickled.
    """

    name = "voikko"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``voikko`` Python package can be found."""
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is bound as an attribute, so import it explicitly.
        import importlib.util

        return importlib.util.find_spec("voikko") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Store the Voikko parameter and defer creating the Voikko object.

        :param param: value passed as-is to ``voikko.libvoikko.Voikko``
            (presumably a language code -- confirm against callers)
        :param kwargs: forwarded unchanged to the base class initializer
        """
        self.param = param
        self.voikko = None  # created on first use in _normalize_word
        super().__init__(**kwargs)

    def __getstate__(self) -> dict[str, str | None]:
        """Return the state of the object for pickling purposes.

        The Voikko instance is set to None because as a ctypes object it
        cannot be pickled; it is re-created lazily after unpickling.
        """
        # NOTE(review): only ``param`` and ``voikko`` are preserved here; any
        # instance attributes set by the base class initializer would not
        # survive pickling -- confirm that this is intended.
        return {"param": self.param, "voikko": None}

    # NOTE(review): lru_cache on a method keys the cache on ``self`` as well
    # as ``word`` and keeps each instance alive for the lifetime of the cache.
    @functools.lru_cache(maxsize=500000)
    def _normalize_word(self, word: str) -> str:
        """Return the base form of *word*, or *word* itself if unknown.

        The base form is taken from the ``BASEFORM`` entry of the first
        analysis returned by Voikko. If there are no analyses, or the first
        one has no ``BASEFORM`` entry, the word is returned unchanged.
        """
        # Imported here so that the module can be loaded (and is_available()
        # called) even when the voikko package is not installed.
        import voikko.libvoikko

        if self.voikko is None:
            self.voikko = voikko.libvoikko.Voikko(self.param)

        result = self.voikko.analyze(word)
        if result and "BASEFORM" in result[0]:
            return result[0]["BASEFORM"]
        return word
41