| Total Complexity | 1 |
| Total Lines | 18 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Wrapper code for using Simplemma functionality in Annif""" |
||
| 2 | |||
| 3 | from typing import Tuple, Union |
||
| 4 | |||
| 5 | from simplemma import LanguageDetector, Lemmatizer |
||
| 6 | from simplemma.strategies import DefaultStrategy |
||
| 7 | from simplemma.strategies.dictionaries import DefaultDictionaryFactory |
||
| 8 | |||
# How many language dictionaries to keep in memory at once (max).
LANG_CACHE_SIZE = 5

# A single dictionary factory, configured with the cache limit above, backs
# the one lemmatization strategy object that the rest of this module reuses.
_dictionary_factory = DefaultDictionaryFactory(
    cache_max_size=LANG_CACHE_SIZE,
)
_lemmatization_strategy = DefaultStrategy(
    dictionary_factory=_dictionary_factory,
)

# Module-level lemmatizer built on the shared strategy.
lemmatizer = Lemmatizer(
    lemmatization_strategy=_lemmatization_strategy,
)
||
| 14 | |||
| 15 | |||
def get_language_detector(lang: Union[str, Tuple[str, ...]]) -> LanguageDetector:
    """Build a Simplemma language detector for the requested language(s).

    The detector is constructed with the module-level lemmatization strategy,
    i.e. the same strategy object that the module-level ``lemmatizer`` uses.

    Args:
        lang: A single language given as a string, or a tuple of such
            strings; passed through unchanged to ``LanguageDetector``.

    Returns:
        A new ``LanguageDetector`` instance.
    """
    detector = LanguageDetector(
        lang,
        lemmatization_strategy=_lemmatization_strategy,
    )
    return detector
||
| 18 |