Total Complexity | 1 |
Total Lines | 18 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | """Wrapper code for using Simplemma functionality in Annif""" |
||
2 | |||
3 | from typing import Tuple, Union |
||
4 | |||
5 | from simplemma import LanguageDetector, Lemmatizer |
||
6 | from simplemma.strategies import DefaultStrategy |
||
7 | from simplemma.strategies.dictionaries import DefaultDictionaryFactory |
||
8 | |||
# Upper bound on how many language dictionaries are kept in memory at once.
LANG_CACHE_SIZE = 5

# One dictionary factory feeds one lemmatization strategy, which in turn backs
# the module-level lemmatizer, so all of them work from the same bounded cache.
_dictionary_factory = DefaultDictionaryFactory(
    cache_max_size=LANG_CACHE_SIZE,
)
_lemmatization_strategy = DefaultStrategy(
    dictionary_factory=_dictionary_factory,
)
lemmatizer = Lemmatizer(
    lemmatization_strategy=_lemmatization_strategy,
)
||
14 | |||
15 | |||
def get_language_detector(lang: Union[str, Tuple[str, ...]]) -> LanguageDetector:
    """Create a Simplemma ``LanguageDetector`` for the given language(s).

    Args:
        lang: A single language identifier, or a tuple of them, handed
            unchanged to ``LanguageDetector`` as its first argument.

    Returns:
        A new ``LanguageDetector`` configured with the module-level
        ``_lemmatization_strategy`` (the same strategy object that the
        module's ``lemmatizer`` uses).
    """
    detector = LanguageDetector(
        lang,
        lemmatization_strategy=_lemmatization_strategy,
    )
    return detector
||
18 |