Total Complexity | 4 |
Total Lines | 27 |
Duplicated Lines | 70.37 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
1 | """Simplemma analyzer for Annif, based on simplemma lemmatizer.""" |
||
2 | |||
3 | import functools |
||
4 | import simplemma |
||
5 | from . import analyzer |
||
6 | |||
7 | |||
class SimplemmaAnalyzer(analyzer.Analyzer):
    """Analyzer that lemmatizes words using the simplemma library.

    The simplemma language data is loaded lazily on the first lemmatization
    and every instance keeps its own bounded cache of lemmatized words.
    """

    name = "simplemma"

    # Upper bound on the number of lemmatized words cached per instance.
    _CACHE_SIZE = 500000

    def __init__(self, param, **kwargs):
        """Initialize the analyzer.

        param is stored as the language and later passed to
        simplemma.load_data; the remaining keyword arguments are forwarded
        unchanged to the base class initializer.
        """
        self.lang = param
        # Loaded on demand by _lemmatize.
        self.langdata = None
        super().__init__(**kwargs)

    def __getstate__(self):
        """Return the state of the object for pickling purposes. The langdata
        field is set to None as it's more efficient to use load_data."""

        # Only these two fields are pickled, so the per-instance lemma cache
        # is left out too and gets rebuilt lazily after unpickling.
        # NOTE(review): attributes set by the base class initializer are not
        # part of this state either - confirm that this is intended.
        return {'lang': self.lang, 'langdata': None}

    def _lemmatize(self, word):
        """Lemmatize one word, loading the language data on first use."""
        if self.langdata is None:
            self.langdata = simplemma.load_data(self.lang)
        return simplemma.lemmatize(word, self.langdata)

    def _normalize_word(self, word):
        """Return the lemma of word, served from a per-instance LRU cache.

        The cache is created on first use instead of decorating this method
        with functools.lru_cache: a decorator-level cache is shared by all
        instances and is keyed on self, which keeps every analyzer (and its
        language data) alive for the lifetime of the process.
        """
        try:
            cached_lemmatize = self._lemma_cache
        except AttributeError:
            # First call on this instance, or first call after unpickling.
            cached_lemmatize = functools.lru_cache(
                maxsize=self._CACHE_SIZE)(self._lemmatize)
            self._lemma_cache = cached_lemmatize
        return cached_lemmatize(word)
||
27 |