Passed
Pull Request — master (#527)
by Osma
02:42
created

SpacyAnalyzer.normalize_word()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 3
Ratio 100 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 2
dl 3
loc 3
rs 10
c 0
b 0
f 0
1
"""spaCy analyzer for Annif. Uses a spaCy pipeline to lemmatize words."""
2
3
import spacy
4
from . import analyzer
5
6
7 View Code Duplication
class SpacyAnalyzer(analyzer.Analyzer):
    """Analyzer that produces lemmas using a spaCy pipeline."""

    name = "spacy"

    def __init__(self, param, **kwargs):
        """Load the spaCy pipeline identified by `param`.

        The pipeline is loaded with the 'ner' and 'parser' components
        excluded. Any remaining keyword arguments are passed on to the
        base class initializer.
        """
        excluded_components = ['ner', 'parser']
        self.param = param
        self.nlp = spacy.load(param, exclude=excluded_components)
        super().__init__(**kwargs)

    def tokenize_words(self, text):
        """Return the lemmas of the tokens in `text` as a list.

        Only lemmas accepted by `self.is_valid_token` are kept
        (that method is not defined in this class; presumably inherited).
        """
        accepted = []
        for token in self.nlp(text):
            candidate = token.lemma_
            if self.is_valid_token(candidate):
                accepted.append(candidate)
        return accepted

    def normalize_word(self, word):
        """Return the lemma of `word`, taken over the whole processed text."""
        processed = self.nlp(word)
        # Slice the full document into a single span and use its lemma.
        whole_span = processed[:]
        return whole_span.lemma_
22