annif.analyzer.spacy - Code Metrics - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

annif.analyzer.spacy A
last analyzed 2025-08-06 13:59 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	50
Duplicated Lines	0 %

Importance

Changes

Metric	Value
eloc	33
dl	0
loc	50
rs	10
c	0
b	0
f	0
wmc	6

3 Methods

Rating	Name	Size	Complexity
A	SpacyAnalyzer.is_available()	4	1
A	SpacyAnalyzer.__init__()	16	3
A	SpacyAnalyzer.tokenize_words()	10	2

"""spaCy analyzer for Annif which uses spaCy for lemmatization"""

from __future__ import annotations

import importlib

import annif.util
from annif.exception import OperationFailedException

from . import analyzer

# Name of the optional keyword argument that SpacyAnalyzer.__init__ looks up
# in its kwargs to decide whether tokenize_words() lowercases the lemmas.
_KEY_LOWERCASE = "lowercase"


class SpacyAnalyzer(analyzer.Analyzer):
    """Analyzer that lemmatizes text with a spaCy language model.

    The spaCy pipeline named by ``param`` is loaded once at construction
    time (without its "ner" and "parser" components) and reused for every
    call to :meth:`tokenize_words`.
    """

    name = "spacy"

    @staticmethod
    def is_available() -> bool:
        """Return True iff the ``spacy`` package can be found for import."""
        # Import the submodule explicitly: a plain ``import importlib`` does
        # not guarantee that ``importlib.util`` is bound as an attribute, so
        # ``importlib.util.find_spec`` could raise AttributeError.
        from importlib.util import find_spec

        return find_spec("spacy") is not None

    def __init__(self, param: str, **kwargs) -> None:
        """Load the spaCy model and read the analyzer options.

        Args:
            param: Name of the spaCy model, passed to ``spacy.load``.
            **kwargs: Analyzer options. The ``lowercase`` key, if present,
                is converted with ``annif.util.boolean`` and controls
                whether lemmas are lowercased. All kwargs are also
                forwarded to the parent class constructor.

        Raises:
            OperationFailedException: If ``spacy.load`` raises an
                IOError/OSError for the requested model.
        """
        # Imported lazily so that this module can be imported (and
        # is_available() called) even when spaCy is not installed.
        import spacy

        self.param = param
        try:
            self.nlp = spacy.load(param, exclude=["ner", "parser"])
        except OSError as err:  # IOError is an alias of OSError in Python 3
            # Chain explicitly so the underlying spaCy error is recorded as
            # the direct cause of the user-facing exception.
            raise OperationFailedException(
                f"Loading spaCy model '{param}' failed - "
                + f"please download the model.\n{err}"
            ) from err

        if _KEY_LOWERCASE in kwargs:
            self.lowercase = annif.util.boolean(kwargs[_KEY_LOWERCASE])
        else:
            self.lowercase = False
        super().__init__(**kwargs)

    def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
        """Return the lemmas of the tokens spaCy finds in ``text``.

        Args:
            text: Input text; surrounding whitespace is stripped before it
                is passed to the spaCy pipeline.
            filter: When True, keep only lemmas accepted by
                ``self.is_valid_token`` (not defined in this class;
                presumably inherited from ``analyzer.Analyzer``).

        Returns:
            The list of lemmas, lowercased if the ``lowercase`` option was
            enabled at construction time.
        """
        lemmas = (token.lemma_ for token in self.nlp(text.strip()))
        if filter:
            # Validity is checked on the lemma as produced by spaCy, i.e.
            # before any lowercasing is applied.
            lemmas = (lemma for lemma in lemmas if self.is_valid_token(lemma))
        if self.lowercase:
            return [lemma.lower() for lemma in lemmas]
        return list(lemmas)


1			"""spaCy analyzer for Annif which uses spaCy for lemmatization"""
2
3			from __future__ import annotations
4
5			import importlib
6
7			import annif.util
8			from annif.exception import OperationFailedException
9
10			from . import analyzer
11
12			_KEY_LOWERCASE = "lowercase"
13
14
15			class SpacyAnalyzer(analyzer.Analyzer):
16			name = "spacy"
17
18			@staticmethod
19			def is_available() -> bool:
20			# return True iff spaCy is installed
21			return importlib.util.find_spec("spacy") is not None
22
23			def __init__(self, param: str, **kwargs) -> None:
24			import spacy
25
26			self.param = param
27			try:
28			self.nlp = spacy.load(param, exclude=["ner", "parser"])
29			except IOError as err:
30			raise OperationFailedException(
31			f"Loading spaCy model '{param}' failed - "
32			+ f"please download the model.\n{err}"
33			)
34			if _KEY_LOWERCASE in kwargs:
35			self.lowercase = annif.util.boolean(kwargs[_KEY_LOWERCASE])
36			else:
37			self.lowercase = False
38			super().__init__(**kwargs)
39
40			def tokenize_words(self, text: str, filter: bool = True) -> list[str]:
41			lemmas = [
42			lemma
43			for lemma in (token.lemma_ for token in self.nlp(text.strip()))
44			if (not filter or self.is_valid_token(lemma))
45			]
46			if self.lowercase:
47			return [lemma.lower() for lemma in lemmas]
48			else:
49			return lemmas
50

NatLibFi / Annif

annif.analyzer.spacy A last analyzed 2025-08-06 13:59 UTC

Complexity

Size/Duplication

Importance

3 Methods

Duplication Side-by-Side

Filter issues like

annif.analyzer.spacy A
last analyzed 2025-08-06 13:59 UTC