annif.transform.transform   A
last analyzed

Complexity

Total Complexity 13

Size/Duplication

Total Lines 95
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 57
dl 0
loc 95
rs 10
c 0
b 0
f 0
wmc 13

8 Methods

Rating   Name   Duplication   Size   Complexity  
A BaseTransform.transform_doc() 0 7 1
A TransformChain.transform_corpus() 0 2 1
A BaseTransform.transform_text() 0 5 1
A TransformChain.__init__() 0 8 1
A BaseTransform.__init__() 0 8 3
A IdentityTransform.transform_text() 0 2 1
A TransformChain._init_transforms() 0 16 3
A TransformChain.transform_doc() 0 4 2
1
"""Common functionality for transforming text of input documents."""
2
3
from __future__ import annotations
4
5
import abc
6
from typing import TYPE_CHECKING, Type
7
8
from annif.corpus import Document, TransformingDocumentCorpus
9
from annif.exception import ConfigurationException
10
11
if TYPE_CHECKING:
12
    from annif.corpus.types import DocumentCorpus
13
    from annif.project import AnnifProject
14
15
16
class BaseTransform(abc.ABC):
    """Common parent of all text transformations.

    A concrete transformation has to override at least one of
    transform_doc() and transform_text(); the constructor rejects classes
    that override neither."""

    name = None

    def __init__(self, project: AnnifProject | None) -> None:
        """Store the project and verify that the concrete class overrides
        at least one of the two transformation hooks.

        Raises NotImplementedError when neither hook is overridden."""

        self.project = project

        concrete = type(self)
        overrides_text = concrete.transform_text != BaseTransform.transform_text
        overrides_doc = concrete.transform_doc != BaseTransform.transform_doc
        if not (overrides_text or overrides_doc):
            raise NotImplementedError(
                "Subclasses must override transform_text or transform_doc"
            )

    def transform_doc(self, doc: Document) -> Document:
        """Return a new Document whose text has been passed through
        self.transform_text(); the subject set and metadata are carried
        over from the given document as they are."""

        return Document(
            text=self.transform_text(doc.text),
            subject_set=doc.subject_set,
            metadata=doc.metadata,
        )

    def transform_text(self, text: str) -> str:
        """Return the transformed version of the given document text.

        The base implementation always raises NotImplementedError; it is
        only reached when a subclass relies on the default transform_doc()
        without providing its own transform_text()."""

        raise NotImplementedError(
            "Subclasses must implement transform_text if they call it"
        )  # pragma: no cover
47
48
49
class IdentityTransform(BaseTransform):
    """No-op transform: the document text comes out exactly as it went in."""

    name = "pass"

    def transform_text(self, text: str) -> str:
        """Return the given text unchanged."""
        return text
56
57
58
class TransformChain:
    """Class instantiating and holding the transformation objects performing
    the actual text transformation.

    The transforms are instantiated once, in the constructor, and are then
    applied to documents in the order in which they were given."""

    def __init__(
        self,
        transform_classes: list[Type[BaseTransform]],
        args: list[tuple[list, dict]],
        project: AnnifProject | None,
    ) -> None:
        """Instantiate every transform class with its own arguments.

        transform_classes and args are parallel lists: the n-th entry of
        args is a (positional arguments, keyword arguments) pair for the
        n-th transform class. The project (possibly None) is passed as the
        first constructor argument of every transform.

        Raises ConfigurationException if a transform rejects its arguments
        with a ValueError or TypeError."""

        self.project = project
        self.transforms = self._init_transforms(transform_classes, args)

    def _init_transforms(
        self,
        transform_classes: list[Type[BaseTransform]],
        args: list[tuple[list, dict]],
    ) -> list[BaseTransform]:
        """Create the transform instances, converting argument errors into
        a ConfigurationException that names the offending transform."""

        transforms = []
        for trans, (posargs, kwargs) in zip(transform_classes, args):
            try:
                transforms.append(trans(self.project, *posargs, **kwargs))
            except (ValueError, TypeError) as err:
                # The project is optional; without this guard a chain built
                # with project=None would fail here with an AttributeError
                # instead of reporting the actual configuration problem.
                project_id = (
                    self.project.project_id if self.project is not None else None
                )
                raise ConfigurationException(
                    f"Invalid arguments to {trans.name} transform: "
                    f"{posargs}, {kwargs})",
                    project_id=project_id,
                ) from err
        return transforms

    def transform_doc(self, doc: Document) -> Document:
        """Run the document through every transform in order and return the
        final result. With no transforms the document is returned as is."""

        for trans in self.transforms:
            doc = trans.transform_doc(doc)
        return doc

    def transform_corpus(self, corpus: DocumentCorpus) -> TransformingDocumentCorpus:
        """Wrap the corpus in a TransformingDocumentCorpus that is given
        self.transform_doc as its transformation function."""

        return TransformingDocumentCorpus(corpus, self.transform_doc)
95