Total Complexity | 5 |
Total Lines | 34 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | """A transformation that allows selecting Document metadata fields to be |
||
2 | used instead of the main text.""" |
||
3 | |||
4 | from __future__ import annotations |
||
5 | |||
6 | from typing import TYPE_CHECKING |
||
7 | |||
8 | from annif.corpus import Document |
||
9 | |||
10 | from . import transform |
||
11 | |||
12 | if TYPE_CHECKING: |
||
13 | from annif.project import AnnifProject |
||
14 | |||
15 | |||
class SelectTransform(transform.BaseTransform):
    """Transform that rebuilds a document's text from selected fields.

    Each configured field name is either the literal ``"text"``, which
    selects the document's own text, or a key that is looked up in the
    document's metadata.
    """

    name = "select"

    def __init__(self, project: AnnifProject | None, *fields: str) -> None:
        """Initialize the base transform and remember the field names.

        Args:
            project: Passed through unchanged to the base class initializer.
            *fields: Field names to select, kept in the order given.
        """
        super().__init__(project)
        self.fields = fields

    def _get_texts(self, doc):
        """Yield one value per configured field, in configuration order.

        The field ``"text"`` yields ``doc.text``; any other name is looked
        up in ``doc.metadata`` and yields an empty string when the key is
        absent.
        """
        for field_name in self.fields:
            yield (
                doc.text
                if field_name == "text"
                else doc.metadata.get(field_name, "")
            )

    def transform_doc(self, doc: Document) -> Document:
        """Return a new Document built from the selected field values.

        The selected values are joined with newlines to form the new text;
        the subject set and metadata of ``doc`` are carried over as they are.
        """
        selected = self._get_texts(doc)
        return Document(
            text="\n".join(selected),
            subject_set=doc.subject_set,
            metadata=doc.metadata,
        )
||
35 |