| Total Complexity | 5 |
| Total Lines | 34 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """A transformation that allows selecting Document metadata fields to be |
||
| 2 | used instead of the main text.""" |
||
| 3 | |||
| 4 | from __future__ import annotations |
||
| 5 | |||
| 6 | from typing import TYPE_CHECKING |
||
| 7 | |||
| 8 | from annif.corpus import Document |
||
| 9 | |||
| 10 | from . import transform |
||
| 11 | |||
| 12 | if TYPE_CHECKING: |
||
| 13 | from annif.project import AnnifProject |
||
| 14 | |||
| 15 | |||
class SelectTransform(transform.BaseTransform):
    """Transform that rebuilds a document's text from selected fields.

    Each configured field name contributes one piece of text, in the order
    the names were given. The special name ``"text"`` refers to the
    document's own text; any other name is looked up in the document's
    metadata.
    """

    name = "select"

    def __init__(self, project: AnnifProject | None, *fields: str) -> None:
        """Remember which fields to select.

        Args:
            project: Passed unchanged to the base transform initializer.
            *fields: Names of the fields whose values make up the new text.
        """
        super().__init__(project)
        self.fields = fields

    def _get_texts(self, doc):
        """Yield the value of each selected field of ``doc``, in order.

        ``"text"`` yields ``doc.text``; every other name is subscripted out
        of ``doc.metadata``, so a failed lookup propagates to the caller.
        """
        for field_name in self.fields:
            yield doc.text if field_name == "text" else doc.metadata[field_name]

    def transform_doc(self, doc: Document) -> Document:
        """Return a new Document whose text is the selected field values.

        The values are joined with newlines. The subject set and metadata
        of ``doc`` are passed through to the new Document as they are.
        """
        selected = self._get_texts(doc)
        return Document(
            text="\n".join(selected),
            subject_set=doc.subject_set,
            metadata=doc.metadata,
        )
||
| 35 |