1
|
|
|
"""Functionality for obtaining text transformation from string specification""" |
2
|
|
|
|
3
|
|
|
from __future__ import annotations |
4
|
|
|
|
5
|
|
|
import re |
6
|
|
|
from typing import TYPE_CHECKING |
7
|
|
|
|
8
|
|
|
import annif |
9
|
|
|
from annif.exception import ConfigurationException |
10
|
|
|
from annif.util import parse_args |
11
|
|
|
|
12
|
|
|
from . import inputlimiter, select, transform |
13
|
|
|
|
14
|
|
|
if TYPE_CHECKING: |
15
|
|
|
from annif.project import AnnifProject |
16
|
|
|
from annif.transform.transform import TransformChain |
17
|
|
|
|
18
|
|
|
|
19
|
|
|
def parse_specs(
    transform_specs: str,
) -> list[tuple[str, list, dict]]:
    """Turn a transformation specification string into a list of
    (name, positional args, keyword args) tuples, e.g.
    'transf_1(x),transf_2(y=42),transf_3' is parsed to
    [(transf_1, [x], {}), (transf_2, [], {y: 42}), (transf_3, [], {})].

    Parts of the specification that do not begin with a word character
    (for instance an empty string) are skipped without raising an error.
    """

    specs = []
    # Commas that belong to a parenthesised argument list must not separate
    # transforms, so only split on commas that are outside parentheses.
    for chunk in re.split(r",\s*(?![^()]*\))", transform_specs):
        found = re.match(r"(\w+)(\((.*)\))?", chunk)
        if found is not None:
            # Group 1 is the transform name, group 3 the text between the
            # parentheses (None when the transform is given without arguments).
            name, _, arg_string = found.groups()
            posargs, kwargs = parse_args(arg_string)
            specs.append((name, posargs, kwargs))
    return specs
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def get_transform(transform_specs: str, project: AnnifProject | None) -> TransformChain:
    """Build a TransformChain from a transformation specification string.

    The specification is parsed with parse_specs() and every transform name
    is looked up in the module-level _transforms registry. The resolved
    classes, their (positional args, keyword args) pairs and the project are
    handed over to TransformChain.

    Raises ConfigurationException for the first transform name in the
    specification that is not present in the registry.
    """
    specs = parse_specs(transform_specs)

    # Reject the specification as soon as an unregistered name is found,
    # before any part of the chain is assembled.
    unknown = next((name for name, _, _ in specs if name not in _transforms), None)
    if unknown is not None:
        raise ConfigurationException(f"No such transform {unknown}")

    chain_classes = [_transforms[name] for name, _, _ in specs]
    chain_args = [(posargs, kwargs) for _, posargs, kwargs in specs]
    return transform.TransformChain(chain_classes, chain_args, project)
49
|
|
|
|
50
|
|
|
|
51
|
|
|
# Registry mapping each transform's name, as written in a transformation
# specification, to the class that implements it.
_transforms = {
    cls.name: cls
    for cls in (
        transform.IdentityTransform,
        inputlimiter.InputLimiter,
        select.SelectTransform,
    )
}

# Optional transforms: only registered when the module can be imported.
try:
    from . import langfilter

    _transforms[langfilter.LangFilter.name] = langfilter.LangFilter
except ImportError:
    annif.logger.debug("pycld3 not available, not enabling filter_language transform")
64
|
|
|
|