annif.transform   A
last analyzed

Complexity

Total Complexity 6

Size/Duplication

Total Lines 64
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 43
dl 0
loc 64
rs 10
c 0
b 0
f 0
wmc 6

2 Functions

Rating   Name   Duplication   Size   Complexity  
A get_transform() 0 10 3
A parse_specs() 0 18 3
1
"""Functionality for obtaining text transformation from string specification"""
2
3
from __future__ import annotations
4
5
import re
6
from typing import TYPE_CHECKING
7
8
import annif
9
from annif.exception import ConfigurationException
10
from annif.util import parse_args
11
12
from . import inputlimiter, select, transform
13
14
if TYPE_CHECKING:
15
    from annif.project import AnnifProject
16
    from annif.transform.transform import TransformChain
17
18
19
def parse_specs(
    transform_specs: str,
) -> list[tuple[str, list, dict]]:
    """Parse a transformation specification into a list of tuples, e.g.
    'transf_1(x),transf_2(y=42),transf_3' is parsed to
    [(transf_1, [x], {}), (transf_2, [], {y: 42}), (transf_3, [], {})].

    Each tuple holds the transform name, its positional arguments and its
    keyword arguments; the text between the parentheses is handed to
    parse_args for splitting into those arguments. Whitespace around an
    individual transform is ignored. Parts that do not begin with a word
    character (such as the empty parts produced by an empty specification
    or a stray comma) are skipped."""

    parsed = []
    # Split by commas not inside parentheses
    parts = re.split(r",\s*(?![^()]*\))", transform_specs)
    for part in parts:
        # Strip first: re.match anchors at the start of the string, so a
        # transform preceded by whitespace would otherwise be dropped silently.
        match = re.match(r"(\w+)(\((.*)\))?", part.strip())
        if match is None:
            continue
        name = match.group(1)
        # group(3) is None when the transform was given without parentheses
        posargs, kwargs = parse_args(match.group(3))
        parsed.append((name, posargs, kwargs))
    return parsed
37
38
39
def get_transform(transform_specs: str, project: AnnifProject | None) -> TransformChain:
    """Create a TransformChain from a transformation specification string.

    The specification is parsed with parse_specs and every transform name is
    looked up in the module-level _transforms registry. The resulting classes
    and their (positional, keyword) argument pairs are passed, together with
    the project, to TransformChain.

    Raises ConfigurationException if a transform name is not registered."""

    transform_classes = []
    args = []
    for trans, posargs, kwargs in parse_specs(transform_specs):
        if trans not in _transforms:
            raise ConfigurationException(f"No such transform {trans}")
        transform_classes.append(_transforms[trans])
        # Arguments are kept in the same order as the classes they belong to
        args.append((posargs, kwargs))
    return transform.TransformChain(transform_classes, args, project)
49
50
51
# Registry of the transforms that are always available, keyed by the name
# under which get_transform() looks them up.
_transforms = {
    transform.IdentityTransform.name: transform.IdentityTransform,
    inputlimiter.InputLimiter.name: inputlimiter.InputLimiter,
    select.SelectTransform.name: select.SelectTransform,
}
56
57
# Optional transforms: registered only when their module can be imported
try:
    from . import langfilter
except ImportError:
    annif.logger.debug("pycld3 not available, not enabling filter_language transform")
else:
    # Import succeeded, so make the transform available by its name
    _transforms[langfilter.LangFilter.name] = langfilter.LangFilter
64