1
|
|
|
"""Functionality for obtaining text transformation from string specification""" |
2
|
|
|
|
3
|
|
|
import re |
4
|
|
|
from . import transform |
5
|
|
|
from . import inputlimiter |
6
|
|
|
from annif.util import parse_args |
7
|
|
|
from annif.exception import ConfigurationException |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
def parse_specs(transform_specs):
    """Break a transform specification string into (name, posargs, kwargs)
    tuples, one per transform and in the order given.

    For example 'transf_1(x),transf_2(y=42),transf_3' yields
    [(transf_1, [x], {}), (transf_2, [], {y: 42}), (transf_3, [], {})].

    The text between a transform's parentheses (or None when there are no
    parentheses) is handed to parse_args, and whatever pair it returns is
    used as the positional and keyword arguments. Parts that do not begin
    with a word character are silently skipped.
    """

    # A comma separates two transforms only when it is not enclosed in
    # parentheses; commas inside parentheses belong to an argument list.
    chunks = re.split(r',\s*(?![^()]*\))', transform_specs)
    found = (re.match(r'(\w+)(\((.*)\))?', chunk) for chunk in chunks)

    result = []
    for hit in found:
        if hit is not None:
            # group(1) is the transform name, group(3) the raw argument text
            name = hit.group(1)
            positional, keyword = parse_args(hit.group(3))
            result.append((name, positional, keyword))
    return result
26
|
|
|
|
27
|
|
|
|
28
|
|
|
def get_transform(transform_specs, project):
    """Create a transform chain from a specification string.

    The specification is parsed with parse_specs, and every transform name
    in it is looked up in the module-level _transforms registry. The
    resulting classes, their (posargs, kwargs) pairs and the given project
    are passed to transform.TransformChain, whose result is returned.

    Raises ConfigurationException if a transform name is not registered.
    """
    specs = parse_specs(transform_specs)

    # Reject the first unknown name before building anything
    for trans, _posargs, _kwargs in specs:
        if trans not in _transforms:
            raise ConfigurationException(f"No such transform {trans}")

    classes = [_transforms[trans] for trans, _posargs, _kwargs in specs]
    call_args = [(posargs, kwargs) for _trans, posargs, kwargs in specs]
    return transform.TransformChain(classes, call_args, project)
38
|
|
|
|
39
|
|
|
|
40
|
|
|
# Registry of the available transform classes, keyed by each class's `name`
# attribute; get_transform looks specification names up here.
_transforms = {
    cls.name: cls
    for cls in (
        transform.IdentityTransform,
        inputlimiter.InputLimiter,
    )
}
43
|
|
|
|