1
|
|
|
# TODO Add docstring |
2
|
|
|
import re |
3
|
|
|
from . import transformer |
4
|
|
|
from . import inputlimiter |
5
|
|
|
from annif.exception import ConfigurationException |
6
|
|
|
|
7
|
|
|
|
8
|
|
View Code Duplication |
def _parse_transformer_args(param_string):
    """Split a transformer's argument string into positional and keyword parts.

    The string is the text found between the parentheses of a single
    transformer specification, e.g. ``"x,y=1"``. Items are separated by
    commas; an item containing ``=`` becomes a keyword argument and any
    other item becomes a positional argument. Values are kept as strings.

    Args:
        param_string: the raw argument string, or None/empty when the
            transformer was specified without arguments.

    Returns:
        A ``(posargs, kwargs)`` tuple: a list of positional argument
        strings and a dict mapping keyword names to string values.
    """
    if not param_string:
        return [], {}
    posargs = []
    kwargs = {}
    for item in param_string.split(','):
        # Tolerate spaces around the separators ("x, y = 1") so that keyword
        # names do not end up with stray leading/trailing whitespace.
        item = item.strip()
        # Split on the first '=' only: a value that itself contains '='
        # is kept intact instead of the whole item being silently dropped.
        name, separator, value = item.partition('=')
        if separator:
            kwargs[name.strip()] = value.strip()
        else:
            posargs.append(item)
    return posargs, kwargs
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def parse_specs(transformers_spec):
    """Parse a transformer configuration string into a list of tuples.

    A definition such as ``'A(x),B(y=1),C'`` is parsed into
    ``[('A', ['x'], {}), ('B', [], {'y': '1'}), ('C', [], {})]``, i.e. one
    ``(name, posargs, kwargs)`` tuple per transformer, in the order given.
    Argument values are kept as strings.

    Args:
        transformers_spec: comma-separated transformer definitions, each a
            name optionally followed by a parenthesised argument list.

    Returns:
        A list of ``(name, posargs, kwargs)`` tuples. Parts that do not
        start with a transformer name (for example an empty string) are
        skipped.
    """
    parsed = []
    # Split by commas not inside parentheses
    parts = re.split(r',\s*(?![^()]*\))', transformers_spec)
    for part in parts:
        # re.match only matches at the very start of the string, so strip
        # surrounding whitespace first; otherwise a definition such as
        # " pass" would fail to match and be dropped without notice.
        match = re.match(r'(\w+)(\((.*)\))?', part.strip())
        if match is None:
            continue
        name = match.group(1)
        # group(3) is the text inside the parentheses, or None when the
        # transformer was given without an argument list.
        posargs, kwargs = _parse_transformer_args(match.group(3))
        parsed.append((name, posargs, kwargs))
    return parsed
37
|
|
|
|
38
|
|
|
|
39
|
|
|
def get_transformer(transformer_specs, project):
    """Build a Transformer object from a transformer specification string.

    The specification is parsed with parse_specs() and every transformer
    name is looked up in the module-level ``_transformers`` registry.

    Args:
        transformer_specs: specification string understood by parse_specs().
        project: passed through unchanged to ``transformer.Transformer``.

    Returns:
        The object returned by ``transformer.Transformer`` when called with
        the list of transformer classes, the matching list of
        ``(posargs, kwargs)`` pairs, and ``project``.

    Raises:
        ConfigurationException: if a name in the specification is not a
            key of ``_transformers``.
    """
    definitions = parse_specs(transformer_specs)

    # Validate all names up front; the first unknown name aborts the build.
    for name, _, _ in definitions:
        if name not in _transformers:
            raise ConfigurationException(f"No such transformer {name}")

    classes = [_transformers[name] for name, _, _ in definitions]
    call_args = [(positional, keyword) for _, positional, keyword in definitions]
    return transformer.Transformer(classes, call_args, project)
49
|
|
|
|
50
|
|
|
|
51
|
|
|
# Registry mapping the transformer names that may appear in a specification
# string to the objects that implement them.
_transformers = {
    'pass': transformer.IdentityTransformer,
    'limit_input': inputlimiter.InputLimiter,
}
54
|
|
|
|