Total Complexity | 9 |
Total Lines | 50 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | """Collection of language-specific analyzers and analyzer registry for Annif""" |
||
2 | |||
3 | import re |
||
4 | from . import simple |
||
5 | from . import snowball |
||
6 | import annif |
||
7 | |||
8 | _analyzers = {} |
||
9 | |||
10 | |||
def register_analyzer(analyzer):
    """Add an analyzer to the module-level registry.

    The analyzer is stored under its ``name`` attribute; registering
    another analyzer with the same name replaces the earlier entry.
    """
    registry_key = analyzer.name
    _analyzers[registry_key] = analyzer
||
13 | |||
14 | |||
def get_analyzer(analyzerspec):
    """Create an analyzer instance from a specification string.

    The specification is an analyzer name, optionally followed by a
    parenthesized, comma-separated parameter list, for example ``name``,
    ``name(arg)`` or ``name(arg,key=value)``. Parameters without ``=`` are
    passed to the analyzer positionally; ``key=value`` parameters are
    passed as keyword arguments. All values are passed as strings, with
    surrounding whitespace removed. If no positional parameter is given,
    a single ``None`` is passed as the only positional argument.

    Raises ValueError if the specification does not start with an analyzer
    name, or if no analyzer has been registered under that name.
    Exceptions raised by the analyzer constructor itself propagate
    unchanged.
    """
    match = re.match(r'(\w+)(\((.*)\))?', analyzerspec)
    if match is None:
        raise ValueError(
            "Invalid analyzer specification {}".format(analyzerspec))

    analyzer = match.group(1)
    param_string = match.group(3)
    kwargs = {}
    pos_args = []
    if param_string:
        for p_string in param_string.split(','):
            # Tolerate whitespace after commas and around "=" so that
            # "name(a, k=v)" does not yield a keyword named " k".
            p_string = p_string.strip()
            # Split on the first "=" only: a value that itself contains
            # "=" is kept intact instead of the parameter being dropped.
            key, separator, value = p_string.partition('=')
            if separator:
                kwargs[key.strip()] = value.strip()
            else:
                pos_args.append(p_string)
    if not pos_args:
        pos_args = [None]

    # Keep the registry lookup separate from the constructor call so that
    # a KeyError raised inside the analyzer's own __init__ is not
    # misreported as an unknown analyzer.
    try:
        analyzer_class = _analyzers[analyzer]
    except KeyError:
        raise ValueError("No such analyzer {}".format(analyzer))
    return analyzer_class(*pos_args, **kwargs)
||
39 | |||
40 | |||
# Analyzers that are always available, registered in a fixed order.
for _core_analyzer in (simple.SimpleAnalyzer, snowball.SnowballAnalyzer):
    register_analyzer(_core_analyzer)
del _core_analyzer  # do not leave the loop variable in the module namespace

# The voikko analyzer is optional: when importing its module fails with
# ImportError, it is left out of the registry and a debug message is logged.
try:
    from . import voikko
    register_analyzer(voikko.VoikkoAnalyzer)
except ImportError:
    annif.logger.debug("voikko not available, not enabling voikko analyzer")
||
50 |