| Total Complexity | 10 |
| Total Lines | 55 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Collection of language-specific analyzers and analyzer registry for Annif""" |
||
| 2 | |||
| 3 | import re |
||
| 4 | from . import simple |
||
| 5 | from . import snowball |
||
| 6 | import annif |
||
| 7 | |||
# Module-level registry: maps an analyzer's ``name`` attribute to the
# analyzer object itself (looked up and called by get_analyzer).
_analyzers = {}


def register_analyzer(analyzer):
    """Store ``analyzer`` in the registry under its ``name`` attribute.

    Registering a second analyzer with the same name replaces the first.
    """
    key = analyzer.name
    _analyzers[key] = analyzer
||
| 13 | |||
| 14 | |||
def _parse_analyzer_args(param_string):
    """Split an analyzer parameter string into positional and keyword args.

    ``param_string`` is the comma-separated text found between the
    parentheses of an analyzer specification, e.g. ``"lang,key=value"``.
    Items containing ``=`` become keyword arguments (split on the first
    ``=`` only, so the value itself may contain ``=``); all other items
    become positional arguments, in order. Surrounding whitespace is
    stripped from names and values. Every value is returned as a string.

    Returns a ``(pos_args, kwargs)`` tuple. If no positional argument was
    given (including when ``param_string`` is empty or ``None``),
    ``pos_args`` is ``[None]`` so that a caller unpacking it with
    ``*pos_args`` always passes exactly one positional placeholder.
    """
    if not param_string:
        return [None], {}

    kwargs = {}
    pos_args = []
    for item in param_string.split(','):
        # partition() splits on the first '=' only; previously an item with
        # more than one '=' matched neither branch and was silently dropped.
        key, sep, value = item.partition('=')
        if sep:
            kwargs[key.strip()] = value.strip()
        else:
            pos_args.append(item.strip())

    if not pos_args:
        pos_args = [None]
    return pos_args, kwargs
||
| 30 | |||
| 31 | |||
def get_analyzer(analyzerspec):
    """Create an analyzer instance from a specification string.

    The specification has the form ``name`` or ``name(params)``, where
    ``name`` must have been registered with ``register_analyzer`` and
    ``params`` is a comma-separated list of positional and ``key=value``
    arguments handed to the analyzer when it is called.

    Raises ValueError if the specification does not start with a valid
    name, or if no analyzer with that name has been registered.
    """
    # NOTE(review): the pattern is not anchored at the end, so trailing text
    # after the name (or after the closing parenthesis) is ignored rather
    # than rejected. Kept as-is for backward compatibility.
    match = re.match(r'(\w+)(\((.*)\))?', analyzerspec)
    if match is None:
        raise ValueError(
            "Invalid analyzer specification {}".format(analyzerspec))

    analyzer = match.group(1)
    pos_args, kwargs = _parse_analyzer_args(match.group(3))

    # Check the registry explicitly instead of catching KeyError around the
    # call: a KeyError raised inside the analyzer's own constructor must not
    # be misreported as an unknown analyzer name.
    if analyzer not in _analyzers:
        raise ValueError("No such analyzer {}".format(analyzer))
    return _analyzers[analyzer](*pos_args, **kwargs)
||
| 44 | |||
| 45 | |||
# Analyzers whose modules are imported unconditionally at the top of the file
register_analyzer(simple.SimpleAnalyzer)
register_analyzer(snowball.SnowballAnalyzer)

# Optional analyzers: registered only when their module can be imported
try:
    from . import voikko
except ImportError:
    annif.logger.debug("voikko not available, not enabling voikko analyzer")
else:
    register_analyzer(voikko.VoikkoAnalyzer)
||
| 55 |