Passed
Push — lazy-imports ( f5a695...70018e )
by Juho
16:14 queued 08:48
created

annif.cli_util   A

Complexity

Total Complexity 32

Size/Duplication

Total Lines 195
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 119
dl 0
loc 195
rs 9.84
c 0
b 0
f 0
wmc 32

15 Functions

Rating   Name   Duplication   Size   Complexity  
A _set_project_config_file_path() 0 5 3
A common_options() 0 12 1
A get_vocab() 0 9 2
A backend_param_option() 0 9 1
A docs_limit_option() 0 10 1
A _validate_backend_params() 0 5 2
A generate_filter_params() 0 4 1
A open_documents() 0 26 5
A get_project() 0 8 2
A parse_backend_params() 0 10 2
A open_text_documents() 0 18 4
A project_id() 0 3 1
A show_hits() 0 14 2
A _get_completion_choices() 0 7 3
A complete_param() 0 6 2
1
"""Utility functions for Annif CLI commands"""
2
3
4
import collections
5
import itertools
6
import os
7
import sys
8
9
import click
10
import click_log
11
from flask import current_app
12
13
import annif
14
from annif.exception import ConfigurationException
15
from annif.project import Access
16
17
# Logger object exposed by the annif package; used in this module for warnings
# and handed to click_log to build the CLI verbosity option.
logger = annif.logger
18
19
20
def _set_project_config_file_path(ctx, param, value):
    """Override the default path or the path given in env by CLI option.

    Click option callback: ``ctx`` is the Click context, ``param`` the option
    being processed (unused) and ``value`` the path given on the command line.
    When a value was given it is stored in the application configuration under
    ``PROJECTS_CONFIG_PATH``; otherwise the configuration is left untouched.
    """
    # The application is loaded in both cases so that current_app resolves
    # inside the context.
    with ctx.obj.load_app().app_context():
        if value:
            current_app.config["PROJECTS_CONFIG_PATH"] = value
25
26
27
def common_options(f):
    """Decorator to add common options for all CLI commands.

    Adds the ``-p``/``--projects`` option, which is processed eagerly and is
    not passed to the command function (its callback stores the path in the
    application configuration), and then the verbosity option provided by
    click_log for the module logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    f = projects_option(f)
    return click_log.simple_verbosity_option(logger)(f)
39
40
41
def project_id(f):
    """Decorator to add a project ID parameter to a CLI command.

    The argument is named ``project_id`` and uses ``complete_param`` for
    shell completion.
    """
    return click.argument("project_id", shell_complete=complete_param)(f)
44
45
46
def backend_param_option(f):
    """Decorator to add an option for CLI commands to override BE parameters.

    The ``-b``/``--backend-param`` option may be given multiple times; the
    command receives the collected values, each expected in the form
    ``<backend>.<parameter>=<value>``.
    """
    return click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=(
            "Override backend parameter of the config file. "
            "Syntax: `-b <backend>.<parameter>=<value>`."
        ),
    )(f)
55
56
57
def docs_limit_option(f):
    """Decorator to add an option for limiting the number of documents to use.

    The ``-d``/``--docs-limit`` option accepts a non-negative integer and
    defaults to ``None`` (no limit given).
    """
    return click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=click.IntRange(0, None),
        help="Maximum number of documents to use",
    )(f)
67
68
69
def get_project(project_id):
    """Helper function to get a project by ID and bail out if it doesn't exist.

    Looks the project up in the Annif registry with private-level access. If
    the lookup raises ValueError, an error message is written to stderr and
    the process exits with status 1.
    """
    try:
        return annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        click.echo(f"No projects found with id '{project_id}'.", err=True)
        sys.exit(1)
77
78
79
def get_vocab(vocab_id):
    """Helper function to get a vocabulary by ID and bail out if it doesn't exist.

    Looks the vocabulary up in the Annif registry with private-level access.
    If the lookup raises ValueError, an error message is written to stderr and
    the process exits with status 1.
    """
    try:
        return annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        click.echo(f"No vocabularies found with the id '{vocab_id}'.", err=True)
        sys.exit(1)
88
89
90
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Helper function to open a document corpus from a list of pathnames.

    Each path is either a TSV file or a directory of TXT files. For
    directories with subjects in TSV files, the given vocabulary language
    will be used to convert subject labels into URIs.

    With no paths a warning is logged and the null device is opened as an
    empty file; with one path its corpus is used directly; with several the
    individual corpora are wrapped in a CombinedCorpus. If docs_limit is not
    None, the result is additionally wrapped in a LimitingDocumentCorpus.
    """

    def open_doc_path(path, subject_index):
        """Open a single path and return it as a DocumentCorpus."""
        if os.path.isdir(path):
            return annif.corpus.DocumentDirectory(
                path, subject_index, vocab_lang, require_subjects=True
            )
        return annif.corpus.DocumentFile(path, subject_index)

    if not paths:
        logger.warning("Reading empty file")
        docs = open_doc_path(os.path.devnull, subject_index)
    elif len(paths) == 1:
        docs = open_doc_path(paths[0], subject_index)
    else:
        corpora = [open_doc_path(path, subject_index) for path in paths]
        docs = annif.corpus.CombinedCorpus(corpora)

    # A limit of 0 is a valid value, hence the explicit comparison with None.
    if docs_limit is not None:
        docs = annif.corpus.LimitingDocumentCorpus(docs, docs_limit)
    return docs
116
117
118
def open_text_documents(paths, docs_limit):
    """Helper function to read text documents from the given file paths.

    Returns a DocumentList object with Documents having no subjects. If a
    path is "-", the document text is read from standard input. Files are
    decoded as UTF-8 (a leading BOM is dropped) with undecodable bytes
    replaced. The maximum number of documents to read is set by the
    docs_limit parameter; None means all paths are used.
    """

    def _docs(paths):
        """Lazily yield one Document per path."""
        for path in paths:
            if path == "-":
                doc = annif.corpus.Document(text=sys.stdin.read(), subject_set=None)
            else:
                with open(path, errors="replace", encoding="utf-8-sig") as docfile:
                    doc = annif.corpus.Document(text=docfile.read(), subject_set=None)
            yield doc

    # Slicing with None as the upper bound keeps every path.
    return annif.corpus.DocumentList(_docs(paths[:docs_limit]))
136
137
138
def show_hits(hits, project, lang, file=None):
    """Print subject suggestions to the console or a file.

    The suggestions are displayed as a table, with one row per hit. Each row
    contains the URI in angle brackets, the label in the specified language,
    the notation if the subject has one, and the score of the suggestion,
    separated by tabs. Output goes to ``file`` when given, otherwise to
    click's default output stream.
    """
    for hit in hits:
        subj = project.subjects[hit.subject_id]
        # filter(None, ...) drops an empty label or a missing notation so that
        # no empty column is emitted.
        line = "<{}>\t{}\t{}".format(
            subj.uri,
            "\t".join(filter(None, (subj.labels[lang], subj.notation))),
            hit.score,
        )
        click.echo(line, file=file)
152
153
154
def parse_backend_params(backend_param, project):
    """Parse a list of backend parameters given with the --backend-param
    option into a nested dict structure.

    Each item has the form ``<backend>.<parameter>=<value>``. The result maps
    backend name to a dict of parameter name to value (values stay strings).
    Every item's backend is checked against the project's configured backend.

    Raises ValueError if an item lacks the ``.`` or ``=`` separator, and
    whatever the backend check raises on a mismatch.
    """
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        # Split only on the first separator so that values may themselves
        # contain "." or "=".
        backend, param = beparam.split(".", 1)
        key, val = param.split("=", 1)
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params
164
165
166
def _validate_backend_params(backend, beparam, project):
    """Check that a backend named in a CLI override matches the project.

    ``backend`` is the backend name parsed from the option value, ``beparam``
    the full option value (used only in the error message) and ``project`` an
    object whose ``config["backend"]`` holds the configured backend name.

    Raises ConfigurationException when the two backend names differ.
    """
    if backend != project.config["backend"]:
        raise ConfigurationException(
            'The backend {} in CLI option "-b {}" not matching the project'
            " backend {}.".format(backend, beparam, project.config["backend"])
        )
172
173
174
def generate_filter_params(filter_batch_max_limit):
    """Return every (limit, threshold) combination to try when filtering.

    Limits run from 1 to ``filter_batch_max_limit`` inclusive and thresholds
    are the 20 values ``i * 0.05`` for ``i`` in 0..19. The result is a list of
    tuples ordered by limit first, then threshold; it is empty when
    ``filter_batch_max_limit`` is less than 1.
    """
    limits = range(1, filter_batch_max_limit + 1)
    thresholds = [i * 0.05 for i in range(20)]
    return list(itertools.product(limits, thresholds))
178
179
180
def _get_completion_choices(param):
    """Return the candidate values for shell completion of ``param``.

    Project identifiers come from the Annif registry for a parameter named
    ``project_id`` and vocabulary identifiers for one named ``vocab_id``; any
    other parameter gets an empty list.
    """
    # NOTE(review): the registry results are only iterated by the caller, which
    # expects each item to be a string -- presumably these are mappings keyed
    # by ID; confirm against annif.registry.
    if param.name == "project_id":
        return annif.registry.get_projects()
    if param.name == "vocab_id":
        return annif.registry.get_vocabs()
    return []
187
188
189
def complete_param(ctx, param, incomplete):
    """Shell-completion callback for CLI parameters.

    Loads the application, and within its application context returns the
    list of completion choices for ``param`` that start with the text typed
    so far (``incomplete``).
    """
    with ctx.obj.load_app().app_context():
        return [
            choice
            for choice in _get_completion_choices(param)
            if choice.startswith(incomplete)
        ]
196