Passed
Pull Request — master (#675)
by Juho
03:07
created

annif.cli_util._validate_backend_params()   A

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nop 3
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
import collections
2
import os
3
import sys
4
5
import click
6
import click_log
7
from flask import current_app
8
from flask.cli import ScriptInfo
9
10
import annif
11
from annif.exception import ConfigurationException
12
from annif.project import Access
13
from annif.suggestion import SuggestionFilter
14
15
# Logger shared by the helpers in this module; it is the annif package logger.
logger = annif.logger
16
17
18
def _set_project_config_file_path(ctx, param, value):
    """Click callback storing a projects path given on the command line.

    The Flask application is loaded through the ScriptInfo object attached
    to the click context. When ``value`` is non-empty it is written to the
    application's ``PROJECTS_CONFIG_PATH`` setting, replacing whatever was
    configured before. ``param`` is part of the click callback signature
    and is not used.
    """
    script_info = ctx.ensure_object(ScriptInfo)
    app = script_info.load_app()
    with app.app_context():
        if not value:
            # No path given on the command line: keep the existing setting.
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
23
24
25
def common_options(f):
    """Decorate a CLI command with the options shared by all commands.

    Two options are attached: ``-p``/``--projects``, which is processed
    eagerly by ``_set_project_config_file_path`` and is not passed on to the
    command function, and the verbosity option supplied by click_log for
    the module logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    verbosity_option = click_log.simple_verbosity_option(logger)
    # The projects option is applied first, the verbosity option wraps it.
    return verbosity_option(projects_option(f))
37
38
39
def backend_param_option(f):
    """Decorate a CLI command with the ``--backend-param``/``-b`` option.

    The option may be repeated; each occurrence overrides one backend
    parameter from the configuration file.
    """
    help_text = (
        "Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`."
    )
    add_option = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=help_text,
    )
    return add_option(f)
48
49
50
def docs_limit_option(f):
    """Decorate a CLI command with the ``--docs-limit``/``-d`` option.

    The option takes a non-negative integer giving the maximum number of
    documents to use; when omitted its value is None.
    """
    add_option = click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=click.IntRange(0, None),
        help="Maximum number of documents to use",
    )
    return add_option(f)
60
61
62
def get_project(project_id):
    """Look up a project by its ID, exiting the program if the lookup fails.

    The project is requested from the Annif registry with
    ``min_access=Access.private``. If the registry raises ValueError, an
    error message is echoed to the error stream and the process exits with
    status 1.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
70
71
72
def get_vocab(vocab_id):
    """Look up a vocabulary by its ID, exiting the program if the lookup fails.

    The vocabulary is requested from the Annif registry with
    ``min_access=Access.private``. If the registry raises ValueError, an
    error message is echoed to the error stream and the process exits with
    status 1.
    """
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
81
82
83
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a list of pathnames.

    Every path is either a directory, opened as a DocumentDirectory that
    requires subjects and uses ``vocab_lang``, or anything else, opened as
    a DocumentFile. Several paths are merged into a CombinedCorpus, a
    single path is returned as its own corpus, and an empty list produces a
    warning and a corpus read from the null device. When ``docs_limit`` is
    not None the result is wrapped in a LimitingDocumentCorpus.
    """

    def _open_one(path):
        """Open one path as a directory corpus or a file corpus."""
        if not os.path.isdir(path):
            return annif.corpus.DocumentFile(path, subject_index)
        return annif.corpus.DocumentDirectory(
            path, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count > 1:
        docs = annif.corpus.CombinedCorpus([_open_one(path) for path in paths])
    elif path_count == 1:
        docs = _open_one(paths[0])
    else:
        # Nothing to read: fall back to the null device so callers still
        # receive a corpus object.
        logger.warning("Reading empty file")
        docs = _open_one(os.path.devnull)

    if docs_limit is None:
        return docs
    return annif.corpus.LimitingDocumentCorpus(docs, docs_limit)
109
110
111
def open_text_documents(paths, docs_limit):
    """Read plain text documents from the given paths into a DocumentList.

    At most ``docs_limit`` paths are used (all of them when it is None). A
    path of "-" means the text is read from standard input; any other path
    is opened as UTF-8 (a leading BOM is skipped) with undecodable bytes
    replaced. The resulting Documents have no subjects. Files are read
    lazily, as the returned DocumentList consumes the generator.
    """

    def _read_text(path):
        """Return the full text behind one path ("-" meaning stdin)."""
        if path == "-":
            return sys.stdin.read()
        with open(path, errors="replace", encoding="utf-8-sig") as docfile:
            return docfile.read()

    def _iter_documents(selected_paths):
        for path in selected_paths:
            yield annif.corpus.Document(text=_read_text(path), subject_set=None)

    selected = paths[:docs_limit]
    return annif.corpus.DocumentList(_iter_documents(selected))
129
130
131
def show_hits(hits, project, lang, file=None):
    """Print subject suggestions as tab-separated rows.

    One row is echoed per hit, to ``file`` when given and otherwise to
    click's default output. A row holds the subject URI in angle brackets,
    the label in language ``lang``, the notation when the subject has one,
    and the score of the hit.
    """
    for suggestion in hits.as_list():
        subject = project.subjects[suggestion.subject_id]
        uri = subject.uri
        # Empty values (e.g. a missing notation) are left out of the row.
        columns = [
            column for column in (subject.labels[lang], subject.notation) if column
        ]
        row = "<{}>\t{}\t{}".format(uri, "\t".join(columns), suggestion.score)
        click.echo(row, file=file)
145
146
147
def parse_backend_params(backend_param, project):
    """Turn ``--backend-param`` values into a nested dict.

    Each value has the form ``<backend>.<parameter>=<value>``. The result
    maps backend name to a dict of parameter name to value (as a string).
    Every backend name is checked against the project's backend by
    ``_validate_backend_params``. A value lacking the "." or "=" separator
    raises ValueError from the unpacking.
    """
    parsed = collections.defaultdict(dict)
    for option in backend_param:
        # Split only on the first separator so values may contain "." or "=".
        backend_id, assignment = option.split(".", 1)
        name, value = assignment.split("=", 1)
        _validate_backend_params(backend_id, option, project)
        parsed[backend_id][name] = value
    return parsed
157
158
159
def _validate_backend_params(backend, beparam, project):
    """Check that a ``-b`` option targets the backend used by the project.

    ``backend`` is the backend name parsed from the option and ``beparam``
    the full option value, used only in the error message. Raises
    ConfigurationException when ``backend`` differs from the ``backend``
    entry of the project configuration; otherwise returns None.
    """
    project_backend = project.config["backend"]
    if backend == project_backend:
        return
    raise ConfigurationException(
        'The backend {} in CLI option "-b {}" not matching the project'
        " backend {}.".format(backend, beparam, project_backend)
    )
165
166
167
def generate_filter_batches(subjects, filter_batch_max_limit):
    """Build a filter and an evaluation batch for every limit/threshold pair.

    Limits run from 1 to ``filter_batch_max_limit`` inclusive and
    thresholds over the 20 values ``i * 0.05`` for ``i`` in 0..19. The
    returned dict maps each ``(limit, threshold)`` pair to a tuple of a
    SuggestionFilter built with those settings and a new EvaluationBatch,
    both created for ``subjects``.
    """
    # NOTE(review): imported locally in the original as well; presumably to
    # defer loading annif.eval until it is needed - confirm before moving.
    import annif.eval

    thresholds = [step * 0.05 for step in range(20)]
    return {
        (limit, threshold): (
            SuggestionFilter(subjects, limit, threshold),
            annif.eval.EvaluationBatch(subjects),
        )
        for limit in range(1, filter_batch_max_limit + 1)
        for threshold in thresholds
    }
177