Passed
Pull Request — main (#694)
by Juho
05:15 queued 02:36
created

annif.cli_util._validate_backend_params()   A

Complexity

Conditions 2

Size

Total Lines 5
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nop 3
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
"""Utility functions for Annif CLI commands"""
2
3
4
import collections
5
import itertools
6
import os
7
import sys
8
9
import click
10
import click_log
11
from flask import current_app
12
13
import annif
14
from annif.exception import ConfigurationException
15
from annif.project import Access
16
17
logger = annif.logger
18
19
20
def _set_project_config_file_path(ctx, param, value):
    """Click option callback that stores the project configuration path.

    When a path is given on the command line it is written to the
    ``PROJECTS_CONFIG_PATH`` setting of the current application, overriding
    the default path or the path given in the environment. The ``param``
    argument is part of the callback signature but is not used.
    """
    app = ctx.obj.load_app()
    # The application context is entered even when no value was given, so the
    # app is always loaded by this callback.
    with app.app_context():
        if not value:
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
25
26
27
def common_options(f):
    """Decorator that attaches the options shared by all CLI commands.

    Two options are added to ``f``: ``-p``/``--projects`` for pointing at an
    existing project configuration file or directory, and the verbosity
    option provided by ``click_log`` for the module logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        # The value is consumed by the callback only; it is not passed on to
        # the command function.
        expose_value=False,
        is_eager=True,
    )
    verbosity_option = click_log.simple_verbosity_option(logger)
    # Apply the projects option first, then wrap the result with verbosity.
    return verbosity_option(projects_option(f))
39
40
41
def backend_param_option(f):
    """Decorator that adds the repeatable ``--backend-param``/``-b`` option,
    used by CLI commands to override backend parameters of the config file."""
    help_text = (
        "Override backend parameter of the config file. "
        + "Syntax: `-b <backend>.<parameter>=<value>`."
    )
    add_option = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=help_text,
    )
    return add_option(f)
50
51
52
def docs_limit_option(f):
    """Decorator that adds the ``--docs-limit``/``-d`` option, which lets CLI
    commands cap the number of documents to use. The value must be a
    non-negative integer; when the option is omitted the value is None."""
    add_option = click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=click.IntRange(0, None),
        help="Maximum number of documents to use",
    )
    return add_option(f)
62
63
64
def get_project(project_id):
    """Look up a project by its ID, allowing private projects.

    If the registry raises ValueError for the ID, an error message is written
    to standard error and the process exits with status 1.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
72
73
74
def get_vocab(vocab_id):
    """Look up a vocabulary by its ID, allowing private vocabularies.

    If the registry raises ValueError for the ID, an error message is written
    to standard error and the process exits with status 1.
    """
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
83
84
85
def make_list_template(*rows):
    """Build a ``str.format`` template for printing rows as aligned columns.

    Each column becomes a left-aligned, space-padded placeholder whose width is
    the length of the longest item found at that position in any of the given
    rows. Placeholders are separated by two spaces. With no rows (or only
    empty rows) the template is an empty string.
    """
    widths = []
    for row in rows:
        for position, item in enumerate(row):
            item_width = len(item)
            if position == len(widths):
                # First time this column is seen; positions arrive in order,
                # so a new column is always the next one.
                widths.append(item_width)
            elif item_width > widths[position]:
                widths[position] = item_width

    placeholders = (
        f"{{{position}: <{width}}}" for position, width in enumerate(widths)
    )
    return "  ".join(placeholders)
101
102
103
def format_datetime(dt):
    """Render a datetime as ``YYYY-MM-DD HH:MM:SS`` in the local time zone.

    A value of None is rendered as a single dash.
    """
    if dt is not None:
        local_dt = dt.astimezone()
        return local_dt.strftime("%Y-%m-%d %H:%M:%S")
    return "-"
108
109
110
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a list of pathnames.

    Each path is either a TSV file or a directory of TXT files. For
    directories with subjects in TSV files, the given vocabulary language is
    used to convert subject labels into URIs. An empty list of paths yields a
    corpus read from the null device, after logging a warning. Several paths
    are merged into one combined corpus. When docs_limit is not None the
    corpus is wrapped so that the number of documents is limited. The corpus
    is returned as an instance of DocumentCorpus or LimitingDocumentCorpus.
    """

    def _open_single(path):
        """Open one path as a corpus: a directory or a single file."""
        if not os.path.isdir(path):
            return annif.corpus.DocumentFile(path, subject_index)
        return annif.corpus.DocumentDirectory(
            path, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count == 0:
        logger.warning("Reading empty file")
        corpus = _open_single(os.path.devnull)
    elif path_count == 1:
        corpus = _open_single(paths[0])
    else:
        corpus = annif.corpus.CombinedCorpus([_open_single(path) for path in paths])

    if docs_limit is None:
        return corpus
    return annif.corpus.LimitingDocumentCorpus(corpus, docs_limit)
136
137
138
def open_text_documents(paths, docs_limit):
    """
    Read text documents from the given file paths and return them as a
    DocumentList whose Documents have no subjects. The path "-" means that the
    document text is read from standard input. At most docs_limit paths are
    read; a docs_limit of None means no limit.
    """

    def _read_text(path):
        """Return the full text behind one path."""
        if path == "-":
            return sys.stdin.read()
        # utf-8-sig strips a leading byte order mark; undecodable bytes are
        # replaced instead of raising.
        with open(path, errors="replace", encoding="utf-8-sig") as docfile:
            return docfile.read()

    selected_paths = paths[:docs_limit]
    # Documents are produced lazily, one per path, as the list is consumed.
    documents = (
        annif.corpus.Document(text=_read_text(path), subject_set=None)
        for path in selected_paths
    )
    return annif.corpus.DocumentList(documents)
156
157
158
def show_hits(hits, project, lang, file=None):
    """
    Write subject suggestions to the console or to the given file, one
    tab-separated row per hit. A row holds the subject URI in angle brackets,
    the label in the specified language, the notation if there is one, and the
    score of the suggestion.
    """
    for hit in hits:
        subject = project.subjects[hit.subject_id]
        uri = subject.uri
        # Skip the label or notation when it is empty or missing.
        described_by = [
            field for field in (subject.labels[lang], subject.notation) if field
        ]
        row = "<{}>\t{}\t{}".format(uri, "\t".join(described_by), hit.score)
        click.echo(row, file=file)
172
173
174
def parse_backend_params(backend_param, project):
    """Parse a list of backend parameters given with the --backend-param
    option into a nested dict structure.

    Each entry must have the form ``<backend>.<parameter>=<value>``. The
    backend name ends at the first dot and the parameter name at the first
    equals sign after it, so the value itself may contain dots and equals
    signs.

    Returns a mapping of backend name to a dict of parameter name and
    (string) value.

    Raises ConfigurationException if an entry does not follow the expected
    syntax, or if its backend fails validation against the project.
    """
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        backend, dot, param = beparam.partition(".")
        key, equals, val = param.partition("=")
        # partition() returns an empty separator when it is not found; report
        # that as a configuration problem instead of failing on unpacking.
        if not dot or not equals:
            raise ConfigurationException(
                f'The CLI option "-b {beparam}" does not follow the syntax '
                "<backend>.<parameter>=<value>."
            )
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params
184
185
186
def _validate_backend_params(backend, beparam, project):
    """Check that a backend named in a ``-b`` option is the project's backend.

    ``beparam`` is the option value, used only in the error message. Raises
    ConfigurationException when ``backend`` differs from the ``backend``
    entry of the project configuration.
    """
    project_backend = project.config["backend"]
    if backend == project_backend:
        return
    message = (
        'The backend {} in CLI option "-b {}" not matching the project'
        " backend {}."
    ).format(backend, beparam, project_backend)
    raise ConfigurationException(message)
192
193
194
def generate_filter_params(filter_batch_max_limit):
    """Return every (limit, threshold) combination as a list of tuples.

    Limits run from 1 up to and including filter_batch_max_limit. Thresholds
    are the twenty multiples of 0.05 starting at 0.0. The list is ordered by
    limit first, then by threshold.
    """
    thresholds = [step * 0.05 for step in range(20)]
    return [
        (limit, threshold)
        for limit in range(1, filter_batch_max_limit + 1)
        for threshold in thresholds
    ]
198