Passed
Push — issue686-cli-command-list-proj... ( 271af9...54b5e8 )
by Juho
02:42
created

annif.cli_util.render_list()   A

Complexity

Conditions 2

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 3
dl 0
loc 9
rs 10
c 0
b 0
f 0
1
"""Utility functions for Annif CLI commands"""
2
3
4
import collections
5
import itertools
6
import os
7
import sys
8
9
import click
10
import click_log
11
from flask import current_app
12
13
import annif
14
from annif.exception import ConfigurationException
15
from annif.project import Access
16
17
logger = annif.logger
18
19
20
def _set_project_config_file_path(ctx, param, value):
21
    """Override the default path or the path given in env by CLI option"""
22
    with ctx.obj.load_app().app_context():
23
        if value:
24
            current_app.config["PROJECTS_CONFIG_PATH"] = value
25
26
27
def common_options(f):
    """Decorator to add common options for all CLI commands.

    Adds the ``-p``/``--projects`` option, whose value is handled by
    ``_set_project_config_file_path`` instead of being passed to the command,
    and then the verbosity option created by ``click_log`` for the module
    logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    f = projects_option(f)
    return click_log.simple_verbosity_option(logger)(f)
39
40
41
def backend_param_option(f):
    """Decorator to add an option for CLI commands to override BE parameters.

    The ``--backend-param``/``-b`` option may be given multiple times.
    """
    return click.option(
        "--backend-param",
        "-b",
        multiple=True,
        # Adjacent literals are concatenated by the parser; no "+" needed.
        help="Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`.",
    )(f)
50
51
52
def docs_limit_option(f):
    """Decorator to add an option for CLI commands to limit the number of
    documents to use.

    The ``--docs-limit``/``-d`` value must be an integer of at least 0; when
    the option is not given the value is None.
    """
    return click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=click.IntRange(0, None),
        help="Maximum number of documents to use",
    )(f)
62
63
64
def get_project(project_id):
    """Helper function to get a project by ID and bail out if it doesn't exist.

    The project is looked up in the registry with private minimum access. If
    the lookup raises ValueError, an error message is written to standard
    error and the process exits with status 1.
    """
    try:
        return annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        click.echo(f"No projects found with id '{project_id}'.", err=True)
        sys.exit(1)
72
73
74
def get_vocab(vocab_id):
    """Helper function to get a vocabulary by ID and bail out if it doesn't
    exist.

    The vocabulary is looked up in the registry with private minimum access.
    If the lookup raises ValueError, an error message is written to standard
    error and the process exits with status 1.
    """
    try:
        return annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        click.echo(f"No vocabularies found with the id '{vocab_id}'.", err=True)
        sys.exit(1)
83
84
85
def make_list_template(*rows):
    """Helper function to create a template for a list of entries with fields
    of variable width.

    Each row is a sequence of items supporting ``len()``. The width of each
    field is determined by the longest item in that field over all given rows.
    The result is a ``str.format`` template whose left-aligned, space-padded
    fields are separated by two spaces; with no rows it is an empty string.
    """
    max_field_widths = collections.defaultdict(int)
    for row in rows:
        for field_ind, item in enumerate(row):
            max_field_widths[field_ind] = max(max_field_widths[field_ind], len(item))

    # Doubled braces yield literal braces, producing e.g. "{0: <10}".
    return "  ".join(
        f"{{{field_ind}: <{field_width}}}"
        for field_ind, field_width in max_field_widths.items()
    )
101
102
103
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Helper function to open a document corpus from a list of pathnames,
    each of which is either a TSV file or a directory of TXT files. For
    directories with subjects in TSV files, the given vocabulary language
    will be used to convert subject labels into URIs. The corpus will be
    returned as an instance of DocumentCorpus or LimitingDocumentCorpus.

    With no paths a warning is logged and the null device is opened instead.
    Several paths are merged into one combined corpus. When docs_limit is not
    None, the resulting corpus is wrapped in a LimitingDocumentCorpus.
    """

    def open_doc_path(path, subject_index):
        """open a single path and return it as a DocumentCorpus"""
        if os.path.isdir(path):
            return annif.corpus.DocumentDirectory(
                path, subject_index, vocab_lang, require_subjects=True
            )
        return annif.corpus.DocumentFile(path, subject_index)

    if len(paths) == 0:
        logger.warning("Reading empty file")
        docs = open_doc_path(os.path.devnull, subject_index)
    elif len(paths) == 1:
        docs = open_doc_path(paths[0], subject_index)
    else:
        corpora = [open_doc_path(path, subject_index) for path in paths]
        docs = annif.corpus.CombinedCorpus(corpora)

    if docs_limit is not None:
        docs = annif.corpus.LimitingDocumentCorpus(docs, docs_limit)
    return docs
129
130
131
def open_text_documents(paths, docs_limit):
    """
    Helper function to read text documents from the given file paths. Returns a
    DocumentList object with Documents having no subjects. If a path is "-", the
    document text is read from standard input. The maximum number of documents to read
    is set by docs_limit parameter.
    """

    def _docs(paths):
        """Lazily yield one Document per path."""
        for path in paths:
            if path == "-":
                doc = annif.corpus.Document(text=sys.stdin.read(), subject_set=None)
            else:
                # Undecodable bytes are replaced rather than raising; the
                # "utf-8-sig" codec also drops a leading byte order mark.
                with open(path, errors="replace", encoding="utf-8-sig") as docfile:
                    doc = annif.corpus.Document(text=docfile.read(), subject_set=None)
            yield doc

    # Slicing with None keeps every path, so no limit means all documents.
    return annif.corpus.DocumentList(_docs(paths[:docs_limit]))
149
150
151
def show_hits(hits, project, lang, file=None):
    """
    Print subject suggestions to the console or a file. The suggestions are displayed as
    a table, with one row per hit. Each row contains the URI, label, possible notation,
    and score of the suggestion. The label is given in the specified language.
    """
    for hit in hits:
        subj = project.subjects[hit.subject_id]
        # Falsy label or notation values are left out of the row entirely.
        label_fields = "\t".join(filter(None, (subj.labels[lang], subj.notation)))
        click.echo(f"<{subj.uri}>\t{label_fields}\t{hit.score}", file=file)
165
166
167
def parse_backend_params(backend_param, project):
    """Parse a list of backend parameters given with the --backend-param
    option into a nested dict structure.

    Each entry must have the form ``<backend>.<parameter>=<value>``. The
    result maps backend -> parameter -> value, with values kept as strings.

    Raises ConfigurationException when an entry lacks the "." or "="
    separator, or when its backend is rejected by _validate_backend_params.
    """
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        # partition() splits at the first separator like split(sep, 1), but
        # reports a missing separator instead of failing on tuple unpacking.
        backend, dot, param = beparam.partition(".")
        key, equals, val = param.partition("=")
        if not dot or not equals:
            raise ConfigurationException(
                f'The CLI option "-b {beparam}" is malformed; the expected '
                "syntax is <backend>.<parameter>=<value>."
            )
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params
177
178
179
def _validate_backend_params(backend, beparam, project):
180
    if backend != project.config["backend"]:
181
        raise ConfigurationException(
182
            'The backend {} in CLI option "-b {}" not matching the project'
183
            " backend {}.".format(backend, beparam, project.config["backend"])
184
        )
185
186
187
def generate_filter_params(filter_batch_max_limit):
    """Return every (limit, threshold) combination as a list of tuples.

    Limits run from 1 to filter_batch_max_limit inclusive; thresholds are the
    twenty values 0.0, 0.05, ... computed as ``i * 0.05`` for i in 0..19. The
    list is ordered by limit first, then by threshold.
    """
    limits = range(1, filter_batch_max_limit + 1)
    thresholds = [i * 0.05 for i in range(20)]
    return list(itertools.product(limits, thresholds))
191