Passed
Push — issue686-cli-command-list-proj... ( 770cce...271af9 )
by Juho
05:48
created

annif.cli_util   A

Complexity

Total Complexity 31

Size/Duplication

Total Lines 201
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 122
dl 0
loc 201
rs 9.92
c 0
b 0
f 0
wmc 31

14 Functions

Rating   Name   Duplication   Size   Complexity  
A get_vocab() 0 9 2
A _set_project_config_file_path() 0 5 3
A backend_param_option() 0 9 1
A docs_limit_option() 0 10 1
A common_options() 0 12 1
A get_project() 0 8 2
A make_list_template() 0 15 3
A _validate_backend_params() 0 5 2
A render_list() 0 9 2
A generate_filter_params() 0 4 1
A open_documents() 0 26 5
A parse_backend_params() 0 10 2
A open_text_documents() 0 18 4
A show_hits() 0 14 2
1
"""Utility functions for Annif CLI commands"""
2
3
4
import collections
5
import itertools
6
import os
7
import sys
8
9
import click
10
import click_log
11
from flask import current_app
12
13
import annif
14
from annif.exception import ConfigurationException
15
from annif.project import Access
16
17
# Module-level alias for the logger exposed by the annif package; the helpers
# in this module use it for the verbosity option and for warnings.
logger = annif.logger
18
19
20
def _set_project_config_file_path(ctx, param, value):
    """Click option callback that overrides the project configuration path.

    The application is loaded from ``ctx.obj`` and, inside its application
    context, ``PROJECTS_CONFIG_PATH`` in the app config is set to ``value``.
    When no value was given on the command line the configuration is left
    untouched (the app is still loaded and its context entered).
    """
    app = ctx.obj.load_app()
    with app.app_context():
        if not value:
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
25
26
27
def common_options(f):
    """Decorate a CLI command with the options shared by all commands.

    Adds the eager ``-p/--projects`` option, whose value is handled by
    ``_set_project_config_file_path`` instead of being passed to the command,
    and the click_log verbosity option bound to the module logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    verbosity_option = click_log.simple_verbosity_option(logger)
    # Apply in the same order as before: projects option first, verbosity last.
    return verbosity_option(projects_option(f))
39
40
41
def backend_param_option(f):
    """Decorate a CLI command with the repeatable ``-b/--backend-param`` option.

    The option may be given multiple times; each value is meant to override a
    backend parameter of the configuration file.
    """
    help_text = (
        "Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`."
    )
    add_option = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=help_text,
    )
    return add_option(f)
50
51
52
def docs_limit_option(f):
    """Decorate a CLI command with the ``-d/--docs-limit`` option.

    The option takes a non-negative integer giving the maximum number of
    documents to use; it defaults to None when not given.
    """
    non_negative_int = click.IntRange(0, None)
    add_option = click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=non_negative_int,
        help="Maximum number of documents to use",
    )
    return add_option(f)
62
63
64
def get_project(project_id):
    """Look up a project by its ID, exiting the program if it is not found.

    The lookup goes through the annif registry with private minimum access.
    If the registry raises ValueError, an error message is written to stderr
    and the process exits with status 1.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
72
73
74
def get_vocab(vocab_id):
    """Look up a vocabulary by its ID, exiting the program if it is not found.

    The lookup goes through the annif registry with private minimum access.
    If the registry raises ValueError, an error message is written to stderr
    and the process exits with status 1.
    """
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
83
84
85
def make_list_template(column_headings, entries):
    """Build a format-string template for a table with variable-width columns.

    Each column is as wide as the longest item found in that column, taking
    both the heading and all entries into account. The returned template has
    one left-aligned, space-padded placeholder per column heading, with the
    placeholders separated by two spaces, e.g. ``"{0: <13}  {1: <14}"``.

    column_headings -- sequence of heading strings, one per column
    entries -- iterable of rows, each a sequence of strings
    """
    # Start from the heading widths; one slot per column that gets a placeholder.
    widths = [len(heading) for heading in column_headings]
    for entry in entries:
        for field_ind, item in enumerate(entry):
            if field_ind >= len(widths):
                # Fields beyond the last heading get no placeholder.
                break
            widths[field_ind] = max(widths[field_ind], len(item))

    # Doubled braces produce literal braces, yielding e.g. "{0: <13}".
    return "  ".join(
        f"{{{field_ind}: <{width}}}" for field_ind, width in enumerate(widths)
    )
100
101
102
def render_list(template, column_headings, entries):
    """Print a table of entries using the given format-string template.

    The template must contain one placeholder per column. The column headings
    are printed first, followed by a line of dashes as long as the rendered
    heading line, and then one line per entry.
    """
    heading_line = template.format(*column_headings)
    for line in (heading_line, "-" * len(heading_line)):
        click.echo(line)
    for row in entries:
        click.echo(template.format(*row))
111
112
113
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a list of pathnames.

    Each path is either a TSV file or a directory of TXT files. For
    directories with subjects in TSV files, the given vocabulary language is
    used to convert subject labels into URIs. Several paths are merged into a
    single combined corpus; with no paths at all a warning is logged and the
    null device is opened instead. When docs_limit is not None the corpus is
    wrapped so that it is limited to that many documents.
    """

    def open_doc_path(path, subject_index):
        """Open one path as a corpus: a directory or a single file."""
        if not os.path.isdir(path):
            return annif.corpus.DocumentFile(path, subject_index)
        return annif.corpus.DocumentDirectory(
            path, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count > 1:
        docs = annif.corpus.CombinedCorpus(
            [open_doc_path(single_path, subject_index) for single_path in paths]
        )
    elif path_count == 1:
        docs = open_doc_path(paths[0], subject_index)
    else:
        logger.warning("Reading empty file")
        docs = open_doc_path(os.path.devnull, subject_index)

    if docs_limit is None:
        return docs
    return annif.corpus.LimitingDocumentCorpus(docs, docs_limit)
139
140
141
def open_text_documents(paths, docs_limit):
    """Read plain text documents from the given file paths.

    Returns a DocumentList whose Documents have no subjects. A path of "-"
    means the document text is read from standard input. At most docs_limit
    paths are read (all of them when docs_limit is None). Files are decoded
    as UTF-8 with an optional BOM, replacing undecodable bytes.
    """

    def _read_text(path):
        """Return the full text behind one path ("-" means stdin)."""
        if path == "-":
            return sys.stdin.read()
        with open(path, errors="replace", encoding="utf-8-sig") as docfile:
            return docfile.read()

    def _docs(selected_paths):
        # Generator: each document is read only when the list is iterated.
        for path in selected_paths:
            yield annif.corpus.Document(text=_read_text(path), subject_set=None)

    limited_paths = paths[:docs_limit]
    return annif.corpus.DocumentList(_docs(limited_paths))
159
160
161
def show_hits(hits, project, lang, file=None):
    """Print subject suggestions to the console or to the given file.

    One tab-separated line is written per hit: the subject URI in angle
    brackets, the label in the specified language, the notation (only when
    the subject has one), and the score of the suggestion.
    """
    for hit in hits:
        subject = project.subjects[hit.subject_id]
        uri = subject.uri
        # Empty/None label or notation is left out rather than printed blank.
        name_fields = [
            field for field in (subject.labels[lang], subject.notation) if field
        ]
        row = "<{}>\t{}\t{}".format(uri, "\t".join(name_fields), hit.score)
        click.echo(row, file=file)
175
176
177
def parse_backend_params(backend_param, project):
    """Parse the values of the --backend-param option into a nested dict.

    Each value must have the form ``<backend>.<parameter>=<value>``. The
    backend name is split off at the first "." and the parameter name at the
    first "=" after it, so values may themselves contain "." or "=".

    backend_param -- iterable of option value strings
    project -- project whose configured backend the values must match

    Returns a defaultdict mapping backend name to a dict of parameter names
    and (string) values.

    Raises ConfigurationException if a value does not follow the expected
    syntax or names a backend other than the one configured for the project.
    """
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        backend, dot, param = beparam.partition(".")
        key, equals, val = param.partition("=")
        if not dot or not equals:
            # Report malformed input clearly instead of failing with a bare
            # tuple-unpacking ValueError.
            raise ConfigurationException(
                f'Invalid syntax in CLI option "-b {beparam}": '
                "expected <backend>.<parameter>=<value>."
            )
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params
187
188
189
def _validate_backend_params(backend, beparam, project):
    """Check that a backend named in a ``-b`` option matches the project.

    backend -- backend name parsed from the option value
    beparam -- the full option value, used in the error message
    project -- project whose ``config["backend"]`` is compared against

    Raises ConfigurationException when the names differ; otherwise returns
    None.
    """
    project_backend = project.config["backend"]
    if backend == project_backend:
        return
    raise ConfigurationException(
        'The backend {} in CLI option "-b {}" not matching the project'
        " backend {}.".format(backend, beparam, project_backend)
    )
195
196
197
def generate_filter_params(filter_batch_max_limit):
    """Return every (limit, threshold) combination to try when filtering.

    Limits run from 1 to filter_batch_max_limit inclusive; thresholds are the
    20 values 0 * 0.05, 1 * 0.05, ..., 19 * 0.05. The result is a list of
    tuples ordered by limit first and threshold second.
    """
    threshold_values = [step * 0.05 for step in range(20)]
    return [
        (limit, threshold)
        for limit in range(1, filter_batch_max_limit + 1)
        for threshold in threshold_values
    ]
201