Passed
Push — issue684-cli-command-completio... ( 1d41b3...8e38a8 )
by Juho
03:07
created

annif.cli_util.open_documents()   A

Complexity

Conditions 5

Size

Total Lines 26
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 16
nop 4
dl 0
loc 26
rs 9.1333
c 0
b 0
f 0
1
"""Utility functions for Annif CLI commands"""
2
3
4
import collections
5
import os
6
import sys
7
8
import click
9
import click_log
10
from flask import current_app
11
12
import annif
13
from annif.exception import ConfigurationException
14
from annif.project import Access
15
from annif.suggestion import SuggestionFilter
16
17
logger = annif.logger
18
19
20
def _set_project_config_file_path(ctx, param, value):
    """Click option callback that overrides the project configuration path.

    A value given with the CLI option replaces the default path (or the path
    given in the environment) by being stored in the Flask application config
    under PROJECTS_CONFIG_PATH. The application is loaded and its context is
    entered even when no value was given.
    """
    app = ctx.obj.load_app()
    with app.app_context():
        if not value:
            # Nothing to override; keep whatever path is already configured.
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
25
26
27
def common_options(f):
    """Decorate a CLI command with the options shared by all commands.

    Adds the eager, non-exposed ``-p/--projects`` option (handled by
    _set_project_config_file_path) and then wraps the result with the
    verbosity option that click_log provides for the module logger.
    """
    projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    verbosity_option = click_log.simple_verbosity_option(logger)
    # Apply the projects option first so the verbosity option ends up outermost.
    return verbosity_option(projects_option(f))
39
40
41
def backend_param_option(f):
    """Decorate a CLI command with the repeatable ``-b/--backend-param`` option
    used to override backend parameters of the config file."""
    help_text = (
        "Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`."
    )
    add_option = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=help_text,
    )
    return add_option(f)
50
51
52
def docs_limit_option(f):
    """Decorate a CLI command with the ``-d/--docs-limit`` option.

    The option limits the number of documents to use; it is validated with
    click.IntRange(0, None) and defaults to None.
    """
    limit_type = click.IntRange(0, None)
    add_option = click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=limit_type,
        help="Maximum number of documents to use",
    )
    return add_option(f)
62
63
64
def get_project(project_id):
    """Helper function to get a project by ID and bail out if it doesn't exist.

    The project is looked up in the registry with private access. When the
    lookup raises ValueError, an error message is written to stderr and the
    process exits with status 1.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
72
73
74
def get_vocab(vocab_id):
    """Helper function to get a vocabulary by ID and bail out if it doesn't
    exist.

    The vocabulary is looked up in the registry with private access. When the
    lookup raises ValueError, an error message is written to stderr and the
    process exits with status 1.
    """
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
83
84
85
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a list of pathnames.

    Each path is either a TSV file or a directory of TXT files. For
    directories with subjects in TSV files, the given vocabulary language is
    used to convert subject labels into URIs. When no paths are given, a
    warning is logged and the null device is opened instead. Several paths are
    combined into a CombinedCorpus. The corpus is returned as an instance of
    DocumentCorpus, wrapped in a LimitingDocumentCorpus when docs_limit is not
    None.
    """

    def _open_single(path):
        """Open one path as a DocumentDirectory or a DocumentFile."""
        if not os.path.isdir(path):
            return annif.corpus.DocumentFile(path, subject_index)
        return annif.corpus.DocumentDirectory(
            path, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count == 0:
        logger.warning("Reading empty file")
        corpus = _open_single(os.path.devnull)
    elif path_count == 1:
        corpus = _open_single(paths[0])
    else:
        corpus = annif.corpus.CombinedCorpus([_open_single(p) for p in paths])

    if docs_limit is None:
        return corpus
    return annif.corpus.LimitingDocumentCorpus(corpus, docs_limit)
111
112
113
def open_text_documents(paths, docs_limit):
    """
    Helper function to read text documents from the given file paths. Returns a
    DocumentList object with Documents having no subjects. If a path is "-", the
    document text is read from standard input. At most docs_limit paths are read;
    a docs_limit of None reads them all. Documents are produced lazily, one per
    path, as the DocumentList consumes them.
    """
    selected_paths = paths[:docs_limit]

    def _read_text(path):
        """Return the text behind one path ("-" means standard input)."""
        if path == "-":
            return sys.stdin.read()
        # utf-8-sig drops a leading BOM; undecodable bytes are replaced.
        with open(path, errors="replace", encoding="utf-8-sig") as docfile:
            return docfile.read()

    documents = (
        annif.corpus.Document(text=_read_text(path), subject_set=None)
        for path in selected_paths
    )
    return annif.corpus.DocumentList(documents)
131
132
133
def show_hits(hits, project, lang, file=None):
    """
    Print subject suggestions to the console or a file. The suggestions are displayed as
    a table, with one row per hit. Each row contains the URI, label, possible notation,
    and score of the suggestion, separated by tabs. The label is given in the specified
    language; an empty label or notation is left out of the row.
    """
    for hit in hits.as_list():
        subject = project.subjects[hit.subject_id]
        uri = subject.uri
        # Keep only the non-empty parts so a missing notation adds no extra tab.
        described = [
            part for part in (subject.labels[lang], subject.notation) if part
        ]
        columns = "\t".join(described)
        row = "<{}>\t{}\t{}".format(uri, columns, hit.score)
        click.echo(row, file=file)
147
148
149
def parse_backend_params(backend_param, project):
    """Parse a list of backend parameters given with the --backend-param
    option into a nested dict structure.

    Each entry must have the form ``<backend>.<parameter>=<value>``. The
    entry is split on the first "." and then on the first "=", so parameter
    names may contain dots and values may contain "=". The result maps each
    backend to a dict of parameter name -> value, with values kept as strings.

    Raises ValueError when an entry lacks the "." or "=" separator, and
    ConfigurationException when the backend named in an entry is not the
    backend of the given project.
    """
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        backend, dot, param = beparam.partition(".")
        key, equals, val = param.partition("=")
        if not dot or not equals:
            # A malformed entry used to fail with an opaque tuple-unpacking
            # ValueError; keep the exception type but say what is wrong.
            raise ValueError(
                f'Invalid backend parameter in CLI option "-b {beparam}"; '
                "expected syntax: <backend>.<parameter>=<value>."
            )
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params


def _validate_backend_params(backend, beparam, project):
    """Raise ConfigurationException unless backend is the project's backend."""
    if backend != project.config["backend"]:
        raise ConfigurationException(
            'The backend {} in CLI option "-b {}" not matching the project'
            " backend {}.".format(backend, beparam, project.config["backend"])
        )
167
168
169
def generate_filter_batches(subjects, filter_batch_max_limit):
    """Create a suggestion filter and an evaluation batch for every combination
    of limit and threshold.

    Limits run from 1 to filter_batch_max_limit inclusive; thresholds are
    i * 0.05 for i in range(20). Returns a dict keyed by (limit, threshold)
    whose values are (SuggestionFilter, EvaluationBatch) tuples.
    """
    import annif.eval

    # The threshold grid is the same for every limit, so build it once.
    thresholds = [step * 0.05 for step in range(20)]

    batches_by_setting = {}
    for limit in range(1, filter_batch_max_limit + 1):
        for threshold in thresholds:
            batches_by_setting[(limit, threshold)] = (
                SuggestionFilter(subjects, limit, threshold),
                annif.eval.EvaluationBatch(subjects),
            )
    return batches_by_setting
179
180
181
def complete_project_id(ctx, param, incomplete):
    """Return the project IDs from the registry that start with the given
    incomplete string.

    The (ctx, param, incomplete) signature matches click's shell completion
    callbacks, which is presumably how this function is used.
    """
    app = ctx.obj.load_app()
    with app.app_context():
        candidates = annif.registry.get_projects()
        return [
            project_id
            for project_id in candidates
            if project_id.startswith(incomplete)
        ]
184
185
186
def complete_vocab_id(ctx, param, incomplete):
    """Return the vocabulary IDs from the registry that start with the given
    incomplete string.

    The (ctx, param, incomplete) signature matches click's shell completion
    callbacks, which is presumably how this function is used.
    """
    app = ctx.obj.load_app()
    with app.app_context():
        candidates = annif.registry.get_vocabs()
        return [
            vocab_id for vocab_id in candidates if vocab_id.startswith(incomplete)
        ]
189