Passed
Push — issue678-refactor-suggestionre... ( 4d4f9a...830b3a )
by Osma
02:37
created

annif.cli_util.get_project()   A

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
import collections
2
import os
3
import sys
4
5
import click
6
import click_log
7
from flask import current_app
8
9
import annif
10
from annif.exception import ConfigurationException
11
from annif.project import Access
12
from annif.suggestion import SuggestionFilter
13
14
# Module-wide logger taken from the annif package; it is also the logger
# handed to click_log's verbosity option in common_options().
logger = annif.logger
15
16
17
def _set_project_config_file_path(ctx, param, value):
    """Click option callback: when a path was given on the command line,
    store it as PROJECTS_CONFIG_PATH in the application config, overriding
    the default path or the path given in env."""
    app = ctx.obj.load_app()
    with app.app_context():
        if not value:
            # option not given: leave the configured path untouched
            return
        current_app.config["PROJECTS_CONFIG_PATH"] = value
22
23
24
def common_options(f):
    """Decorator that attaches the options shared by all CLI commands: the
    eager -p/--projects option (handled by a callback, not passed to the
    command) and click_log's simple verbosity option."""
    add_projects_option = click.option(
        "-p",
        "--projects",
        help="Set path to project configuration file or directory",
        type=click.Path(dir_okay=True, exists=True),
        callback=_set_project_config_file_path,
        expose_value=False,
        is_eager=True,
    )
    with_projects = add_projects_option(f)
    add_verbosity_option = click_log.simple_verbosity_option(logger)
    return add_verbosity_option(with_projects)
36
37
38
def backend_param_option(f):
    """Decorator adding the repeatable --backend-param/-b option, which lets
    CLI commands override backend parameters of the config file."""
    help_text = (
        "Override backend parameter of the config file. "
        "Syntax: `-b <backend>.<parameter>=<value>`."
    )
    add_option = click.option(
        "--backend-param",
        "-b",
        multiple=True,
        help=help_text,
    )
    return add_option(f)
47
48
49
def docs_limit_option(f):
    """Decorator adding the --docs-limit/-d option, which lets CLI commands
    limit the number of documents to use. The value must be an integer >= 0;
    the default is None."""
    non_negative_int = click.IntRange(0, None)
    add_option = click.option(
        "--docs-limit",
        "-d",
        default=None,
        type=non_negative_int,
        help="Maximum number of documents to use",
    )
    return add_option(f)
59
60
61
def get_project(project_id):
    """
    Look up a project by its ID in the registry (private access level is
    enough). If the lookup raises ValueError, print an error message to
    stderr and exit with status 1."""
    try:
        project = annif.registry.get_project(project_id, min_access=Access.private)
    except ValueError:
        message = "No projects found with id '{0}'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
69
70
71
def get_vocab(vocab_id):
    """
    Look up a vocabulary by its ID in the registry (private access level is
    enough). If the lookup raises ValueError, print an error message to
    stderr and exit with status 1."""
    try:
        vocab = annif.registry.get_vocab(vocab_id, min_access=Access.private)
    except ValueError:
        message = f"No vocabularies found with the id '{vocab_id}'."
        click.echo(message, err=True)
        sys.exit(1)
    return vocab
80
81
82
def open_documents(paths, subject_index, vocab_lang, docs_limit):
    """Open a document corpus from a list of pathnames, each of which is
    either a TSV file or a directory of TXT files. For directories, the
    given vocabulary language is passed on so that subject labels can be
    converted into URIs. With no paths, a warning is logged and the null
    device is opened as an empty file. When docs_limit is not None the
    corpus is wrapped in a LimitingDocumentCorpus."""

    def load_corpus(location):
        """open a single pathname as a corpus: directories become a
        DocumentDirectory, anything else a DocumentFile"""
        if not os.path.isdir(location):
            return annif.corpus.DocumentFile(location, subject_index)
        return annif.corpus.DocumentDirectory(
            location, subject_index, vocab_lang, require_subjects=True
        )

    path_count = len(paths)
    if path_count == 0:
        logger.warning("Reading empty file")
        corpus = load_corpus(os.path.devnull)
    elif path_count == 1:
        corpus = load_corpus(paths[0])
    else:
        corpus = annif.corpus.CombinedCorpus(
            [load_corpus(location) for location in paths]
        )

    if docs_limit is None:
        return corpus
    return annif.corpus.LimitingDocumentCorpus(corpus, docs_limit)
108
109
110
def open_text_documents(paths, docs_limit):
    """
    Read text documents from the given file paths and return them as a
    DocumentList whose Documents have no subjects. The path "-" means the
    document text is read from standard input. At most docs_limit of the
    given paths are used (all of them when docs_limit is None).
    """

    def read_text(source):
        """return the full text behind one path ("-" is standard input)"""
        if source == "-":
            return sys.stdin.read()
        # utf-8-sig drops a leading BOM; undecodable bytes are replaced
        with open(source, errors="replace", encoding="utf-8-sig") as handle:
            return handle.read()

    def iter_documents(sources):
        """lazily yield one subject-less Document per path"""
        for source in sources:
            yield annif.corpus.Document(text=read_text(source), subject_set=None)

    selected_paths = paths[:docs_limit]
    return annif.corpus.DocumentList(iter_documents(selected_paths))
128
129
130
def show_hits(hits, project, lang, file=None):
    """
    Print subject suggestions to the console or a file, one tab-separated
    row per hit: the URI in angle brackets, the label in the given language,
    the notation (only when it is non-empty) and the score.
    """
    for suggestion in hits.as_list():
        subject = project.subjects[suggestion.subject_id]
        uri = subject.uri
        # empty/None label or notation is left out of the row entirely
        label_and_notation = [
            part for part in (subject.labels[lang], subject.notation) if part
        ]
        row = "<{}>\t{}\t{}".format(
            uri, "\t".join(label_and_notation), suggestion.score
        )
        click.echo(row, file=file)
144
145
146
def parse_backend_params(backend_param, project):
    """Parse a list of backend parameters given with the --backend-param
    option into a nested dict structure.

    Each item must have the form ``<backend>.<parameter>=<value>``. The
    result maps each backend to a dict of its parameters; values are kept as
    the strings given on the command line.

    Raises ConfigurationException if an item is not in the expected form or
    if its backend does not match the backend of the given project."""
    backend_params = collections.defaultdict(dict)
    for beparam in backend_param:
        # partition() always yields three parts, so a missing "." or "=" can
        # be reported clearly instead of failing with an unpacking ValueError
        backend, dot, param = beparam.partition(".")
        key, equals, val = param.partition("=")
        if not (dot and equals):
            raise ConfigurationException(
                f'The value in CLI option "-b {beparam}" is not in the form'
                " <backend>.<parameter>=<value>."
            )
        _validate_backend_params(backend, beparam, project)
        backend_params[backend][key] = val
    return backend_params
156
157
158
def _validate_backend_params(backend, beparam, project):
    """Raise ConfigurationException unless the backend named in a
    --backend-param item equals the backend in the project configuration."""
    if backend == project.config["backend"]:
        return
    raise ConfigurationException(
        'The backend {} in CLI option "-b {}" not matching the project'
        " backend {}.".format(backend, beparam, project.config["backend"])
    )
164
165
166
def generate_filter_batches(subjects, filter_batch_max_limit):
    """Build a dict keyed by (limit, threshold) whose values are
    (SuggestionFilter, EvaluationBatch) pairs, for every limit from 1 to
    filter_batch_max_limit and every threshold 0.0, 0.05, ..., 0.95."""
    # imported at call time rather than at module import
    import annif.eval

    thresholds = [step * 0.05 for step in range(20)]
    batches = {}
    for limit in range(1, filter_batch_max_limit + 1):
        for threshold in thresholds:
            suggestion_filter = SuggestionFilter(subjects, limit, threshold)
            evaluation_batch = annif.eval.EvaluationBatch(subjects)
            batches[(limit, threshold)] = (suggestion_filter, evaluation_batch)
    return batches
176