Passed
Push — issue678-refactor-suggestionre... ( 911e14...37d225 )
by Osma
06:49 queued 02:55
created

annif.rest._hit_sets_to_list()   A

Complexity

Conditions 1

Size

Total Lines 4
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 3
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
import importlib
5
6
import connexion
7
8
import annif.registry
9
from annif.corpus import Document, DocumentList, SubjectSet
10
from annif.exception import AnnifException
11
from annif.project import Access
12
13
14
def project_not_found_error(project_id):
    """Build the Connexion 404 problem response for an unknown project.

    The given project_id is echoed in the detail message.
    """
    detail = "Project '{}' not found".format(project_id)
    return connexion.problem(status=404, title="Project not found", detail=detail)
22
23
24
def server_error(err):
    """Build the Connexion 503 problem response for a project or backend failure.

    The detail text is whatever err.format_message() returns.
    """
    message = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=message)
31
32
33
def show_info():
    """Return the API title and the installed Annif version.

    Returns a dict with the keys "title" and "version"; the version is read
    from the metadata of the installed "annif" distribution. If that
    distribution is not installed, the PackageNotFoundError raised by
    importlib.metadata propagates to the caller.
    """
    # A bare "import importlib" does not guarantee that the "metadata"
    # submodule has been loaded, so import it explicitly before using it.
    import importlib.metadata

    return {"title": "Annif REST API", "version": importlib.metadata.version("annif")}
37
38
39
def language_not_supported_error(lang):
    """Build the Connexion 400 problem response for an unsupported language.

    The requested language code is quoted in the detail message.
    """
    detail = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=detail)
47
48
49
def list_projects():
    """List projects as a dict shaped for the OpenAPI response.

    The "projects" key holds the dump() of every project the registry returns
    for min_access=Access.public.
    """
    visible = annif.registry.get_projects(min_access=Access.public)
    dumped = [project.dump() for project in visible.values()]
    return {"projects": dumped}
58
59
60
def show_project(project_id):
    """Return the dump() of one project, or a 404 problem if it is unknown.

    The registry lookup uses min_access=Access.hidden; a ValueError from the
    lookup is turned into the "project not found" response.
    """
    try:
        found = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return found.dump()
68
69
70
def _suggestion_to_dict(suggestion, subject_index, language):
    """Convert one suggestion into a result dict.

    The subject is looked up in subject_index by suggestion.subject_id; the
    returned dict has the keys "uri", "label" (the subject's label in the
    given language), "notation" and "score".
    """
    entry = subject_index[suggestion.subject_id]
    return dict(
        uri=entry.uri,
        label=entry.labels[language],
        notation=entry.notation,
        score=suggestion.score,
    )
78
79
80
def _hit_sets_to_list(hit_sets, subjects, lang):
    """Convert an iterable of hit sets into a list of {"results": [...]} dicts.

    Each hit in a hit set is converted with _suggestion_to_dict() using the
    given subject index and language; the order of hit sets and hits is kept.
    """
    converted = []
    for hits in hit_sets:
        results = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": results})
    return converted
85
86
87
def _is_error(result):
    """Tell whether result is a Connexion response with status code >= 400."""
    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
92
93
94
def suggest(project_id, body):
    """Suggest subjects for a single text.

    The optional "language", "limit" and "threshold" keys of body are passed
    on as parameters and body["text"] is the text to analyze. Returns the
    error response from _suggest() unchanged, otherwise the result dict of the
    single document.
    """
    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    result = _suggest(project_id, [{"text": body["text"]}], parameters)
    return result if _is_error(result) else result[0]
107
108
109
def suggest_batch(project_id, body, **query_parameters):
    """Suggest subjects for several documents at once.

    body["documents"] is the list of document dicts and the keyword arguments
    are passed on to _suggest() as parameters. Returns the error response from
    _suggest() unchanged, otherwise the list of result dicts, each extended
    with the "document_id" of its document (None when the document has none).
    """
    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # _documents_to_corpus() leaves out documents that have no "text" key, so
    # pair the results only with the documents that were actually submitted;
    # zipping against the full list would shift ids onto the wrong results.
    # NOTE(review): assumes one result per submitted document, in order.
    submitted = [document for document in documents if "text" in document]
    for document_results, document in zip(result, submitted):
        document_results["document_id"] = document.get("document_id")
    return result
121
122
123
def _suggest(project_id, documents, parameters):
    """Run suggestion for documents on a project and format the results.

    parameters may hold "language", "limit" (default 10) and "threshold"
    (default 0.0). Returns a Connexion problem response when the project is
    unknown, the language is not among the vocabulary languages, or an
    AnnifException is raised; otherwise the list from _hit_sets_to_list().
    """
    doc_corpus = _documents_to_corpus(documents, subject_index=None)
    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    # An explicitly requested language wins over the project's vocabulary language.
    try:
        language = parameters.get("language") or project.vocab_lang
    except AnnifException as err:
        return server_error(err)
    if language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)
    try:
        hit_sets = project.suggest_corpus(doc_corpus).filter(max_hits, min_score)
    except AnnifException as err:
        return server_error(err)
    return _hit_sets_to_list(hit_sets, project.subjects, language)
147
148
149
def _documents_to_corpus(documents, subject_index):
    """Wrap document dicts into a DocumentList.

    Without a subject index, every document that has a "text" key becomes a
    Document with no subject set. With a subject index, only documents that
    have both "text" and "subjects" are kept, and each subject's "uri" is
    resolved through subject_index.by_uri() into a SubjectSet.
    """
    if subject_index is None:
        plain = [
            Document(text=doc["text"], subject_set=None)
            for doc in documents
            if "text" in doc
        ]
        return DocumentList(plain)

    annotated = [
        Document(
            text=doc["text"],
            subject_set=SubjectSet(
                [subject_index.by_uri(subject["uri"]) for subject in doc["subjects"]]
            ),
        )
        for doc in documents
        if "text" in doc and "subjects" in doc
    ]
    return DocumentList(annotated)
166
167
168
def learn(project_id, body):
    """Learn from documents and return an empty 204 response if successful.

    body is the list of document dicts. Returns a 404 problem response when
    the project is unknown and a 503 problem response when building the corpus
    or learning raises an AnnifException.
    """
    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        project.learn(_documents_to_corpus(body, project.subjects))
    except AnnifException as err:
        return server_error(err)
    return None, 204
183