Passed
Push — update-dependencies-v1.2 ( 4d82fb...53f16b )
by Juho
06:24 queued 03:21
created

annif.rest.server_error()   A

Complexity

Conditions 1

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 4
nop 1
dl 0
loc 8
rs 10
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
from annif.corpus import Document, DocumentList, SubjectSet
13
from annif.exception import AnnifException
14
from annif.project import Access
15
from annif.simplemma_util import get_language_detector
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """return a Connexion problem response (HTTP 404) for an unknown project

    The detail message quotes the requested project identifier."""

    detail = "Project '{}' not found".format(project_id)
    return connexion.problem(status=404, title="Project not found", detail=detail)
32
33
34
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """return a Connexion problem response (HTTP 503) for a project or
    backend failure

    The detail text is whatever ``err.format_message()`` produces."""

    message = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=message)
43
44
45
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    The return value is a ``(body, status, headers)`` tuple: the body is a
    dict with ``title`` and ``version`` keys, the status is 200 and the
    headers declare a JSON content type."""

    # A bare ``import importlib`` does not guarantee that the ``metadata``
    # submodule is loaded, so import the function explicitly before use.
    from importlib.metadata import version as installed_version

    result = {
        "title": "Annif REST API",
        "version": installed_version("annif"),
    }
    return result, 200, {"Content-Type": "application/json"}
50
51
52
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """return a Connexion problem response (HTTP 400) for a language that the
    vocabulary does not support

    The detail message quotes the requested language code."""

    detail = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=detail)
60
61
62
def list_projects() -> tuple:
    """return the dumped form of every project with at least public access

    The return value is a ``(body, status, headers)`` tuple whose body is a
    dict with a single ``projects`` list, formatted according to OpenAPI
    spec."""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [project.dump() for project in public_projects.values()]
    return {"projects": dumped}, 200, {"Content-Type": "application/json"}
72
73
74
def show_project(
    project_id: str,
) -> dict | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    On success the result is a ``(body, 200, headers)`` tuple whose body is
    ``project.dump()``. If the registry lookup raises ValueError, a
    "project not found" problem response is returned instead."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return project.dump(), 200, {"Content-Type": "application/json"}
84
85
86
def detect_language(body: dict[str, Any]):
    """return scores for detected languages formatted according to Swagger spec

    Reads ``text`` and ``languages`` from the request body, asks the language
    detector for the proportion of the text in each candidate language and
    returns a ``(body, 200, headers)`` tuple. The language code "unk" is
    reported as None. A ValueError from the detector is turned into an
    HTTP 400 problem response."""

    text = body.get("text")
    candidates = body.get("languages")

    detector = get_language_detector(tuple(candidates))
    try:
        proportions = detector.proportion_in_each_language(text)
    except ValueError:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="unsupported candidate languages",
        )

    scores = []
    for lang, score in proportions.items():
        # "unk" is exposed to API clients as a null language
        scores.append({"language": None if lang == "unk" else lang, "score": score})
    return {"results": scores}, 200, {"Content-Type": "application/json"}
109
110
111
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """convert one suggestion into a dict with uri, label, notation and score

    The subject is looked up in the subject index by the suggestion's
    subject_id, and the label is picked for the given language."""

    subject = subject_index[suggestion.subject_id]
    return dict(
        uri=subject.uri,
        label=subject.labels[language],
        notation=subject.notation,
        score=suggestion.score,
    )
121
122
123
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert each set of hits into a dict with a "results" list of
    suggestion dicts, keeping the order of the hit sets"""

    converted = []
    for hits in hit_sets:
        entries = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": entries})
    return converted
130
131
132
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """return True if result is a ConnexionResponse whose status code is 400
    or above, False for anything else"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
137
138
139
def suggest(
    project_id: str, body: dict[str, Any]
) -> dict[str, list] | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    The optional "language", "limit" and "threshold" entries of the body are
    forwarded as parameters. On success the result is a
    ``(body, 200, headers)`` tuple; an error response from the shared
    suggest helper is returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    # the single text is wrapped as a one-document batch
    result = _suggest(project_id, [{"text": body["text"]}], parameters)

    if _is_error(result):
        return result
    return result[0], 200, {"Content-Type": "application/json"}
154
155
156
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> list[dict[str, Any]] | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    Each result dict is tagged with the "document_id" of the document it
    belongs to (None when the document has no such key). On success the
    result is a ``(body, 200, headers)`` tuple; an error response from the
    shared suggest helper is returned unchanged."""

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # _documents_to_corpus() skips documents without a "text" key, so only
    # the remaining documents have an entry in result; pairing against the
    # unfiltered list would attach document ids to the wrong results.
    # NOTE(review): assumes one result per corpus document, in corpus order.
    suggested_documents = (doc for doc in documents if "text" in doc)
    for document_results, document in zip(result, suggested_documents):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
172
173
174
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggestion for the documents against the given project

    Returns a list with one {"results": [...]} dict per hit set, or a
    Connexion problem response when the project is not found, the language
    is not among the vocabulary languages, or an AnnifException is raised.
    The "language" parameter defaults to the project's vocabulary language,
    "limit" to 10 and "threshold" to 0.0."""

    corpus = _documents_to_corpus(documents, subject_index=None)

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        # a missing or empty language falls back to the project's vocabulary
        # language
        language = parameters.get("language") or project.vocab_lang
    except AnnifException as err:
        return server_error(err)

    if language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)

    try:
        suggestions = project.suggest_corpus(corpus)
        hit_sets = suggestions.filter(max_hits, min_score)
    except AnnifException as err:
        return server_error(err)

    return _hit_sets_to_list(hit_sets, project.subjects, language)
202
203
204
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """build a DocumentList from a list of document dicts

    Without a subject index, every dict that has a "text" key becomes a
    Document with no subject set. With a subject index, only dicts that have
    both "text" and "subjects" keys are used, and each subject's "uri" is
    resolved through ``subject_index.by_uri`` to build the SubjectSet."""

    if subject_index is None:
        plain = [
            Document(text=doc["text"], subject_set=None)
            for doc in documents
            if "text" in doc
        ]
        return DocumentList(plain)

    corpus = []
    for doc in documents:
        if "text" not in doc or "subjects" not in doc:
            continue
        text = doc["text"]
        subject_ids = [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
        corpus.append(Document(text=text, subject_set=SubjectSet(subject_ids)))
    return DocumentList(corpus)
224
225
226
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int]:
    """learn from documents and return an empty 204 response if successful

    On success the result is ``(None, 204, headers)``. A "project not found"
    problem response is returned when the registry lookup raises ValueError,
    and a server error response when building the corpus or learning raises
    an AnnifException."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        project.learn(_documents_to_corpus(body, project.subjects))
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
244