Passed
Push — testing-on-windows-and-macos ( 782857...ea99ad )
by Juho
04:06
created

annif.rest.project_not_found_error()   A

Complexity

Conditions 1

Size

Total Lines 7
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 7
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
import annif.simplemma_util
13
from annif.corpus import Document, DocumentList, SubjectSet
14
from annif.exception import AnnifException
15
from annif.project import Access
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """build the Connexion problem response used when a project is not found

    The response has status 404 and its detail message names the requested
    project id."""

    message = f"Project '{project_id}' not found"
    return connexion.problem(status=404, title="Project not found", detail=message)
32
33
34
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """build the Connexion problem response used for a server error (project
    or backend problem)

    The response has status 503 and uses the formatted message of the given
    exception as its detail."""

    detail = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=detail)
43
44
45
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    Returns a ``(body, status, headers)`` tuple: the body holds the API title
    and the installed version of the ``annif`` distribution, the status is 200
    and the headers declare a JSON content type.

    Raises ``importlib.metadata.PackageNotFoundError`` if no ``annif``
    distribution is installed."""

    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.metadata`` is bound as an attribute, so the
    # lookup could otherwise depend on some other module having imported it.
    from importlib.metadata import version

    result = {"title": "Annif REST API", "version": version("annif")}
    return result, 200, {"Content-Type": "application/json"}
50
51
52
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build the Connexion problem response used when an unsupported language
    is requested

    The response has status 400 and its detail message quotes the language."""

    detail = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=detail)
60
61
62
def list_projects() -> tuple:
    """return the dumps of all projects fetched with public access, formatted
    according to OpenAPI spec

    The result is a ``(body, status, headers)`` tuple whose body lists the
    projects under the "projects" key."""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumps = [project.dump() for project in public_projects.values()]
    return {"projects": dumps}, 200, {"Content-Type": "application/json"}
72
73
74
def show_project(
    project_id: str,
) -> tuple | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    The project is looked up with ``min_access=Access.hidden``. On success the
    result is a ``(body, status, headers)`` tuple whose body is the project
    dump; if the registry raises ``ValueError`` for the id, a 404 problem
    response is returned instead."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump(), 200, {"Content-Type": "application/json"}
84
85
86
def detect_language(body: dict[str, Any]):
    """return scores for detected languages formatted according to OpenAPI spec

    Reads "text" and "languages" from the request body and passes them to
    annif.simplemma_util.detect_language. A ValueError from the detection is
    reported as a 400 problem response. In the results the language code
    "unk" is replaced with None."""

    try:
        proportions = annif.simplemma_util.detect_language(
            body.get("text"), tuple(body.get("languages"))
        )
    except ValueError:
        return connexion.problem(
            status=400, title="Bad Request", detail="unsupported candidate languages"
        )

    scores = []
    for lang, score in proportions.items():
        scores.append({"language": None if lang == "unk" else lang, "score": score})
    return {"results": scores}, 200, {"Content-Type": "application/json"}
108
109
110
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """convert one suggestion into a dict with uri, label, notation and score

    The subject is looked up in the subject index by the suggestion's
    subject_id; the label is taken from the subject's labels for the given
    language."""

    subject = subject_index[suggestion.subject_id]
    return dict(
        uri=subject.uri,
        label=subject.labels[language],
        notation=subject.notation,
        score=suggestion.score,
    )
120
121
122
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert each set of hits into a dict holding its converted suggestions
    under the "results" key, keeping the order of the hit sets"""

    converted = []
    for hits in hit_sets:
        suggestions = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": suggestions})
    return converted
129
130
131
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether the result is a Connexion response with an error status
    (status code 400 or above)"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
136
137
138
def suggest(
    project_id: str, body: dict[str, Any]
) -> tuple | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    The optional "language", "limit" and "threshold" entries of the body are
    forwarded as parameters; the "text" entry is wrapped into a single
    document. On success the result is a ``(body, status, headers)`` tuple
    whose body is the result dict for that one document; error responses
    from the suggestion step are returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    documents = [{"text": body["text"]}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # exactly one document was submitted, so only its result is returned
    return result[0], 200, {"Content-Type": "application/json"}
153
154
155
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    The documents are read from the "documents" entry of the body and the
    query parameters are forwarded to the suggestion step. On success the
    result is a ``(body, status, headers)`` tuple whose body is the list of
    per-document results, each tagged with the "document_id" of its source
    document (None when the document has none). Error responses from the
    suggestion step are returned unchanged."""

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # Documents without a "text" field are skipped when the corpus is built,
    # so they produce no result; pair results only with the documents that
    # were actually processed, otherwise the ids would shift onto the wrong
    # results.
    processed_documents = [doc for doc in documents if "text" in doc]
    for document_results, document in zip(result, processed_documents):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
171
172
173
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggestion for the documents and return one result dict per
    processed document, or a Connexion problem response on failure

    Parameters read: "language" (falls back to the project's vocab_lang),
    "limit" (default 10) and "threshold" (default 0.0).

    Problem responses: 404 when the registry raises ValueError for the
    project id, 400 when the language is not among the vocabulary languages,
    503 when an AnnifException is raised while resolving the language or
    while suggesting."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        lang = parameters.get("language") or project.vocab_lang
        # NOTE(review): the vocabulary access is kept under the same guard as
        # vocab_lang on the assumption that it can raise AnnifException too
        # (e.g. vocabulary not available) - confirm against the project class
        supported_languages = project.vocab.languages
    except AnnifException as err:
        return server_error(err)

    if lang not in supported_languages:
        return language_not_supported_error(lang)

    limit = parameters.get("limit", 10)
    threshold = parameters.get("threshold", 0.0)

    # build the corpus only once the request is known to be serviceable
    corpus = _documents_to_corpus(documents, subject_index=None)
    try:
        hit_sets = project.suggest_corpus(corpus).filter(limit, threshold)
    except AnnifException as err:
        return server_error(err)

    return _hit_sets_to_list(hit_sets, project.subjects, lang)
201
202
203
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """turn request documents into a DocumentList

    Without a subject index, every document that has a "text" entry becomes a
    Document with no subject set. With a subject index, only documents that
    have both "text" and "subjects" are kept, and each subject's "uri" is
    resolved through subject_index.by_uri to build the SubjectSet."""

    if subject_index is None:
        plain = [
            Document(text=doc["text"], subject_set=None)
            for doc in documents
            if "text" in doc
        ]
        return DocumentList(plain)

    annotated = []
    for doc in documents:
        if "text" not in doc or "subjects" not in doc:
            continue
        resolved = [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
        annotated.append(Document(text=doc["text"], subject_set=SubjectSet(resolved)))
    return DocumentList(annotated)
223
224
225
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """learn from documents and return an empty 204 response if successful

    The project is looked up with ``min_access=Access.hidden``; a ValueError
    from the registry yields a 404 problem response. The request documents
    are converted to a corpus using the project's subjects and passed to
    project.learn; an AnnifException raised during either step yields a 503
    problem response."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        # project.subjects is read inside the guard so that an AnnifException
        # raised while accessing it is reported the same way as a learn failure
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
243