annif.rest   A
last analyzed

Complexity

Total Complexity 31

Size/Duplication

Total Lines 267
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 159
dl 0
loc 267
rs 9.92
c 0
b 0
f 0
wmc 31

17 Functions

Rating   Name   Duplication   Size   Complexity  
A project_not_found_error() 0 7 1
A list_vocabs() 0 10 1
A _suggestion_to_dict() 0 9 1
A _documents_to_corpus() 0 20 2
A language_not_supported_error() 0 7 1
A suggest() 0 15 2
A learning_not_enabled_error() 0 7 1
A _is_error() 0 4 1
A detect_language() 0 22 3
A suggest_batch() 0 16 3
A show_project() 0 10 2
A server_error() 0 8 1
A show_info() 0 5 1
B _suggest() 0 28 5
A _hit_sets_to_list() 0 6 1
A list_projects() 0 10 1
A learn() 0 20 4
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
import annif.simplemma_util
13
from annif.corpus import Document, DocumentList, SubjectSet
14
from annif.exception import AnnifException, NotEnabledException
15
from annif.project import Access
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 404) that is returned when
    no project can be found for the given project ID"""

    detail = "Project '{}' not found".format(project_id)
    return connexion.problem(status=404, title="Project not found", detail=detail)
32
33
34
def learning_not_enabled_error(project_id: str) -> ConnexionResponse:
    """return a Connexion error object when a project is not configured for learning

    The response is an HTTP 403 problem whose detail text names the project.
    """

    return connexion.problem(
        status=403,
        title="Learning not allowed",
        detail=f"Project '{project_id}' is not configured to allow learning via API",
    )
42
43
44
def server_error(err: AnnifException) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 503) for a server error
    (project or backend problem); the detail text comes from
    err.format_message()"""

    detail = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=detail)
53
54
55
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    The result is a (body, status, headers) tuple. The version is read from
    the installed "annif" distribution metadata, so
    importlib.metadata.PackageNotFoundError propagates if that distribution
    is not installed.
    """

    # A plain "import importlib" does not guarantee that the importlib.metadata
    # submodule has been loaded, so import the needed function explicitly.
    from importlib.metadata import version

    result = {"title": "Annif REST API", "version": version("annif")}
    return result, 200, {"Content-Type": "application/json"}
60
61
62
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 400) that is returned when
    the requested language is not supported by the vocabulary"""

    detail = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=detail)
70
71
72
def list_vocabs() -> tuple:
    """return the vocabularies obtained from the registry with
    min_access=Access.public, each dumped and wrapped in a dict formatted
    according to OpenAPI spec, as a (body, status, headers) tuple"""

    public_vocabs = annif.registry.get_vocabs(min_access=Access.public)
    dumped = [vocab.dump() for vocab in public_vocabs.values()]
    return {"vocabs": dumped}, 200, {"Content-Type": "application/json"}
82
83
84
def list_projects() -> tuple:
    """return the projects obtained from the registry with
    min_access=Access.public, each dumped and wrapped in a dict formatted
    according to OpenAPI spec, as a (body, status, headers) tuple"""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = []
    for proj in public_projects.values():
        dumped.append(proj.dump())
    headers = {"Content-Type": "application/json"}
    return {"projects": dumped}, 200, headers
94
95
96
def show_project(project_id: str) -> tuple | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    The project is looked up with min_access=Access.hidden. On success a
    (body, status, headers) tuple is returned; if the registry lookup raises
    ValueError, a 404 problem response is returned instead.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump(), 200, {"Content-Type": "application/json"}
106
107
108
def detect_language(body: dict[str, Any]) -> tuple | ConnexionResponse:
    """return scores for detected languages formatted according to OpenAPI spec

    The request body supplies "text" and the candidate "languages". On
    success a (body, status, headers) tuple is returned in which the
    language code "unk" is reported as None. A 400 problem response is
    returned when "languages" is missing or null, or when the detector
    raises ValueError.
    """

    text = body.get("text")
    languages = body.get("languages")

    # tuple(None) below would raise an uncaught TypeError; answer with a
    # client error instead when the candidate languages are absent.
    if languages is None:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="missing candidate languages",
        )

    try:
        proportions = annif.simplemma_util.detect_language(text, tuple(languages))
    except ValueError:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="unsupported candidate languages",
        )

    result = {
        "results": [
            {"language": lang if lang != "unk" else None, "score": score}
            for lang, score in proportions.items()
        ]
    }
    return result, 200, {"Content-Type": "application/json"}
130
131
132
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """convert one suggestion into a dict with the keys "uri", "label",
    "notation" and "score"; the subject is looked up in subject_index by
    suggestion.subject_id and the label is taken for the given language"""

    subject = subject_index[suggestion.subject_id]
    uri = subject.uri
    label = subject.labels[language]
    notation = subject.notation
    return dict(uri=uri, label=label, notation=notation, score=suggestion.score)
142
143
144
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert every set of hits into a {"results": [...]} dict, keeping the
    order of hit_sets; each hit is converted with _suggestion_to_dict"""

    converted = []
    for hits in hit_sets:
        suggestions = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": suggestions})
    return converted
151
152
153
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether result is a ConnexionResponse whose status code is 400
    or higher"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
158
159
160
def suggest(project_id: str, body: dict[str, Any]) -> tuple | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    Only the optional "language", "limit" and "threshold" entries of the
    request body are forwarded as parameters. On success a
    (body, status, headers) tuple is returned; an error response produced by
    _suggest is passed through unchanged.
    """

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    documents = [{"text": body["text"]}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # exactly one document was submitted, so there is exactly one result set
    return result[0], 200, {"Content-Type": "application/json"}
175
176
177
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    Each result dict additionally gets a "document_id" entry copied from the
    corresponding input document (None when the document has no id). On
    success a (body, status, headers) tuple is returned; an error response
    produced by _suggest is passed through unchanged.
    """

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result

    # _documents_to_corpus() skips documents that have no "text" entry, so
    # results only exist for the remaining ones. Pair the results with that
    # same filtered list; zipping against the full list would attach ids to
    # the wrong results after a skipped document.
    suggested_documents = [document for document in documents if "text" in document]
    for document_results, document in zip(result, suggested_documents):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
193
194
195
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """shared implementation of suggest and suggest_batch

    Returns a list with one {"results": [...]} dict per document in the
    corpus, or a Connexion problem response: 404 when the project lookup
    raises ValueError, 400 when the language is not among the vocabulary
    languages, and 503 when an AnnifException is raised.

    Recognized parameters are "language" (falls back to project.vocab_lang
    when absent or empty), "limit" (default 10) and "threshold"
    (default 0.0).
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        lang = parameters.get("language") or project.vocab_lang
        # With an explicit "language" parameter project.vocab_lang is never
        # evaluated, which makes this the first vocabulary access. It
        # presumably can fail with AnnifException just like vocab_lang, so it
        # is kept inside the same handler (TODO confirm against the project
        # class).
        supported_languages = project.vocab.languages
    except AnnifException as err:
        return server_error(err)

    if lang not in supported_languages:
        return language_not_supported_error(lang)

    limit = parameters.get("limit", 10)
    threshold = parameters.get("threshold", 0.0)

    # Build the corpus only once the request is known to be acceptable.
    corpus = _documents_to_corpus(documents, subject_index=None)
    try:
        hit_sets = project.suggest_corpus(corpus).filter(limit, threshold)
    except AnnifException as err:
        return server_error(err)

    return _hit_sets_to_list(hit_sets, project.subjects, lang)
223
224
225
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """turn the given document dicts into a DocumentList

    Without a subject index, every dict that has a "text" entry becomes a
    Document with subject_set=None. With a subject index, only dicts having
    both "text" and "subjects" are used, and the "uri" of each subject is
    passed through subject_index.by_uri to build the SubjectSet. Dicts
    lacking the needed entries are skipped.
    """

    docs = []
    if subject_index is None:
        for item in documents:
            if "text" in item:
                docs.append(Document(text=item["text"], subject_set=None))
    else:
        for item in documents:
            if "text" not in item or "subjects" not in item:
                continue
            text = item["text"]
            subject_ids = [
                subject_index.by_uri(subj["uri"]) for subj in item["subjects"]
            ]
            docs.append(Document(text=text, subject_set=SubjectSet(subject_ids)))
    return DocumentList(docs)
245
246
247
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """learn from documents and return an empty 204 response if successful

    Returns a (None, 204, headers) tuple on success. Otherwise a Connexion
    problem response is returned: 404 when the project lookup raises
    ValueError, 403 when NotEnabledException is raised, and 503 for any
    other AnnifException.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except NotEnabledException:
        # listed before the AnnifException handler so that it gets the 403
        # response rather than the generic 503
        return learning_not_enabled_error(project_id)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
267