annif.rest   A
last analyzed

Complexity

Total Complexity 29

Size/Duplication

Total Lines 254
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 153
dl 0
loc 254
rs 10
c 0
b 0
f 0
wmc 29

15 Functions

Rating   Name   Duplication   Size   Complexity  
A project_not_found_error() 0 7 1
A _is_error() 0 4 1
A list_vocabs() 0 10 1
A language_not_supported_error() 0 7 1
A learning_not_enabled_error() 0 7 1
A detect_language() 0 22 3
A show_project() 0 10 2
A server_error() 0 8 1
A show_info() 0 5 1
A list_projects() 0 10 1
A _documents_to_corpus() 0 23 2
A suggest() 0 20 2
A learn() 0 20 4
A suggest_batch() 0 16 3
B _suggest() 0 28 5
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
import annif.simplemma_util
13
from annif.corpus import Document, DocumentList, SubjectSet
14
from annif.exception import AnnifException, NotEnabledException
15
from annif.project import Access
16
from annif.util import suggestion_results_to_list
17
18
if TYPE_CHECKING:
19
    from connexion.lifecycle import ConnexionResponse
20
21
    from annif.corpus.subject import SubjectIndex
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """build the Connexion 404 problem response used when no project matches
    the given project ID"""

    message = f"Project '{project_id}' not found"
    return connexion.problem(status=404, title="Project not found", detail=message)
32
33
34
def learning_not_enabled_error(project_id) -> ConnexionResponse:
    """build the Connexion 403 problem response used when a project is not
    configured for learning"""

    message = f"Project '{project_id}' is not configured to allow learning via API"
    return connexion.problem(
        status=403, title="Learning not allowed", detail=message
    )
42
43
44
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """build the Connexion 503 problem response used for a server error
    (project or backend problem); the detail text is err.format_message()"""

    message = err.format_message()
    return connexion.problem(
        status=503,
        title="Service unavailable",
        detail=message,
    )
53
54
55
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    The result is a (body, status, headers) tuple. The version is read from
    the metadata of the installed "annif" distribution."""

    # Import the submodule explicitly: a bare ``import importlib`` does not
    # guarantee that ``importlib.metadata`` has been loaded, so relying on the
    # attribute only works if some other module imported it first.
    from importlib import metadata

    result = {"title": "Annif REST API", "version": metadata.version("annif")}
    return result, 200, {"Content-Type": "application/json"}
60
61
62
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build the Connexion 400 problem response used when the requested
    language is not supported"""

    message = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=message)
70
71
72
def list_vocabs() -> tuple:
    """return a (body, status, headers) tuple whose body lists the dumps of
    all vocabularies with at least public access"""

    public_vocabs = annif.registry.get_vocabs(min_access=Access.public)
    dumped = [vocab.dump() for vocab in public_vocabs.values()]
    return {"vocabs": dumped}, 200, {"Content-Type": "application/json"}
82
83
84
def list_projects() -> tuple:
    """return a (body, status, headers) tuple whose body lists the dumps of
    all projects with at least public access"""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [proj.dump() for proj in public_projects.values()]
    return {"projects": dumped}, 200, {"Content-Type": "application/json"}
94
95
96
def show_project(
    project_id: str,
) -> tuple | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    On success the result is a (body, status, headers) tuple whose body is
    project.dump(). If the registry lookup raises ValueError, a 404 problem
    response is returned instead. Projects are looked up with
    min_access=Access.hidden."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump(), 200, {"Content-Type": "application/json"}
106
107
108
def detect_language(body: dict[str, Any]):
    """return scores for detected languages formatted according to OpenAPI spec

    Reads "text" and "languages" from the request body. A ValueError from the
    detector is reported as a 400 problem response. The "unk" label is
    reported with a language of None."""

    text = body.get("text")
    candidates = body.get("languages")

    try:
        proportions = annif.simplemma_util.detect_language(text, tuple(candidates))
    except ValueError:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="unsupported candidate languages",
        )

    scores = []
    for lang, score in proportions.items():
        reported_lang = None if lang == "unk" else lang
        scores.append({"language": reported_lang, "score": score})

    return {"results": scores}, 200, {"Content-Type": "application/json"}
130
131
132
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether result is a ConnexionResponse with a status code of 400
    or above"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
137
138
139
def suggest(
    project_id: str, body: dict[str, Any]
) -> tuple | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    The optional "language", "limit" and "threshold" body fields are passed on
    as suggestion parameters. Every body field named "metadata_<name>" becomes
    document metadata under the key "<name>". On success the result is a
    (body, status, headers) tuple; on failure the error response produced by
    _suggest is returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }

    prefix = "metadata_"
    metadata = {
        key[len(prefix) :]: value
        for key, value in body.items()
        if key.startswith(prefix)
    }

    documents = [{"text": body["text"], "metadata": metadata}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # A single document was submitted, so only the first result is relevant.
    return result[0], 200, {"Content-Type": "application/json"}
159
160
161
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    The documents are read from body["documents"] and the query parameters are
    passed on as suggestion parameters. Each result dict gets the
    "document_id" of the document it belongs to (None when the document has
    none). On success the result is a (body, status, headers) tuple; on
    failure the error response produced by _suggest is returned unchanged."""

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result

    # _documents_to_corpus skips documents that lack a "text" field, so only
    # the remaining documents have a result. Pairing results with the
    # unfiltered list would attach document IDs to the wrong results.
    suggested_documents = [doc for doc in documents if "text" in doc]
    for document_results, document in zip(result, suggested_documents):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
177
178
179
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggestion for the documents on the given project and return the
    results as a list, or an error response

    Error responses: 404 when the project lookup raises ValueError, 400 when
    the language is not among the vocabulary languages, 503 when an
    AnnifException is raised while resolving the language or suggesting."""

    corpus = _documents_to_corpus(documents, subject_index=None)

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    # An empty or missing language falls back to the project's vocabulary language.
    requested_lang = parameters.get("language")
    try:
        lang = requested_lang if requested_lang else project.vocab_lang
    except AnnifException as err:
        return server_error(err)

    supported_langs = project.vocab.languages
    if lang not in supported_langs:
        return language_not_supported_error(lang)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)

    try:
        raw_results = project.suggest_corpus(corpus)
        filtered_results = raw_results.filter(max_hits, min_score)
    except AnnifException as err:
        return server_error(err)

    return suggestion_results_to_list(filtered_results, project.subjects, lang)
207
208
209
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """convert document dicts into a DocumentList

    Documents without a "text" key are skipped. When a subject index is
    given, documents without a "subjects" key are skipped as well and each
    subject's "uri" is resolved through subject_index.by_uri. Without a
    subject index, the documents get no subject set. Metadata defaults to an
    empty dict."""

    converted = []
    for doc in documents:
        if "text" not in doc:
            continue

        if subject_index is None:
            subject_set = None
        elif "subjects" in doc:
            subject_set = SubjectSet(
                [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
            )
        else:
            continue

        converted.append(
            Document(
                text=doc["text"],
                subject_set=subject_set,
                metadata=doc.get("metadata", {}),
            )
        )
    return DocumentList(converted)
232
233
234
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """learn from documents and return an empty 204 response if successful

    Error responses: 404 when the project lookup raises ValueError, 403 when
    NotEnabledException is raised, 503 for any other AnnifException. On
    success the result is a (None, 204, headers) tuple."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except NotEnabledException:
        # Must be caught before the generic AnnifException handler below.
        return learning_not_enabled_error(project_id)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
254