annif.rest.list_projects()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nop 0
dl 0
loc 10
rs 10
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
import annif.simplemma_util
13
from annif.corpus import Document, DocumentList, SubjectSet
14
from annif.exception import AnnifException, NotEnabledException
15
from annif.project import Access
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """return a Connexion error object (HTTP 404) when a project is not found"""

    return connexion.problem(
        status=404,
        title="Project not found",
        detail=f"Project '{project_id}' not found",
    )
32
33
34
def learning_not_enabled_error(project_id: str) -> ConnexionResponse:
    """return a Connexion error object (HTTP 403) when a project is not
    configured for learning"""

    return connexion.problem(
        status=403,
        title="Learning not allowed",
        detail=f"Project '{project_id}' is not configured to allow learning via API",
    )
42
43
44
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """build a Connexion problem response (HTTP 503) whose detail is the
    formatted message of the given Annif exception"""

    message = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=message)
53
54
55
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    The result is a (body, status, headers) tuple. The version is looked up
    from the installed "annif" distribution metadata."""

    # "import importlib" alone does not guarantee that the "metadata"
    # submodule has been loaded, so import it explicitly before using it
    import importlib.metadata

    result = {
        "title": "Annif REST API",
        "version": importlib.metadata.version("annif"),
    }
    return result, 200, {"Content-Type": "application/json"}
60
61
62
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build a Connexion problem response (HTTP 400) for a request that asks
    for a language the vocabulary does not support"""

    explanation = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=explanation)
70
71
72
def list_vocabs() -> tuple:
    """list the vocabularies available at public access level, each one
    serialized with its dump() method, as a (body, status, headers) tuple"""

    public_vocabs = annif.registry.get_vocabs(min_access=Access.public)
    dumped = [vocab.dump() for vocab in public_vocabs.values()]
    return {"vocabs": dumped}, 200, {"Content-Type": "application/json"}
82
83
84
def list_projects() -> tuple:
    """list the projects available at public access level, each one
    serialized with its dump() method, as a (body, status, headers) tuple"""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [proj.dump() for proj in public_projects.values()]
    return {"projects": dumped}, 200, {"Content-Type": "application/json"}
94
95
96
def show_project(
    project_id: str,
) -> tuple | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    On success the result is a (body, status, headers) tuple whose body is
    the project's dump(). A 404 problem response is returned when the
    registry lookup raises ValueError."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump(), 200, {"Content-Type": "application/json"}
106
107
108
def detect_language(body: dict[str, Any]) -> tuple | ConnexionResponse:
    """return scores for detected languages formatted according to OpenAPI spec

    Reads "text" and "languages" from the request body. A 400 problem response
    is returned when the candidate languages are missing or when the detector
    rejects them with a ValueError; otherwise the result is a (body, status,
    headers) tuple in which the "unk" language code is reported as None."""

    text = body.get("text")
    languages = body.get("languages")

    # tuple(None) would raise an unhandled TypeError, so reject a missing
    # candidate list up front as a client error
    if languages is None:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="candidate languages not given",
        )

    try:
        proportions = annif.simplemma_util.detect_language(text, tuple(languages))
    except ValueError:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="unsupported candidate languages",
        )

    result = {
        "results": [
            {"language": lang if lang != "unk" else None, "score": score}
            for lang, score in proportions.items()
        ]
    }
    return result, 200, {"Content-Type": "application/json"}
130
131
132
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """convert one suggestion into a dict with the URI, label (in the given
    language) and notation of the suggested subject, plus the score"""

    # the suggestion only carries a subject id; the subject itself lives in
    # the subject index
    matched = subject_index[suggestion.subject_id]
    payload = {
        "uri": matched.uri,
        "label": matched.labels[language],
        "notation": matched.notation,
        "score": suggestion.score,
    }
    return payload
142
143
144
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert each set of hits into a {"results": [...]} dict, keeping the
    order in which the hit sets are iterated"""

    converted = []
    for hits in hit_sets:
        entries = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": entries})
    return converted
151
152
153
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether the given result is a Connexion response with an HTTP
    status code of 400 or above"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
158
159
160
def suggest(
    project_id: str, body: dict[str, Any]
) -> tuple | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    The "language", "limit" and "threshold" entries of the body, when present,
    are passed on as suggestion parameters. Body entries whose key starts with
    "metadata_" are collected as document metadata with that prefix removed.
    On success the result is a (body, status, headers) tuple; otherwise the
    error response produced while suggesting is returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    prefix = "metadata_"
    metadata = {
        key[len(prefix) :]: value
        for key, value in body.items()
        if key.startswith(prefix)
    }
    documents = [{"text": body["text"], "metadata": metadata}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # a single document was submitted, so only its result is returned
    return result[0], 200, {"Content-Type": "application/json"}
180
181
182
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    Each result dict gets a "document_id" entry copied from the corresponding
    input document (None when the document has no id). On success the result
    is a (body, status, headers) tuple; otherwise the error response produced
    while suggesting is returned unchanged."""

    # Documents without a "text" entry are skipped when the corpus is built,
    # so drop them here as well; otherwise the results would be paired with
    # the wrong documents below and receive wrong document ids.
    documents = [doc for doc in body["documents"] if "text" in doc]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    for document_results, document in zip(result, documents):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
198
199
200
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run the suggest operation of a project on the given documents

    Returns one {"results": [...]} dict per hit set, or a Connexion problem
    response when the project is not found, the language is not among the
    vocabulary languages, or an AnnifException is raised along the way."""

    # the corpus is built before the project lookup; no subject index is
    # needed since the documents carry no subjects here
    doc_corpus = _documents_to_corpus(documents, subject_index=None)

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    # fall back to the project's vocabulary language when none was requested
    try:
        language = parameters.get("language") or project.vocab_lang
    except AnnifException as exc:
        return server_error(exc)

    if language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)

    try:
        suggestions = project.suggest_corpus(doc_corpus)
        filtered = suggestions.filter(max_hits, min_score)
    except AnnifException as exc:
        return server_error(exc)

    return _hit_sets_to_list(filtered, project.subjects, language)
228
229
230
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """wrap the given document dicts into a DocumentList

    Without a subject index, every dict that has a "text" entry becomes a
    Document with no subject set. With a subject index, only dicts that have
    both "text" and "subjects" are used, and the "uri" of each subject is
    looked up through the index. Missing "metadata" defaults to an empty dict."""

    if subject_index is None:
        plain_docs = [
            Document(
                text=entry["text"],
                subject_set=None,
                metadata=entry.get("metadata", {}),
            )
            for entry in documents
            if "text" in entry
        ]
        return DocumentList(plain_docs)

    labelled_docs = []
    for entry in documents:
        if "text" not in entry or "subjects" not in entry:
            continue
        labelled_docs.append(
            Document(
                text=entry["text"],
                subject_set=SubjectSet(
                    [subject_index.by_uri(subj["uri"]) for subj in entry["subjects"]]
                ),
                metadata=entry.get("metadata", {}),
            )
        )
    return DocumentList(labelled_docs)
253
254
255
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """learn from documents and return an empty 204 response if successful

    A problem response is returned instead when the project is not found
    (404), when learning raises NotEnabledException (403), or when another
    AnnifException is raised (503)."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except NotEnabledException:
        # must be handled before the generic AnnifException clause below
        return learning_not_enabled_error(project_id)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
275