Passed
Pull Request — main (#839)
by Osma
06:48 queued 03:30
created

annif.rest.list_vocabs()   A

Complexity

Conditions 1

Size

Total Lines 10
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nop 0
dl 0
loc 10
rs 10
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
import annif.simplemma_util
13
from annif.corpus import Document, DocumentList, SubjectSet
14
from annif.exception import AnnifException
15
from annif.project import Access
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """Build the Connexion problem response used when a project is unknown.

    The response has HTTP status 404 and its detail message names the
    project_id that was requested.
    """

    detail_text = "Project '{}' not found".format(project_id)
    return connexion.problem(
        status=404,
        title="Project not found",
        detail=detail_text,
    )
32
33
34
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """Build the Connexion problem response for a server-side failure.

    Used for project or backend problems: the response has HTTP status 503
    and its detail is the message produced by err.format_message().
    """

    detail_text = err.format_message()
    return connexion.problem(
        status=503,
        title="Service unavailable",
        detail=detail_text,
    )
43
44
45
def show_info() -> tuple:
    """Return the API title and the installed Annif version.

    Returns:
        A (body, status, headers) tuple. The body is a dict with the keys
        "title" and "version"; the version is looked up from the installed
        "annif" distribution metadata.
    """

    # A plain ``import importlib`` does not guarantee that the ``metadata``
    # submodule has been loaded, so import it explicitly instead of relying
    # on some other module having imported it first.
    from importlib import metadata

    result = {"title": "Annif REST API", "version": metadata.version("annif")}
    return result, 200, {"Content-Type": "application/json"}
50
51
52
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """Build the Connexion problem response for an unsupported language.

    The response has HTTP status 400 and its detail message quotes the
    language code that was requested.
    """

    detail_text = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=detail_text)
60
61
62
def list_vocabs() -> tuple:
    """List the vocabularies that are available with public access.

    Returns a (body, status, headers) tuple; the body is a dict whose
    "vocabs" key holds the dump() of each vocabulary from the registry.
    """

    public_vocabs = annif.registry.get_vocabs(min_access=Access.public)
    dumped = [vocab.dump() for vocab in public_vocabs.values()]
    return {"vocabs": dumped}, 200, {"Content-Type": "application/json"}
72
73
74
def list_projects() -> tuple:
    """List the projects that are available with public access.

    Returns a (body, status, headers) tuple; the body is a dict whose
    "projects" key holds the dump() of each project from the registry.
    """

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [proj.dump() for proj in public_projects.values()]
    return {"projects": dumped}, 200, {"Content-Type": "application/json"}
84
85
86
def show_project(
    project_id: str,
) -> tuple | ConnexionResponse:
    """Return a single project formatted according to OpenAPI spec.

    Args:
        project_id: identifier of the project to look up in the registry
            (looked up with min_access=Access.hidden).

    Returns:
        A (body, status, headers) tuple where the body is project.dump(),
        or a 404 problem response when the registry lookup raises
        ValueError.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return project.dump(), 200, {"Content-Type": "application/json"}
96
97
98
def detect_language(body: dict[str, Any]) -> tuple | ConnexionResponse:
    """Return scores for detected languages formatted according to OpenAPI spec.

    Args:
        body: request body; the "text" and "languages" keys are read from it.

    Returns:
        A (body, status, headers) tuple whose body holds a "results" list of
        {"language", "score"} dicts, or a 400 problem response when the
        candidate languages are missing or the detector raises ValueError.
        A detected language reported as "unk" is returned as None.
    """

    text = body.get("text")
    languages = body.get("languages")

    if languages is None:
        # tuple(None) would raise TypeError and surface as an unhandled
        # server error; report the malformed request explicitly instead.
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="no candidate languages given",
        )

    try:
        proportions = annif.simplemma_util.detect_language(text, tuple(languages))
    except ValueError:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail="unsupported candidate languages",
        )

    result = {
        "results": [
            {"language": lang if lang != "unk" else None, "score": score}
            for lang, score in proportions.items()
        ]
    }
    return result, 200, {"Content-Type": "application/json"}
120
121
122
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """Convert one suggestion into a dict of uri, label, notation and score.

    The subject is looked up in subject_index by suggestion.subject_id and
    the label is taken from the subject's labels for the given language.
    """
    entry = subject_index[suggestion.subject_id]
    return dict(
        uri=entry.uri,
        label=entry.labels[language],
        notation=entry.notation,
        score=suggestion.score,
    )
132
133
134
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """Convert each set of hits into a {"results": [...]} dict.

    Every hit is converted with _suggestion_to_dict using the given subject
    index and language; the output list follows the order of hit_sets.
    """
    converted = []
    for hits in hit_sets:
        rows = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": rows})
    return converted
141
142
143
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """Tell whether result is a ConnexionResponse with status code >= 400."""
    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
148
149
150
def suggest(
    project_id: str, body: dict[str, Any]
) -> tuple | ConnexionResponse:
    """Suggest subjects for the given text, formatted according to OpenAPI spec.

    Args:
        project_id: identifier of the project to use for suggestions.
        body: request body; "text" is required, while "language", "limit"
            and "threshold" are forwarded as parameters when present.

    Returns:
        A (body, status, headers) tuple whose body is the results dict for
        the single submitted text, or the error response produced by
        _suggest.
    """

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    documents = [{"text": body["text"]}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # exactly one document was submitted, so only the first entry is relevant
    return result[0], 200, {"Content-Type": "application/json"}
165
166
167
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """Suggest subjects for the given documents, formatted per OpenAPI spec.

    Args:
        project_id: identifier of the project to use for suggestions.
        body: request body; its "documents" list is passed to _suggest.
        **query_parameters: forwarded to _suggest as the parameters dict.

    Returns:
        A (body, status, headers) tuple whose body is a list of result
        dicts, each extended with the "document_id" of its source document
        (None when the document has none), or the error response produced
        by _suggest.
    """

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # _documents_to_corpus skips documents that have no "text" key, so pair
    # the results only with the documents that were actually processed;
    # zipping against the full list would shift the document_id values.
    processed = (document for document in documents if "text" in document)
    for document_results, document in zip(result, processed):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
183
184
185
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """Run suggestion for the documents and return per-document result dicts.

    Returns an error response instead when the project is not found, when an
    AnnifException is raised, or when the language is not among the
    languages of the project vocabulary. The "limit" parameter defaults to
    10 and "threshold" to 0.0.
    """
    docs_corpus = _documents_to_corpus(documents, subject_index=None)

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    # fall back to the project's vocabulary language when none is requested
    try:
        language = parameters.get("language") or project.vocab_lang
    except AnnifException as exc:
        return server_error(exc)

    if lang_unsupported := language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)

    try:
        suggestions = project.suggest_corpus(docs_corpus)
        hit_sets = suggestions.filter(max_hits, min_score)
    except AnnifException as exc:
        return server_error(exc)

    return _hit_sets_to_list(hit_sets, project.subjects, language)
213
214
215
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """Wrap the given document dicts into a DocumentList.

    Without a subject index, every dict that has a "text" key becomes a
    Document with no subject set. With a subject index, only dicts that have
    both "text" and "subjects" are kept, and each subject's "uri" is passed
    through subject_index.by_uri to build the document's SubjectSet.
    """
    if subject_index is None:
        plain_docs = [
            Document(text=doc["text"], subject_set=None)
            for doc in documents
            if "text" in doc
        ]
        return DocumentList(plain_docs)

    labelled_docs = []
    for doc in documents:
        if "text" not in doc or "subjects" not in doc:
            continue
        resolved = [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
        labelled_docs.append(
            Document(text=doc["text"], subject_set=SubjectSet(resolved))
        )
    return DocumentList(labelled_docs)
235
236
237
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """Learn from documents and return an empty 204 response if successful.

    Args:
        project_id: identifier of the project that should learn.
        body: list of document dicts, converted to a corpus using the
            project's subject index.

    Returns:
        A (None, 204, headers) tuple on success, a 404 problem response when
        the project lookup raises ValueError, or a 503 problem response when
        building the corpus or learning raises AnnifException.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        # project.subjects is accessed inside the try block so that an
        # AnnifException raised there is also reported as a server error
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
255