Passed
Push — issue631-rest-api-language-det... ( 34c253...1cd800 )
by Osma
04:27
created

annif.rest.suggest()   A

Complexity

Conditions 2

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 9
nop 2
dl 0
loc 15
rs 9.95
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
4
from __future__ import annotations
5
6
import importlib
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
from annif.corpus import Document, DocumentList, SubjectSet
13
from annif.exception import AnnifException
14
from annif.project import Access
15
from annif.simplemma_util import get_language_detector
16
17
if TYPE_CHECKING:
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """return a Connexion error object when a project is not found

    The problem response carries HTTP status 404 and quotes the requested
    ``project_id`` in its detail message."""

    # f-string used for consistency with language_not_supported_error();
    # the rendered message is unchanged.
    return connexion.problem(
        status=404,
        title="Project not found",
        detail=f"Project '{project_id}' not found",
    )
32
33
34
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """build the Connexion error object reported for a server-side failure
    (project or backend problem)

    The response has HTTP status 503 and uses the formatted message of
    ``err`` as its detail."""

    message = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=message)
43
44
45
def show_info() -> tuple:
    """return version of annif and a title for the api according to OpenAPI spec

    Returns a ``(body, status, headers)`` tuple whose body holds the API title
    and the installed version of the ``annif`` distribution."""

    # Import the submodule explicitly: a bare "import importlib" does not
    # guarantee that the "importlib.metadata" attribute has been loaded.
    import importlib.metadata

    result = {
        "title": "Annif REST API",
        "version": importlib.metadata.version("annif"),
    }
    return result, 200, {"Content-Type": "application/json"}
50
51
52
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build the Connexion error object for a request naming an unsupported language

    The response has HTTP status 400 and quotes ``lang`` in its detail."""

    problem_fields = {
        "status": 400,
        "title": "Bad Request",
        "detail": f'language "{lang}" not supported by vocabulary',
    }
    return connexion.problem(**problem_fields)
60
61
62
def list_projects() -> tuple:
    """list the publicly accessible projects, formatted according to OpenAPI spec

    Returns a ``(body, status, headers)`` tuple; the body maps "projects" to
    the ``dump()`` output of every project fetched with public access."""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [project.dump() for project in public_projects.values()]
    return {"projects": dumped}, 200, {"Content-Type": "application/json"}
72
73
74
def show_project(
    project_id: str,
) -> tuple | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    On success returns a ``(body, status, headers)`` tuple whose body is the
    project's ``dump()`` output. If the registry lookup raises ValueError,
    a 404 problem response is returned instead."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump(), 200, {"Content-Type": "application/json"}
84
85
86
def detect_language(body: dict[str, Any]):
    """score the candidate languages for a text, formatted according to OpenAPI spec

    Reads "text" and "candidates" from ``body``. Returns a 400 problem
    response when no candidates are given or when the detector raises
    ValueError; otherwise returns a ``(body, status, headers)`` tuple whose
    body lists one ``{"language", "score"}`` entry per detected language.
    The detector's "unk" key is reported as a null language."""

    def bad_request(detail: str):
        return connexion.problem(status=400, title="Bad Request", detail=detail)

    candidates = body.get("candidates")
    if not candidates:
        return bad_request("no candidate languages given")

    text = body.get("text")
    detector = get_language_detector(tuple(candidates))
    try:
        proportions = detector.proportion_in_each_language(text)
    except ValueError:
        return bad_request("unsupported candidate languages")

    scores = []
    for lang, score in proportions.items():
        # "unk" is presumably the detector's bucket for unrecognized text
        scores.append({"language": None if lang == "unk" else lang, "score": score})
    return {"results": scores}, 200, {"Content-Type": "application/json"}
116
117
118
def _suggestion_to_dict(
119
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
120
) -> dict[str, str | float | None]:
121
    subject = subject_index[suggestion.subject_id]
122
    return {
123
        "uri": subject.uri,
124
        "label": subject.labels[language],
125
        "notation": subject.notation,
126
        "score": suggestion.score,
127
    }
128
129
130
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert each set of hits into a ``{"results": [...]}`` dict

    Every hit is rendered with _suggestion_to_dict() using the given subject
    index and label language; the order of ``hit_sets`` is kept."""
    converted = []
    for hits in hit_sets:
        suggestions = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": suggestions})
    return converted
137
138
139
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether ``result`` is a ConnexionResponse with a status code of 400 or above"""
    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
144
145
146
def suggest(
    project_id: str, body: dict[str, Any]
) -> tuple | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    The optional "language", "limit" and "threshold" entries of ``body`` are
    forwarded as parameters. On success returns a ``(body, status, headers)``
    tuple holding the results for the single submitted text; on failure
    returns the error response produced by _suggest()."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    documents = [{"text": body["text"]}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # exactly one document was submitted, so only the first result set applies
    return result[0], 200, {"Content-Type": "application/json"}
161
162
163
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> tuple | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    On success returns a ``(body, status, headers)`` tuple whose body is a
    list of result dicts, each tagged with the "document_id" of the document
    it belongs to (None when the document has no id). On failure returns the
    error response produced by _suggest()."""

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # _suggest() builds its corpus with _documents_to_corpus(), which skips
    # documents lacking a "text" field. Pair the results only with the
    # documents that were actually processed so ids do not shift onto the
    # wrong result (assumes one result set per processed document, in order).
    processed = [document for document in documents if "text" in document]
    for document_results, document in zip(result, processed):
        document_results["document_id"] = document.get("document_id")
    return result, 200, {"Content-Type": "application/json"}
179
180
181
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggestion for ``documents`` in the given project

    Returns a list of ``{"results": [...]}`` dicts on success. Returns an
    error response when the project is unknown (404), the label language is
    not among the vocabulary's languages (400), or an AnnifException is
    raised (503). ``parameters`` may carry "language", "limit" (default 10)
    and "threshold" (default 0.0)."""
    corpus = _documents_to_corpus(documents, subject_index=None)
    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    requested_language = parameters.get("language")
    try:
        # fall back to the project's vocabulary language when none is requested
        language = requested_language or project.vocab_lang
    except AnnifException as exc:
        return server_error(exc)

    if language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)

    try:
        suggestions = project.suggest_corpus(corpus)
        filtered = suggestions.filter(max_hits, min_score)
    except AnnifException as exc:
        return server_error(exc)

    return _hit_sets_to_list(filtered, project.subjects, language)
209
210
211
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """wrap request documents in a DocumentList

    Without a subject index, every document with a "text" key becomes a
    Document with no subject set. With a subject index, only documents with
    both "text" and "subjects" are kept, and each subject's "uri" is resolved
    through ``subject_index.by_uri()`` to build the SubjectSet."""
    if subject_index is None:
        plain = [
            Document(text=item["text"], subject_set=None)
            for item in documents
            if "text" in item
        ]
        return DocumentList(plain)

    annotated = []
    for item in documents:
        if "text" not in item or "subjects" not in item:
            continue
        subject_ids = [subject_index.by_uri(subj["uri"]) for subj in item["subjects"]]
        annotated.append(
            Document(text=item["text"], subject_set=SubjectSet(subject_ids))
        )
    return DocumentList(annotated)
231
232
233
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int, dict[str, str]]:
    """learn from documents and return an empty 204 response if successful

    Returns a 404 problem response when the project lookup raises ValueError
    and a 503 problem response when building the corpus or learning raises an
    AnnifException."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except AnnifException as err:
        return server_error(err)

    return None, 204, {"Content-Type": "application/json"}
251