Passed
Push — reconciliation-api ( 57c289...eaec71 )
by
unknown
03:06
created

annif.rest._hit_sets_to_list()   A

Complexity

Conditions 1

Size

Total Lines 6
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 5
nop 3
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
from __future__ import annotations
4
5
import importlib
6
import json
7
from typing import TYPE_CHECKING, Any
8
9
import connexion
10
11
import annif.registry
12
from annif.corpus import Document, DocumentList, SubjectSet
13
from annif.exception import AnnifException
14
from annif.project import Access
15
16
if TYPE_CHECKING:
17
    from datetime import datetime
18
19
    from connexion.lifecycle import ConnexionResponse
20
21
    from annif.corpus.subject import SubjectIndex
22
    from annif.suggestion import SubjectSuggestion, SuggestionResults
23
24
25
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 404) that is returned when
    no project can be found for the given project_id"""

    message = f"Project '{project_id}' not found"
    return connexion.problem(status=404, title="Project not found", detail=message)
33
34
35
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 503) for a project or
    backend failure; the detail text is taken from err.format_message()"""

    message = err.format_message()
    return connexion.problem(
        status=503,
        title="Service unavailable",
        detail=message,
    )
44
45
46
def show_info() -> dict[str, str]:
    """return version of annif and a title for the api according to OpenAPI spec

    The result is a dict with the keys "title" (a fixed string) and "version"
    (the version of the installed "annif" distribution, read from its package
    metadata). importlib.metadata raises PackageNotFoundError if no such
    distribution is installed."""

    # "import importlib" on its own does not guarantee that the "metadata"
    # submodule has been loaded, so import it explicitly here instead of
    # relying on some other module having imported it first.
    from importlib.metadata import version

    return {"title": "Annif REST API", "version": version("annif")}
50
51
52
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """build the Connexion problem response (HTTP 400) that is returned when
    the requested language is not supported by the vocabulary"""

    message = f'language "{lang}" not supported by vocabulary'
    return connexion.problem(status=400, title="Bad Request", detail=message)
60
61
62
def list_projects() -> dict[str, list[dict[str, str | dict | bool | datetime | None]]]:
    """return a dict whose "projects" key holds the dump() of every project
    that the registry returns for public access, formatted according to
    OpenAPI spec"""

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [proj.dump() for proj in public_projects.values()]
    return {"projects": dumped}
71
72
73
def show_project(
    project_id: str,
) -> dict | ConnexionResponse:
    """return the dump() of a single project formatted according to OpenAPI
    spec, or a 404 problem response if the registry lookup raises ValueError"""

    try:
        found = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return found.dump()
83
84
85
def _suggestion_to_dict(
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
) -> dict[str, str | float | None]:
    """convert one suggestion into a dict with the keys uri, label, notation
    and score; the subject is looked up from subject_index using the
    suggestion's subject_id and the label is picked for the given language"""

    entry = subject_index[suggestion.subject_id]
    uri = entry.uri
    label = entry.labels[language]
    notation = entry.notation
    return {
        "uri": uri,
        "label": label,
        "notation": notation,
        "score": suggestion.score,
    }
95
96
97
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert every set of hits in hit_sets into a dict of the form
    {"results": [...]}, where each hit has been converted with
    _suggestion_to_dict, and return those dicts as a list in the same order"""

    converted = []
    for hits in hit_sets:
        suggestions = [_suggestion_to_dict(hit, subjects, lang) for hit in hits]
        converted.append({"results": suggestions})
    return converted
104
105
106
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """tell whether result is a ConnexionResponse whose status code is 400 or
    above; anything that is not a ConnexionResponse is never an error"""

    if not isinstance(result, connexion.lifecycle.ConnexionResponse):
        return False
    return result.status_code >= 400
111
112
113
def suggest(
    project_id: str, body: dict[str, Any]
) -> dict[str, list] | ConnexionResponse:
    """suggest subjects for the text in body["text"] and return a dict with
    results formatted according to OpenAPI spec

    The optional body keys "language", "limit" and "threshold" are passed on
    as suggestion parameters. An error response from the suggestion step is
    returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    result = _suggest(project_id, [{"text": body["text"]}], parameters)

    # exactly one document was submitted, so its results are the first entry
    return result if _is_error(result) else result[0]
128
129
130
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> list[dict[str, Any]] | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    The documents are read from body["documents"] and the query parameters
    are passed on as suggestion parameters. Each returned dict gets a
    "document_id" key holding the document_id of the document it belongs to
    (None if the document has none). An error response from the suggestion
    step is returned unchanged."""

    # _documents_to_corpus() skips documents that have no "text" key, so only
    # documents with text produce a result. Apply the same filter up front;
    # otherwise zip() below would pair results with the wrong documents and
    # assign incorrect document_id values.
    documents = [document for document in body["documents"] if "text" in document]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    for document_results, document in zip(result, documents):
        document_results["document_id"] = document.get("document_id")
    return result
146
147
148
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggestion for the documents against a project and return the
    results as a list with one {"results": [...]} dict per document

    The language comes from parameters["language"] or, if that is missing or
    empty, from the project's vocab_lang. "limit" defaults to 10 and
    "threshold" to 0.0. A problem response is returned instead when the
    project is not found (404), the language is not among the vocabulary
    languages (400) or an AnnifException is raised (503)."""

    doc_list = _documents_to_corpus(documents, subject_index=None)
    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        language = parameters.get("language") or project.vocab_lang
    except AnnifException as exc:
        return server_error(exc)
    if language not in project.vocab.languages:
        return language_not_supported_error(language)

    max_hits = parameters.get("limit", 10)
    min_score = parameters.get("threshold", 0.0)
    try:
        suggestions = project.suggest_corpus(doc_list)
        filtered = suggestions.filter(max_hits, min_score)
    except AnnifException as exc:
        return server_error(exc)

    return _hit_sets_to_list(filtered, project.subjects, language)
176
177
178
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """wrap the given document dicts into a DocumentList

    Without a subject_index, every dict that has a "text" key becomes a
    Document with no subject set. With a subject_index, only dicts that have
    both "text" and "subjects" keys are used, and the subject set is built by
    passing the "uri" of each listed subject to subject_index.by_uri().
    Dicts lacking the required keys are skipped."""

    if subject_index is None:
        docs = [
            Document(text=doc["text"], subject_set=None)
            for doc in documents
            if "text" in doc
        ]
        return DocumentList(docs)

    docs = []
    for doc in documents:
        if "text" not in doc or "subjects" not in doc:
            continue
        docs.append(
            Document(
                text=doc["text"],
                subject_set=SubjectSet(
                    [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
                ),
            )
        )
    return DocumentList(docs)
198
199
200
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int]:
    """learn from the documents in body and return an empty 204 response if
    successful

    A 404 problem response is returned if the project is not found, and a 503
    problem response if building the corpus or learning raises an
    AnnifException."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        project.learn(_documents_to_corpus(body, project.subjects))
    except AnnifException as exc:
        return server_error(exc)
    return None, 204
218
219
220
def _reconcile(
    project_id: str, query: dict[str, Any]
) -> list[dict[str, Any]] | ConnexionResponse:
    """run a single reconciliation query against a project

    The text to match is query["query"]; query["limit"], when present, is
    passed on as the suggestion limit. Each suggestion is returned as a dict
    with the keys "id" (the subject URI), "name" (the label), "score" and
    "match" (True only when the label equals the query text exactly). An
    error response from the suggestion step is returned unchanged, which is
    why the return type also includes ConnexionResponse."""

    document = [{"text": query["query"]}]
    parameters = {"limit": query["limit"]} if "limit" in query else {}
    result = _suggest(project_id, document, parameters)

    if _is_error(result):
        return result

    # exactly one document was submitted, so its results are the first entry
    return [
        {
            "id": res["uri"],
            "name": res["label"],
            "score": res["score"],
            "match": res["label"] == query["query"],
        }
        for res in result[0]["results"]
    ]
238
239
240
def reconcile_metadata(
    project_id: str, **query_parameters
) -> ConnexionResponse | dict[str, Any]:
    """return service manifest or reconcile against a project and return a dict
    with results formatted according to OpenAPI spec

    Without any query parameters the service manifest of the project is
    returned. Otherwise the "queries" parameter must hold a JSON object that
    maps query keys to query dicts; every query is reconciled and the result
    maps each key to {"result": [...]}.

    A 404 problem response is returned if the project is not found, a 400
    problem response if "queries" is missing or is not a JSON object, and the
    error response of the first failing query otherwise."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    if not query_parameters:
        return {
            "versions": ["0.2"],
            "name": "Annif Reconciliation Service for " + project.name,
            "identifierSpace": "",
            "schemaSpace": "http://www.w3.org/2004/02/skos/core#Concept",
            "view": {"url": "{{id}}"},
            "defaultTypes": [{"id": "default-type", "name": "Default type"}],
            "suggest": {
                "entity": {
                    "service_path": "/suggest/entity",
                    # TODO: the host is hard-coded; it should be replaced by
                    # the actual host url of the running service
                    "service_url": "http://localhost:5000/v1/projects/"
                    + project_id
                    + "/reconcile",
                }
            },
        }

    # A missing parameter, malformed JSON or a JSON value that is not an
    # object would otherwise surface as an unhandled exception; report these
    # as a client error instead.
    try:
        queries = json.loads(query_parameters["queries"])
    except (KeyError, TypeError, json.JSONDecodeError):
        queries = None
    if not isinstance(queries, dict):
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail='parameter "queries" must be a JSON object',
        )

    results = {}
    for key, query in queries.items():
        data = _reconcile(project_id, query)
        if _is_error(data):
            return data
        results[key] = {"result": data}

    return results
278
279
280
def reconcile(
    project_id: str, body: dict[str, Any]
) -> ConnexionResponse | dict[str, Any]:
    """reconcile every query in body["queries"] against a project and return
    a dict, formatted according to OpenAPI spec, that maps each query key to
    {"result": [...]}

    The error response of the first failing query is returned as such."""

    reconciled = {}
    for query_key, query in body["queries"].items():
        outcome = _reconcile(project_id, query)
        if _is_error(outcome):
            return outcome
        reconciled[query_key] = {"result": outcome}
    return reconciled
295
296
297
def reconcile_suggest(
    project_id: str, **query_parameters
) -> ConnexionResponse | dict[str, Any]:
    """suggest results for the search term given in the "prefix" query
    parameter and return a dict with results formatted according to OpenAPI
    spec

    The optional "cursor" parameter (0 when missing or falsy) is the number of
    leading results to skip: cursor + 10 suggestions are requested and the
    first cursor of them are dropped. An error response from the suggestion
    step is returned unchanged."""

    prefix = query_parameters.get("prefix")
    cursor = query_parameters.get("cursor") or 0

    result = _suggest(project_id, [{"text": prefix}], {"limit": cursor + 10})
    if _is_error(result):
        return result

    entries = [
        {"id": res["uri"], "name": res["label"]} for res in result[0]["results"]
    ]
    return {"result": entries[cursor:]}
313