Passed
Push — reconciliation-api ( 994a70 )
by
unknown
07:27
created

annif.rest.reconcile_metadata()   A

Complexity

Conditions 3

Size

Total Lines 21
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 14
nop 2
dl 0
loc 21
rs 9.7
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the OpenAPI specification."""
3
from __future__ import annotations
4
5
import importlib
6
from typing import TYPE_CHECKING, Any
7
8
import connexion
9
10
import annif.registry
11
from annif.corpus import Document, DocumentList, SubjectSet
12
from annif.exception import AnnifException
13
from annif.project import Access
14
15
if TYPE_CHECKING:
16
    from datetime import datetime
17
18
    from connexion.lifecycle import ConnexionResponse
19
20
    from annif.corpus.subject import SubjectIndex
21
    from annif.suggestion import SubjectSuggestion, SuggestionResults
22
23
24
def project_not_found_error(project_id: str) -> ConnexionResponse:
    """return a Connexion error object when a project is not found

    The response is built with ``connexion.problem`` using status 404 and a
    detail message that names the requested ``project_id``."""

    return connexion.problem(
        status=404,
        title="Project not found",
        detail=f"Project '{project_id}' not found",
    )
32
33
34
def server_error(
    err: AnnifException,
) -> ConnexionResponse:
    """return a Connexion error object when there is a server error (project
    or backend problem)

    The response is built with ``connexion.problem`` using status 503; its
    detail text is whatever ``err.format_message()`` returns."""

    return connexion.problem(
        status=503, title="Service unavailable", detail=err.format_message()
    )
43
44
45
def show_info() -> dict[str, str]:
    """return version of annif and a title for the api according to OpenAPI spec

    The result has the keys "title" (a fixed string) and "version" (the
    installed version of the "annif" distribution as reported by
    ``importlib.metadata.version``)."""

    # "import importlib" alone does not guarantee that the "metadata"
    # submodule is loaded, so import it explicitly before using it.
    import importlib.metadata

    return {"title": "Annif REST API", "version": importlib.metadata.version("annif")}
49
50
51
def language_not_supported_error(lang: str) -> ConnexionResponse:
    """return a Connexion error object when attempting to use unsupported language

    The response is built with ``connexion.problem`` using status 400 and a
    detail message that quotes the rejected ``lang`` value."""

    return connexion.problem(
        status=400,
        title="Bad Request",
        detail=f'language "{lang}" not supported by vocabulary',
    )
59
60
61
def list_projects() -> dict[str, list[dict[str, str | dict | bool | datetime | None]]]:
    """return a dict with projects formatted according to OpenAPI spec

    The dict has a single key "projects" whose value is a list containing
    the ``dump()`` output of every project that
    ``annif.registry.get_projects`` returns for ``min_access=Access.public``."""

    return {
        "projects": [
            proj.dump()
            for proj in annif.registry.get_projects(min_access=Access.public).values()
        ]
    }
70
71
72
def show_project(
    project_id: str,
) -> dict | ConnexionResponse:
    """return a single project formatted according to OpenAPI spec

    The project is looked up with ``min_access=Access.hidden``. When the
    lookup raises ``ValueError`` a 404 "Project not found" response is
    returned; otherwise the result of the project's ``dump()`` is returned."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump()
82
83
84
def _suggestion_to_dict(
85
    suggestion: SubjectSuggestion, subject_index: SubjectIndex, language: str
86
) -> dict[str, str | float | None]:
87
    subject = subject_index[suggestion.subject_id]
88
    return {
89
        "uri": subject.uri,
90
        "label": subject.labels[language],
91
        "notation": subject.notation,
92
        "score": suggestion.score,
93
    }
94
95
96
def _hit_sets_to_list(
    hit_sets: SuggestionResults, subjects: SubjectIndex, lang: str
) -> list[dict[str, list]]:
    """convert suggestion results into a list with one {"results": [...]} dict
    per item of ``hit_sets``

    Each inner list holds the dict form (see ``_suggestion_to_dict``) of the
    suggestions in that item, in iteration order."""

    return [
        {"results": [_suggestion_to_dict(hit, subjects, lang) for hit in hits]}
        for hits in hit_sets
    ]
103
104
105
def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool:
    """return True when result is a ConnexionResponse whose status code is
    400 or higher, False for anything else"""

    return (
        isinstance(result, connexion.lifecycle.ConnexionResponse)
        and result.status_code >= 400
    )
110
111
112
def suggest(
    project_id: str, body: dict[str, Any]
) -> dict[str, list] | ConnexionResponse:
    """suggest subjects for the given text and return a dict with results
    formatted according to OpenAPI spec

    ``body`` must contain "text"; the optional keys "language", "limit" and
    "threshold" are forwarded as parameters when present. An error response
    from the underlying suggest call is returned unchanged."""

    parameters = {
        key: body[key] for key in ("language", "limit", "threshold") if key in body
    }
    documents = [{"text": body["text"]}]
    result = _suggest(project_id, documents, parameters)

    if _is_error(result):
        return result
    # exactly one document was submitted, so there is exactly one result set
    return result[0]
127
128
129
def suggest_batch(
    project_id: str,
    body: dict[str, list],
    **query_parameters,
) -> list[dict[str, Any]] | ConnexionResponse:
    """suggest subjects for the given documents and return a list of dicts with results
    formatted according to OpenAPI spec

    ``body["documents"]`` is the list of documents to process and
    ``query_parameters`` are forwarded as suggest parameters. Each returned
    dict gets a "document_id" key copied from its source document (None when
    the document has no such key). An error response from the underlying
    suggest call is returned unchanged."""

    documents = body["documents"]
    result = _suggest(project_id, documents, query_parameters)

    if _is_error(result):
        return result
    # Documents without a "text" key are skipped when the corpus is built, so
    # pair the results only with the documents that were actually processed;
    # otherwise ids would be attached to the wrong result sets.
    processed_documents = [doc for doc in documents if "text" in doc]
    for document_results, document in zip(result, processed_documents):
        document_results["document_id"] = document.get("document_id")
    return result
145
146
147
def _suggest(
    project_id: str,
    documents: list[dict[str, str]],
    parameters: dict[str, Any],
) -> list[dict[str, list]] | ConnexionResponse:
    """run suggest for the documents against a project and return one
    {"results": [...]} dict per processed document, or an error response

    Recognised keys of ``parameters`` are "language" (falls back to the
    project's ``vocab_lang``), "limit" (default 10) and "threshold"
    (default 0.0). Error responses: 404 when the project lookup raises
    ValueError, 400 when the language is not among the project vocabulary's
    languages, 503 when an AnnifException is raised."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        lang = parameters.get("language") or project.vocab_lang
        # NOTE(review): the vocabulary access is kept inside the handler on
        # the assumption that it can raise AnnifException just like
        # vocab_lang -- confirm against annif.project.
        lang_supported = lang in project.vocab.languages
    except AnnifException as err:
        return server_error(err)

    if not lang_supported:
        return language_not_supported_error(lang)

    limit = parameters.get("limit", 10)
    threshold = parameters.get("threshold", 0.0)

    # built only after the project and language checks have passed
    corpus = _documents_to_corpus(documents, subject_index=None)
    try:
        hit_sets = project.suggest_corpus(corpus).filter(limit, threshold)
    except AnnifException as err:
        return server_error(err)

    return _hit_sets_to_list(hit_sets, project.subjects, lang)
175
176
177
def _documents_to_corpus(
    documents: list[dict[str, Any]],
    subject_index: SubjectIndex | None,
) -> annif.corpus.document.DocumentList:
    """convert a list of document dicts into a DocumentList

    With a ``subject_index``, only dicts that have both "text" and "subjects"
    are converted, and each subject's "uri" is resolved through
    ``subject_index.by_uri`` to build the document's SubjectSet. Without one,
    every dict that has "text" is converted with ``subject_set=None``.
    Dicts lacking the required keys are silently skipped."""

    if subject_index is not None:
        corpus = [
            Document(
                text=d["text"],
                subject_set=SubjectSet(
                    [subject_index.by_uri(subj["uri"]) for subj in d["subjects"]]
                ),
            )
            for d in documents
            if "text" in d and "subjects" in d
        ]
    else:
        corpus = [
            Document(text=d["text"], subject_set=None) for d in documents if "text" in d
        ]
    return DocumentList(corpus)
197
198
199
def learn(
    project_id: str,
    body: list[dict[str, Any]],
) -> ConnexionResponse | tuple[None, int]:
    """learn from documents and return an empty 204 response if successful

    ``body`` is the list of document dicts; it is converted to a corpus using
    the project's subjects and passed to ``project.learn``. Returns a 404
    response when the project lookup raises ValueError and a 503 response
    when an AnnifException is raised while building the corpus or learning."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except AnnifException as err:
        return server_error(err)

    return None, 204
217
218
219
def _reconcile(
    project_id: str, query: dict[str, Any]
) -> list[dict[str, Any]] | ConnexionResponse:
    """run a single reconciliation query and return a list of candidate dicts,
    or the error response produced by the underlying suggest call

    ``query["query"]`` is used as the text to suggest subjects for and an
    optional ``query["limit"]`` is forwarded as the suggest limit. Each
    candidate has the keys "id" (subject URI), "name" (label), "score" and
    "match" (True only when the label equals the query text exactly)."""

    document = [{"text": query["query"]}]
    parameters = {"limit": query["limit"]} if "limit" in query else {}
    result = _suggest(project_id, document, parameters)

    if _is_error(result):
        return result

    results = [
        {
            "id": res["uri"],
            "name": res["label"],
            "score": res["score"],
            "match": res["label"] == query["query"],
        }
        for res in result[0]["results"]
    ]
    return results
237
238
239
def reconcile_metadata(
    project_id: str, **query_parameters
) -> ConnexionResponse | dict[str, Any]:
    """return the reconciliation service manifest for a project

    Returns a 404 response when the project lookup raises ValueError. When
    called without query parameters the manifest dict (with the project name
    embedded in "name") is returned; when any query parameters are present an
    empty dict is returned."""

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    if query_parameters:
        return {}

    return {
        "versions": ["0.2"],
        "name": "Annif Reconciliation Service for " + project.name,
        "identifierSpace": "",
        "schemaSpace": "",
        # literal "{{id}}" placeholder, not a Python format string
        "view": {"url": "{{id}}"},
    }
260
261
262
def reconcile(
    project_id: str, body: dict[str, Any]
) -> ConnexionResponse | dict[str, Any]:
    """reconcile against a project and return a dict with results
    formatted according to OpenAPI spec

    ``body["queries"]`` maps query keys to query dicts. The returned dict
    maps each key to {"result": [...candidates...]}. Processing stops at the
    first query that yields an error response, and that response is returned
    instead of the results."""

    queries = body["queries"]
    results = {}
    for key, query in queries.items():
        data = _reconcile(project_id, query)
        if _is_error(data):
            return data
        results[key] = {"result": data}

    return results
277