Passed
Pull Request — master (#663)
by Juho
02:51
created

annif.rest.suggest_batch()   B

Complexity

Conditions 5

Size

Total Lines 49
Code Lines 33

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 33
nop 2
dl 0
loc 49
rs 8.6213
c 0
b 0
f 0
1
"""Definitions for REST API operations. These are wired via Connexion to
2
methods defined in the Swagger specification."""
3
4
import importlib
5
6
import connexion
7
8
import annif.registry
9
from annif.corpus import Document, DocumentList, SubjectSet
10
from annif.exception import AnnifException
11
from annif.project import Access
12
from annif.suggestion import SuggestionFilter
13
14
15
def project_not_found_error(project_id):
    """Build the Connexion problem response for an unknown project.

    The response carries HTTP status 404 and a detail message that names the
    requested ``project_id``.
    """

    message = "Project '{}' not found".format(project_id)
    return connexion.problem(status=404, title="Project not found", detail=message)
23
24
25
def server_error(err):
    """Build the Connexion problem response for a project or backend failure.

    ``err`` must provide ``format_message()``; the text it returns is used as
    the detail of the HTTP 503 response.
    """

    message = err.format_message()
    return connexion.problem(status=503, title="Service unavailable", detail=message)
32
33
34
def show_info():
    """Return the API title and the installed Annif version.

    The result is a dict with the keys ``title`` and ``version``, formatted
    according to the Swagger spec. The version string is read from the
    installed package metadata of the ``annif`` distribution.

    Raises ``importlib.metadata.PackageNotFoundError`` if no distribution
    named ``annif`` is installed.
    """

    # A plain ``import importlib`` does not guarantee that the ``metadata``
    # submodule has been loaded. Import it explicitly so the attribute lookup
    # below cannot fail with AttributeError depending on what else happened to
    # be imported earlier in the process.
    import importlib.metadata

    return {"title": "Annif REST API", "version": importlib.metadata.version("annif")}
38
39
40
def list_projects():
    """Return the publicly accessible projects formatted per the Swagger spec.

    The result is a dict whose ``projects`` key holds the ``dump()`` output of
    every project the registry returns for ``Access.public``.
    """

    public_projects = annif.registry.get_projects(min_access=Access.public)
    dumped = [proj.dump() for proj in public_projects.values()]
    return {"projects": dumped}
49
50
51
def show_project(project_id):
    """Return one project's ``dump()`` output, formatted per the Swagger spec.

    If the registry raises ``ValueError`` for ``project_id``, the 404 problem
    response from ``project_not_found_error`` is returned instead.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return project.dump()
59
60
61
def _suggestion_to_dict(suggestion, subject_index, language):
62
    subject = subject_index[suggestion.subject_id]
63
    return {
64
        "uri": subject.uri,
65
        "label": subject.labels[language],
66
        "notation": subject.notation,
67
        "score": suggestion.score,
68
    }
69
70
71
def suggest(project_id, body):
    """Suggest subjects for a single text, formatted per the Swagger spec.

    ``body`` supplies the required ``text`` and the optional ``language``
    (falls back to the project's ``vocab_lang``), ``limit`` (default 10) and
    ``threshold`` (default 0.0).

    Returns a dict with a ``results`` list on success. Returns a problem
    response instead when the project is unknown (404), the language is not
    among the vocabulary's languages (400), or an ``AnnifException`` is raised
    while resolving the language or producing suggestions (503).
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    # Looking up the project's default language may raise an AnnifException.
    try:
        lang = body.get("language") or project.vocab_lang
    except AnnifException as err:
        return server_error(err)

    if lang not in project.vocab.languages:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail=f'language "{lang}" not supported by vocabulary',
        )

    limit = body.get("limit", 10)
    threshold = body.get("threshold", 0.0)

    try:
        hit_filter = SuggestionFilter(project.subjects, limit, threshold)
        raw_result = project.suggest(body["text"])
    except AnnifException as err:
        return server_error(err)

    results = []
    for hit in hit_filter(raw_result).as_list():
        results.append(_suggestion_to_dict(hit, project.subjects, lang))
    return {"results": results}
105
106
107
def suggest_batch(project_id, body):
    """Suggest subjects for several documents, formatted per the Swagger spec.

    ``body`` supplies the required ``documents`` list (each entry with a
    ``text``) and an optional ``parameters`` dict holding ``language`` (falls
    back to the project's ``vocab_lang``), ``limit`` (default 10) and
    ``threshold`` (default 0.0).

    Returns a list with one ``{"results": [...]}`` dict per hit set on
    success. Returns a problem response instead when the project is unknown
    (404), the language is not among the vocabulary's languages (400), or an
    ``AnnifException`` is raised while resolving the language or producing
    suggestions (503).
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    parameters = body.get("parameters", {})
    # Looking up the project's default language may raise an AnnifException.
    try:
        lang = parameters.get("language") or project.vocab_lang
    except AnnifException as err:
        return server_error(err)

    if lang not in project.vocab.languages:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail=f'language "{lang}" not supported by vocabulary',
        )

    limit = parameters.get("limit", 10)
    threshold = parameters.get("threshold", 0.0)

    # The incoming documents carry no subjects, only text.
    docs = []
    for d in body["documents"]:
        docs.append(Document(text=d["text"], subject_set=None))
    documents = DocumentList(docs)

    try:
        hit_filter = SuggestionFilter(project.subjects, limit, threshold)
        hit_sets = project.suggest_batch(documents)
    except AnnifException as err:
        return server_error(err)

    response = []
    for hits in hit_sets:
        results = []
        for hit in hit_filter(hits).as_list():
            results.append(_suggestion_to_dict(hit, project.subjects, lang))
        response.append({"results": results})
    return response
157
158
159
def _documents_to_corpus(documents, subject_index):
    """Turn a list of document dicts into a ``DocumentList``.

    Entries lacking either the ``text`` or the ``subjects`` key are skipped.
    For the rest, each subject's ``uri`` is passed through
    ``subject_index.by_uri`` and the results are wrapped in a ``SubjectSet``.
    """

    corpus = []
    for d in documents:
        if "text" not in d or "subjects" not in d:
            continue
        text = d["text"]
        subject_ids = [subject_index.by_uri(subj["uri"]) for subj in d["subjects"]]
        corpus.append(Document(text=text, subject_set=SubjectSet(subject_ids)))
    return DocumentList(corpus)
171
172
173
def learn(project_id, body):
    """Learn from the given documents; return an empty 204 response on success.

    Returns the 404 problem response when the project is unknown, and the 503
    problem response when building the corpus or learning raises an
    ``AnnifException``.
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        training_docs = _documents_to_corpus(body, project.subjects)
        project.learn(training_docs)
    except AnnifException as err:
        return server_error(err)
    else:
        return None, 204
188