Passed
Pull Request — master (#659)
by
unknown
03:39
created

annif.rest.learn()   A

Complexity

Conditions 3

Size

Total Lines 15
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 11
nop 2
dl 0
loc 15
rs 9.85
c 0
b 0
f 0
"""Definitions for REST API operations. These are wired via Connexion to
methods defined in the Swagger specification."""
3
4
import connexion
5
from simplemma.langdetect import lang_detector
6
7
import annif.registry
8
from annif.corpus import Document, DocumentList, SubjectSet
9
from annif.exception import AnnifException
10
from annif.project import Access
11
from annif.suggestion import SuggestionFilter
12
13
14
def project_not_found_error(project_id):
    """return a Connexion error object when a project is not found

    :param project_id: identifier of the project that was requested; it is
        echoed in the ``detail`` field of the error
    :return: the value of ``connexion.problem`` for a 404 response
    """

    return connexion.problem(
        status=404,
        title="Project not found",
        detail=f"Project '{project_id}' not found",
    )
22
23
24
def server_error(err):
    """return a Connexion error object when there is a server error (project
    or backend problem)

    :param err: the caught exception; its ``format_message()`` result is used
        as the ``detail`` field of the error
    :return: the value of ``connexion.problem`` for a 503 response
    """

    return connexion.problem(
        status=503, title="Service unavailable", detail=err.format_message()
    )
31
32
33
def list_projects():
    """return a dict with projects formatted according to Swagger spec

    Only projects returned by the registry for ``min_access=Access.public``
    are listed; each one is serialised with its ``dump()`` method.

    :return: ``{"projects": [...]}``
    """

    return {
        "projects": [
            proj.dump()
            for proj in annif.registry.get_projects(min_access=Access.public).values()
        ]
    }
42
43
44
def show_project(project_id):
    """return a single project formatted according to Swagger spec

    The project is looked up with ``min_access=Access.hidden``.

    :param project_id: identifier of the project to show
    :return: the result of the project's ``dump()`` method, or a 404 error
        object when the registry lookup raises ``ValueError``
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    return project.dump()
52
53
54
def detect_language(body):
    """return scores for detected languages formatted according to Swagger spec

    :param body: request payload; its ``"text"`` and ``"candidates"`` entries
        are handed to ``lang_detector`` (the candidates as a tuple)
    :return: ``{"results": [...]}`` with one ``{"language", "score"}`` dict
        per entry returned by ``lang_detector``
    """

    scores = lang_detector(body.get("text"), tuple(body.get("candidates")))
    return {
        "results": [
            # the "unk" language code is reported to the client as null
            {"language": s[0] if s[0] != "unk" else None, "score": s[1]}
            for s in scores
        ]
    }
64
65
66
def _suggestion_to_dict(suggestion, subject_index, language):
    """convert a single suggestion into a dict for the REST response

    :param suggestion: object with ``subject_id`` and ``score`` attributes
    :param subject_index: container indexed by ``suggestion.subject_id``
        that yields a subject with ``uri``, ``labels`` and ``notation``
    :param language: key used to pick the label from ``subject.labels``
    :return: dict with the keys ``uri``, ``label``, ``notation`` and ``score``
    """
    subject = subject_index[suggestion.subject_id]
    return {
        "uri": subject.uri,
        "label": subject.labels[language],
        "notation": subject.notation,
        "score": suggestion.score,
    }
74
75
76
def suggest(project_id, body):
    """suggest subjects for the given text and return a dict with results
    formatted according to Swagger spec

    :param project_id: identifier of the project to use
    :param body: request payload; ``"text"`` is required, ``"language"``,
        ``"limit"`` (default 10) and ``"threshold"`` (default 0.0) are
        optional
    :return: ``{"results": [...]}`` on success; otherwise an error object:
        404 when the project lookup raises ``ValueError``, 400 when the
        language is not among the vocabulary languages, 503 when an
        ``AnnifException`` is caught
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        lang = body.get("language") or project.vocab_lang
        # The vocabulary check stays inside this handler: when the request
        # names a language, this is the first access to the vocabulary, so an
        # AnnifException raised here is reported like other project errors.
        lang_supported = lang in project.vocab.languages
    except AnnifException as err:
        return server_error(err)

    if not lang_supported:
        return connexion.problem(
            status=400,
            title="Bad Request",
            detail=f'language "{lang}" not supported by vocabulary',
        )

    limit = body.get("limit", 10)
    threshold = body.get("threshold", 0.0)

    try:
        hit_filter = SuggestionFilter(project.subjects, limit, threshold)
        result = project.suggest(body["text"])
    except AnnifException as err:
        return server_error(err)

    hits = hit_filter(result).as_list()
    return {
        "results": [_suggestion_to_dict(hit, project.subjects, lang) for hit in hits]
    }
110
111
112
def _documents_to_corpus(documents, subject_index):
    """build a DocumentList from the documents of a REST request

    :param documents: iterable of dicts; entries lacking a ``"text"`` or a
        ``"subjects"`` key are skipped
    :param subject_index: object whose ``by_uri`` method is called with the
        ``"uri"`` of every subject of a document
    :return: a ``DocumentList`` wrapping the converted documents
    """
    corpus = [
        Document(
            text=d["text"],
            subject_set=SubjectSet(
                [subject_index.by_uri(subj["uri"]) for subj in d["subjects"]]
            ),
        )
        for d in documents
        if "text" in d and "subjects" in d
    ]
    return DocumentList(corpus)
124
125
126
def learn(project_id, body):
    """learn from documents and return an empty 204 response if successful

    :param project_id: identifier of the project that should learn
    :param body: request payload, passed on as the documents to convert
        into a corpus
    :return: ``(None, 204)`` on success; a 404 error object when the project
        lookup raises ``ValueError``; a 503 error object when an
        ``AnnifException`` is caught
    """

    try:
        project = annif.registry.get_project(project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)

    try:
        corpus = _documents_to_corpus(body, project.subjects)
        project.learn(corpus)
    except AnnifException as err:
        return server_error(err)

    return None, 204
141