| 1 |  |  | """Definitions for REST API operations. These are wired via Connexion to | 
            
                                                        
            
                                    
            
            
                | 2 |  |  | methods defined in the OpenAPI specification.""" | 
            
                                                        
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 4 |  |  | from __future__ import annotations | 
            
                                                        
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 6 |  |  | import importlib | 
            
                                                        
            
                                    
            
            
                | 7 |  |  | from typing import TYPE_CHECKING, Any | 
            
                                                        
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 9 |  |  | import connexion | 
            
                                                        
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 11 |  |  | import annif.registry | 
            
                                                        
            
                                    
            
            
                | 12 |  |  | import annif.simplemma_util | 
            
                                                        
            
                                    
            
            
                | 13 |  |  | from annif.corpus import Document, DocumentList, SubjectSet | 
            
                                                        
            
                                    
            
            
                | 14 |  |  | from annif.exception import AnnifException, NotEnabledException | 
            
                                                        
            
                                    
            
            
                | 15 |  |  | from annif.project import Access | 
            
                                                        
            
                                    
            
            
                | 16 |  |  | from annif.util import suggestion_results_to_list | 
            
                                                        
            
                                    
            
            
                | 17 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 18 |  |  | if TYPE_CHECKING: | 
            
                                                        
            
                                    
            
            
                | 19 |  |  |     from connexion.lifecycle import ConnexionResponse | 
            
                                                        
            
                                    
            
            
                | 20 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 21 |  |  |     from annif.corpus.subject import SubjectIndex | 
            
                                                        
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 23 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 24 |  |  | def project_not_found_error(project_id: str) -> ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 25 |  |  |     """return a Connexion error object when a project is not found""" | 
            
                                                        
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 27 |  |  |     return connexion.problem( | 
            
                                                        
            
                                    
            
            
                | 28 |  |  |         status=404, | 
            
                                                        
            
                                    
            
            
                | 29 |  |  |         title="Project not found", | 
            
                                                        
            
                                    
            
            
                | 30 |  |  |         detail="Project '{}' not found".format(project_id), | 
            
                                                        
            
                                    
            
            
                | 31 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 33 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 34 |  |  | def learning_not_enabled_error(project_id) -> ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 35 |  |  |     """return a Connexion error object when a project is not configured for learning""" | 
            
                                                        
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 37 |  |  |     return connexion.problem( | 
            
                                                        
            
                                    
            
            
                | 38 |  |  |         status=403, | 
            
                                                        
            
                                    
            
            
                | 39 |  |  |         title="Learning not allowed", | 
            
                                                        
            
                                    
            
            
                | 40 |  |  |         detail=f"Project '{project_id}' is not configured to allow learning via API", | 
            
                                                        
            
                                    
            
            
                | 41 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 43 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 44 |  |  | def server_error( | 
            
                                                        
            
                                    
            
            
                | 45 |  |  |     err: AnnifException, | 
            
                                                        
            
                                    
            
            
                | 46 |  |  | ) -> ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 47 |  |  |     """return a Connexion error object when there is a server error (project | 
            
                                                        
            
                                    
            
            
                | 48 |  |  |     or backend problem)""" | 
            
                                                        
            
                                    
            
            
                | 49 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 50 |  |  |     return connexion.problem( | 
            
                                                        
            
                                    
            
            
                | 51 |  |  |         status=503, title="Service unavailable", detail=err.format_message() | 
            
                                                        
            
                                    
            
            
                | 52 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 53 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 54 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 55 |  |  | def show_info() -> tuple: | 
            
                                                        
            
                                    
            
            
                | 56 |  |  |     """return version of annif and a title for the api according to OpenAPI spec""" | 
            
                                                        
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 58 |  |  |     result = {"title": "Annif REST API", "version": importlib.metadata.version("annif")} | 
            
                                                        
            
                                    
            
            
                | 59 |  |  |     return result, 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 60 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 62 |  |  | def language_not_supported_error(lang: str) -> ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 63 |  |  |     """return a Connexion error object when attempting to use unsupported language""" | 
            
                                                        
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 65 |  |  |     return connexion.problem( | 
            
                                                        
            
                                    
            
            
                | 66 |  |  |         status=400, | 
            
                                                        
            
                                    
            
            
                | 67 |  |  |         title="Bad Request", | 
            
                                                        
            
                                    
            
            
                | 68 |  |  |         detail=f'language "{lang}" not supported by vocabulary', | 
            
                                                        
            
                                    
            
            
                | 69 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 70 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 71 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 72 |  |  | def list_vocabs() -> tuple: | 
            
                                                        
            
                                    
            
            
                | 73 |  |  |     """return a dict with vocabularies formatted according to OpenAPI spec""" | 
            
                                                        
            
                                    
            
            
                | 74 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 75 |  |  |     result = { | 
            
                                                        
            
                                    
            
            
                | 76 |  |  |         "vocabs": [ | 
            
                                                        
            
                                    
            
            
                | 77 |  |  |             vocab.dump() | 
            
                                                        
            
                                    
            
            
                | 78 |  |  |             for vocab in annif.registry.get_vocabs(min_access=Access.public).values() | 
            
                                                        
            
                                    
            
            
                | 79 |  |  |         ] | 
            
                                                        
            
                                    
            
            
                | 80 |  |  |     } | 
            
                                                        
            
                                    
            
            
                | 81 |  |  |     return result, 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 82 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 83 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 84 |  |  | def list_projects() -> tuple: | 
            
                                                        
            
                                    
            
            
                | 85 |  |  |     """return a dict with projects formatted according to OpenAPI spec""" | 
            
                                                        
            
                                    
            
            
                | 86 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 87 |  |  |     result = { | 
            
                                                        
            
                                    
            
            
                | 88 |  |  |         "projects": [ | 
            
                                                        
            
                                    
            
            
                | 89 |  |  |             proj.dump() | 
            
                                                        
            
                                    
            
            
                | 90 |  |  |             for proj in annif.registry.get_projects(min_access=Access.public).values() | 
            
                                                        
            
                                    
            
            
                | 91 |  |  |         ] | 
            
                                                        
            
                                    
            
            
                | 92 |  |  |     } | 
            
                                                        
            
                                    
            
            
                | 93 |  |  |     return result, 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 94 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 95 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 96 |  |  | def show_project( | 
            
                                                        
            
                                    
            
            
                | 97 |  |  |     project_id: str, | 
            
                                                        
            
                                    
            
            
                | 98 |  |  | ) -> dict | ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 99 |  |  |     """return a single project formatted according to OpenAPI spec""" | 
            
                                                        
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 101 |  |  |     try: | 
            
                                                        
            
                                    
            
            
                | 102 |  |  |         project = annif.registry.get_project(project_id, min_access=Access.hidden) | 
            
                                                        
            
                                    
            
            
                | 103 |  |  |     except ValueError: | 
            
                                                        
            
                                    
            
            
                | 104 |  |  |         return project_not_found_error(project_id) | 
            
                                                        
            
                                    
            
            
                | 105 |  |  |     return project.dump(), 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 106 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 108 |  |  | def detect_language(body: dict[str, Any]): | 
            
                                                        
            
                                    
            
            
                | 109 |  |  |     """return scores for detected languages formatted according to Swagger spec""" | 
            
                                                        
            
                                    
            
            
                | 110 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 111 |  |  |     text = body.get("text") | 
            
                                                        
            
                                    
            
            
                | 112 |  |  |     languages = body.get("languages") | 
            
                                                        
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 114 |  |  |     try: | 
            
                                                        
            
                                    
            
            
                | 115 |  |  |         proportions = annif.simplemma_util.detect_language(text, tuple(languages)) | 
            
                                                        
            
                                    
            
            
                | 116 |  |  |     except ValueError: | 
            
                                                        
            
                                    
            
            
                | 117 |  |  |         return connexion.problem( | 
            
                                                        
            
                                    
            
            
                | 118 |  |  |             status=400, | 
            
                                                        
            
                                    
            
            
                | 119 |  |  |             title="Bad Request", | 
            
                                                        
            
                                    
            
            
                | 120 |  |  |             detail="unsupported candidate languages", | 
            
                                                        
            
                                    
            
            
                | 121 |  |  |         ) | 
            
                                                        
            
                                    
            
            
                | 122 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 123 |  |  |     result = { | 
            
                                                        
            
                                    
            
            
                | 124 |  |  |         "results": [ | 
            
                                                        
            
                                    
            
            
                | 125 |  |  |             {"language": lang if lang != "unk" else None, "score": score} | 
            
                                                        
            
                                    
            
            
                | 126 |  |  |             for lang, score in proportions.items() | 
            
                                                        
            
                                    
            
            
                | 127 |  |  |         ] | 
            
                                                        
            
                                    
            
            
                | 128 |  |  |     } | 
            
                                                        
            
                                    
            
            
                | 129 |  |  |     return result, 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 130 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 131 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 132 |  |  | def _is_error(result: list[dict[str, list]] | ConnexionResponse) -> bool: | 
            
                                                        
            
                                    
            
            
                | 133 |  |  |     return ( | 
            
                                                        
            
                                    
            
            
                | 134 |  |  |         isinstance(result, connexion.lifecycle.ConnexionResponse) | 
            
                                                        
            
                                    
            
            
                | 135 |  |  |         and result.status_code >= 400 | 
            
                                                        
            
                                    
            
            
                | 136 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 137 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 138 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 139 |  |  | def suggest( | 
            
                                                        
            
                                    
            
            
                | 140 |  |  |     project_id: str, body: dict[str, Any] | 
            
                                                        
            
                                    
            
            
                | 141 |  |  | ) -> dict[str, list] | ConnexionResponse: | 
            
                                                        
            
                                    
            
            
                | 142 |  |  |     """suggest subjects for the given text and return a dict with results | 
            
                                                        
            
                                    
            
            
                | 143 |  |  |     formatted according to OpenAPI spec""" | 
            
                                                        
            
                                    
            
            
                | 144 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 145 |  |  |     parameters = dict( | 
            
                                                        
            
                                    
            
            
                | 146 |  |  |         (key, body[key]) for key in ["language", "limit", "threshold"] if key in body | 
            
                                                        
            
                                    
            
            
                | 147 |  |  |     ) | 
            
                                                        
            
                                    
            
            
                | 148 |  |  |     metadata = { | 
            
                                                        
            
                                    
            
            
                | 149 |  |  |         key[len("metadata_") :]: value | 
            
                                                        
            
                                    
            
            
                | 150 |  |  |         for key, value in body.items() | 
            
                                                        
            
                                    
            
            
                | 151 |  |  |         if key.startswith("metadata_") | 
            
                                                        
            
                                    
            
            
                | 152 |  |  |     } | 
            
                                                        
            
                                    
            
            
                | 153 |  |  |     documents = [{"text": body["text"], "metadata": metadata}] | 
            
                                                        
            
                                    
            
            
                | 154 |  |  |     result = _suggest(project_id, documents, parameters) | 
            
                                                        
            
                                    
            
            
                | 155 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 156 |  |  |     if _is_error(result): | 
            
                                                        
            
                                    
            
            
                | 157 |  |  |         return result | 
            
                                                        
            
                                    
            
            
                | 158 |  |  |     return result[0], 200, {"Content-Type": "application/json"} | 
            
                                                        
            
                                    
            
            
                | 159 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 160 |  |  |  | 
            
                                                        
            
                                    
            
            
                def suggest_batch(
                    project_id: str,
                    body: dict[str, list],
                    **query_parameters,
                ) -> list[dict[str, Any]] | ConnexionResponse:
                    """Suggest subjects for a batch of documents.

                    The documents are read from body["documents"] and passed, together with
                    the query parameters, to _suggest(). An error response from _suggest()
                    is returned unchanged. Otherwise every per-document result is tagged
                    with the "document_id" of the document it belongs to (None when the
                    document has no such key) and the list is returned with status 200 and
                    a JSON content type."""

                    documents = body["documents"]
                    result = _suggest(project_id, documents, query_parameters)

                    if _is_error(result):
                        return result

                    # Documents lacking a "text" key are left out when the corpus is built,
                    # so (presumably) no result exists for them. Pair the results only with
                    # the documents that were actually processed; zipping against the full
                    # list would attach document ids to the wrong results.
                    processed_documents = [doc for doc in documents if "text" in doc]
                    for document_results, document in zip(result, processed_documents):
                        document_results["document_id"] = document.get("document_id")
                    return result, 200, {"Content-Type": "application/json"}
            
                                                        
            
                                    
            
            
                | 177 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 178 |  |  |  | 
            
                                                        
            
                                    
            
            
                def _suggest(
                    project_id: str,
                    documents: list[dict[str, str]],
                    parameters: dict[str, Any],
                ) -> list[dict[str, list]] | ConnexionResponse:
                    """Run subject suggestion for the given documents.

                    Returns the suggestion results converted to a list, or an error
                    response when the project is not found, the language is not supported
                    by the project vocabulary, or an AnnifException is raised."""
                    doc_corpus = _documents_to_corpus(documents, subject_index=None)

                    try:
                        project = annif.registry.get_project(project_id, min_access=Access.hidden)
                    except ValueError:
                        return project_not_found_error(project_id)

                    # An explicitly requested (truthy) language takes precedence; otherwise
                    # fall back to the vocabulary language of the project.
                    try:
                        requested_language = parameters.get("language")
                        language = (
                            requested_language if requested_language else project.vocab_lang
                        )
                    except AnnifException as err:
                        return server_error(err)

                    if language not in project.vocab.languages:
                        return language_not_supported_error(language)

                    max_hits = parameters.get("limit", 10)
                    min_score = parameters.get("threshold", 0.0)

                    try:
                        unfiltered = project.suggest_corpus(doc_corpus)
                        filtered = unfiltered.filter(max_hits, min_score)
                    except AnnifException as err:
                        return server_error(err)
                    else:
                        return suggestion_results_to_list(filtered, project.subjects, language)
            
                                                        
            
                                    
            
            
                | 207 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 208 |  |  |  | 
            
                                                        
            
                                    
            
            
                def _documents_to_corpus(
                    documents: list[dict[str, Any]],
                    subject_index: SubjectIndex | None,
                ) -> annif.corpus.document.DocumentList:
                    """Convert a list of document dicts into a DocumentList.

                    Documents without a "text" key are skipped. When a subject index is
                    given, documents without a "subjects" key are skipped as well and the
                    "uri" of each listed subject is looked up in the index to build the
                    subject set; without an index the subject set is None. Missing
                    metadata defaults to an empty dict."""
                    use_subjects = subject_index is not None
                    entries = []
                    for doc in documents:
                        if "text" not in doc:
                            continue
                        if use_subjects and "subjects" not in doc:
                            continue

                        if use_subjects:
                            subject_set = SubjectSet(
                                [subject_index.by_uri(subj["uri"]) for subj in doc["subjects"]]
                            )
                        else:
                            subject_set = None

                        entries.append(
                            Document(
                                text=doc["text"],
                                subject_set=subject_set,
                                metadata=doc.get("metadata", {}),
                            )
                        )
                    return DocumentList(entries)
            
                                                        
            
                                    
            
            
                | 232 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 233 |  |  |  | 
            
                                                        
            
                                    
            
            
                def learn(
                    project_id: str,
                    body: list[dict[str, Any]],
                ) -> ConnexionResponse | tuple[None, int]:
                    """Learn from the given documents.

                    Returns an empty 204 response on success, or an error response when
                    the project is not found, learning is not enabled for it, or another
                    AnnifException is raised."""

                    try:
                        project = annif.registry.get_project(project_id, min_access=Access.hidden)
                    except ValueError:
                        return project_not_found_error(project_id)

                    try:
                        # The documents are converted using the subject index of the project.
                        project.learn(_documents_to_corpus(body, project.subjects))
                    except NotEnabledException:
                        return learning_not_enabled_error(project_id)
                    except AnnifException as err:
                        return server_error(err)
                    else:
                        return None, 204, {"Content-Type": "application/json"}
            
                                                        
            
                                    
            
            
                | 254 |  |  |  |