Passed
Pull Request — master (#966)
by Konstantin
02:36
created

ocrd.mets_server.ClientSideOcrdMets.add_file()   A

Complexity

Conditions 1

Size

Total Lines 22
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 22
rs 9.376
c 0
b 0
f 0
cc 1
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

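There are several approaches to avoid long parameter lists: group parameters that belong together into a parameter object, pass a whole object instead of several of its attributes, or replace a parameter with a call the method can make itself.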

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
22
# XXX HACKS TODO
23
#
24
# NOTE(review): logging is initialised as a side effect of importing this
# module (marked as a hack in the comment banner above) — presumably so that
# the server process logs through the OCR-D logging setup; confirm before
# relying on it or moving it.
initLogging()
25
26
#
27
# Models
28
#
29
30
class OcrdFileModel(BaseModel):
    """
    Schema of a single file entry as exchanged between the METS server
    and its clients.

    The fields mirror the ``fileGrp``, ``ID``, ``mimetype``, ``pageId``,
    ``url`` and ``local_filename`` attributes of
    :py:class:`ocrd_models.OcrdFile`.
    """
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    # The remaining three fields may be ``None``
    page_id : Union[str, None] = Field()
    url : Union[str, None] = Field()
    local_filename : Union[str, None] = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], url : str, local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from its individual attributes.

        Every argument is handed to the model constructor under the same
        name. ``url`` and ``local_filename`` are annotated as ``str`` here,
        but the corresponding fields are declared to accept ``None`` as well.
        """
        attributes = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'url': url,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**attributes)
41
42
class OcrdAgentModel(BaseModel):
    """
    Schema of a METS agent as exchanged between the METS server and its
    clients.
    """
    name : str = Field()
    # NOTE(review): pydantic gives underscore-prefixed class attributes
    # special treatment instead of handling them as regular model fields —
    # verify that ``_type`` really is validated and ends up in ``.dict()``.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from its individual attributes.

        Every argument is handed to the model constructor under the same
        name.
        """
        attributes = {
            'name': name,
            '_type': _type,
            'role': role,
            'otherrole': otherrole,
            'othertype': othertype,
            'notes': notes,
        }
        return OcrdAgentModel(**attributes)
53
54
55
class OcrdFileListModel(BaseModel):
    """
    Schema of a list of file entries, wrapped in a ``files`` key.
    """
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert an iterable of :py:class:`ocrd_models.OcrdFile` objects into
        an :py:class:`OcrdFileListModel`.

        Each file contributes one :py:class:`OcrdFileModel` populated from
        its ``fileGrp``, ``ID``, ``mimetype``, ``pageId``, ``url`` and
        ``local_filename`` attributes.
        """
        entries = []
        for ocrd_file in files:
            entry = OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                url=ocrd_file.url,
                local_filename=ocrd_file.local_filename,
            )
            entries.append(entry)
        return OcrdFileListModel(files=entries)
63
64
class OcrdFileGroupListModel(BaseModel):
    """
    Schema of a list of file group names, wrapped in a ``file_groups`` key.
    """
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names in an
        :py:class:`OcrdFileGroupListModel`.
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
70
71
class OcrdAgentListModel(BaseModel):
    """
    Schema of a list of agents, wrapped in an ``agents`` key.
    """
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert an iterable of :py:class:`ocrd_models.OcrdAgent` objects into
        an :py:class:`OcrdAgentListModel`.

        Each agent contributes one :py:class:`OcrdAgentModel` populated from
        its ``name``, ``type``, ``role``, ``otherrole``, ``othertype`` and
        ``notes`` attributes (``type`` is passed on as ``_type``).
        """
        entries = []
        for agent in agents:
            entry = OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            )
            entries.append(entry)
        return OcrdAgentListModel(agents=entries)
79
80
#
81
# Client
82
#
83
84
85
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which
    talks via HTTP to a :py:class:`ocrd.mets_server.OcrdMetsServer` instead
    of operating on a METS file directly.

    It provides counterparts for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents` and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`.
    Any other attribute access raises :py:class:`NotImplementedError`.
    """

    def __init__(self, url):
        """
        Set up the HTTP session for the METS server at ``url``.

        Args:
            url (str): Either an ``http://`` / ``https://`` URL (TCP) or the
                filesystem path of a Unix domain socket.
        """
        # Both plain and TLS HTTP URLs are TCP endpoints. Everything else is
        # taken to be the path of a Unix domain socket; treating an https URL
        # as a socket path would only produce a mangled http+unix URL.
        protocol = 'tcp' if url.startswith(('http://', 'https://')) else 'uds'
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        # requests_unixsocket expects the socket path percent-encoded in the
        # host part of an http+unix:// URL
        self.url = url if protocol == 'tcp' else f'http+unix://{url.replace("/", "%2F")}'
        self.session = requests_session() if protocol == 'tcp' else requests_unixsocket_session()

    def __getattr__(self, name):
        """
        Reject access to anything this partial substitute does not implement.

        Only called when regular attribute lookup fails.
        """
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server for files and yield them lazily.

        Keyword arguments are sent as query parameters of ``GET /file``; the
        legacy spellings ``pageId``, ``ID`` and ``fileGrp`` are mapped to
        ``page_id``, ``file_id`` and ``file_grp``.

        Yields:
            :py:class:`ocrd_models.ClientSideOcrdFile`: one per entry of the
            ``files`` list in the server's JSON response.
        """
        # Map any legacy keyword that is still present after the
        # deprecated_alias decorators have run
        if 'pageId' in kwargs:
            kwargs['page_id'] = kwargs.pop('pageId')
        if 'ID' in kwargs:
            kwargs['file_id'] = kwargs.pop('ID')
        if 'fileGrp' in kwargs:
            kwargs['file_grp'] = kwargs.pop('fileGrp')
        r = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], url=f['url'], local_filename=f['local_filename'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Register an agent with the server via ``POST /agent``.

        Only keyword arguments are used; they must match the parameters of
        :py:meth:`OcrdAgentModel.create`. Positional arguments are accepted
        but ignored.

        Returns:
            The HTTP response object of the request.
        """
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel.create(**kwargs).dict())

    @property
    def agents(self):
        """
        List of :py:class:`ocrd_models.ClientSideOcrdAgent` built from the
        ``agents`` list returned by ``GET /agent``.
        """
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in self.session.request('GET', f'{self.url}/agent').json()['agents']]

    @property
    def unique_identifier(self):
        """
        Body text of the server's ``GET /unique_identifier`` response.
        """
        return self.session.request('GET', f'{self.url}/unique_identifier').text

    @property
    def file_groups(self):
        """
        The ``file_groups`` list returned by ``GET /file_groups``.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Ask the server to add a file via ``POST /file``.

        ``content`` and any additional keyword arguments are accepted for
        signature compatibility but are not transmitted to the server.

        Returns:
            :py:class:`ocrd_models.ClientSideOcrdFile`: local representation
            of the file that was added.

        Raises:
            requests.exceptions.HTTPError: if the server answers with an
                error status (e.g. 400 for an already existing file or a
                validation error).
        """
        response = self.session.request(
            'POST',
            f'{self.url}/file',
            data=OcrdFileModel.create(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                url=url,
                local_filename=local_filename).dict(),
        )
        # Do not hand back a file object for something the server refused to
        # add: surface error responses instead of silently dropping them.
        response.raise_for_status()
        return ClientSideOcrdFile(
                None,
                ID=file_id,
                fileGrp=file_grp,
                url=url,
                pageId=page_id,
                mimetype=mimetype,
                local_filename=local_filename)


    def save(self):
        """
        Ask the server to save the METS (``PUT`` on the server URL).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Ask the server to shut down (``DELETE`` on the server URL).
        """
        self.session.request('DELETE', self.url)
169
170
#
171
# Server
172
#
173
174
class OcrdMetsServer():
    """
    HTTP server exposing the METS of a workspace, so that clients such as
    :py:class:`ClientSideOcrdMets` can query and modify it.
    """

    def __init__(self, workspace, url):
        """
        Args:
            workspace: Workspace whose METS is served; it must provide
                ``mets``, ``add_file()`` and ``save_mets()``.
            url (str): Either an ``http://`` / ``https://`` URL to listen on
                (TCP) or the filesystem path of a Unix domain socket.
        """
        self.workspace = workspace
        self.url = url
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application and serve it with uvicorn.

        Blocks for as long as the server is running.
        """

        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # The following error types are reported to the client as HTTP 400

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        @app.put('/')
        def save():
            """
            Save the mets
            """
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : Union[str, None] = Form(),
            local_filename : Union[str, None] = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            workspace.add_file(**kwargs)
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            """
            List the file groups of the mets
            """
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            """
            Add an agent
            """
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            """
            List the agents of the mets
            """
            return OcrdAgentListModel.create(workspace.mets.agents)

        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            """
            Return the unique identifier of the mets as plain text
            """
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        # Only genuine HTTP(S) URLs denote a TCP endpoint. A bare 'http'
        # prefix check would also match socket paths such as 'http.sock' or
        # 'httpd/mets.sock' and then pass a meaningless host/port to uvicorn.
        if self.url.startswith(('http://', 'https://')):
            parsed = urlparse(self.url)
            uvicorn_kwargs = {'host': parsed.hostname, 'port': parsed.port}
        else:
            uvicorn_kwargs = {'uds': self.url}
        uvicorn.run(app, **uvicorn_kwargs)
275