Passed
Pull Request — master (#966)
by Konstantin
03:18
created

ocrd.mets_server.ClientSideOcrdMets.add_file()   A

Complexity

Conditions 1

Size

Total Lines 22
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 21
dl 0
loc 22
rs 9.376
c 0
b 0
f 0
cc 1
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
# XXX HACKS TODO
# Logging is set up as a side effect of importing this module.
#
initLogging()
25
26
#
27
# Models
28
#
29
30
# Description of a single file entry as it is sent between the METS server
# and its client. (Documented with comments rather than a class docstring so
# that the schema generated from the model stays unchanged.)
class OcrdFileModel(BaseModel):
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Union[str, None] = Field()
    url : Union[str, None] = Field()
    local_filename : Union[str, None] = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], url : str, local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from its individual attributes.
        """
        attributes = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'url': url,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**attributes)
41
42
# Description of a single agent as it is sent between the METS server and
# its client. (Documented with comments rather than a class docstring so that
# the schema generated from the model stays unchanged.)
#
# NOTE(review): pydantic gives attribute names with a leading underscore
# special treatment, so `_type` may not behave like the other fields
# (validation, `.dict()` output) -- confirm the agent type really round-trips.
class OcrdAgentModel(BaseModel):
    name : str = Field()
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from its individual attributes.
        """
        attributes = {
            'name': name,
            '_type': _type,
            'role': role,
            'otherrole': otherrole,
            'othertype': othertype,
            'notes': notes,
        }
        return OcrdAgentModel(**attributes)
53
54
55
# Container for a list of :py:class:`OcrdFileModel` entries, used as the
# response body of the file query endpoint.
class OcrdFileListModel(BaseModel):
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert each given file object into an :py:class:`OcrdFileModel`
        and wrap the results in an :py:class:`OcrdFileListModel`.
        """
        models = []
        for ocrd_file in files:
            models.append(OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                url=ocrd_file.url,
                local_filename=ocrd_file.local_filename,
            ))
        return OcrdFileListModel(files=models)
63
64
# Container for a list of file group names, used as the response body of
# the file group listing endpoint.
class OcrdFileGroupListModel(BaseModel):
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap the given file group names in an :py:class:`OcrdFileGroupListModel`.
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
70
71
# Container for a list of :py:class:`OcrdAgentModel` entries, used as the
# response body of the agent listing endpoint.
class OcrdAgentListModel(BaseModel):
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert each given agent object into an :py:class:`OcrdAgentModel`
        and wrap the results in an :py:class:`OcrdAgentListModel`.
        """
        models = []
        for agent in agents:
            models.append(OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            ))
        return OcrdAgentListModel(agents=models)
79
80
#
81
# Client
82
#
83
84
85
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` that
    forwards its operations via HTTP to a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.

    Provided counterparts:
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents` and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`.

    Any other attribute access raises :py:class:`NotImplementedError`.
    """

    def __init__(self, url):
        """
        Set up the HTTP session for the METS server at ``url``.

        A ``url`` starting with ``http://`` is contacted over TCP; anything
        else is taken to be the path of a Unix domain socket.
        """
        is_tcp = url.startswith('http://')
        protocol = 'tcp' if is_tcp else 'uds'
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        if is_tcp:
            self.url = url
            self.session = requests_session()
        else:
            # the socket path becomes the percent-encoded host part of the URL
            self.url = f'http+unix://{url.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    def __getattr__(self, name):
        # Only reached for attributes that normal lookup did not find.
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @property
    def workspace_path(self):
        """
        Text body of the server's ``/workspace_path`` response.
        """
        response = self.session.request('GET', f'{self.url}/workspace_path')
        return response.text

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server for files and yield one
        :py:class:`ClientSideOcrdFile` per entry of the response.

        The keyword arguments are sent as query parameters; the legacy names
        ``pageId``, ``ID`` and ``fileGrp`` are renamed first.
        """
        legacy_names = (
            ('pageId', 'page_id'),
            ('ID', 'file_id'),
            ('fileGrp', 'file_grp'),
        )
        for legacy, current in legacy_names:
            if legacy in kwargs:
                kwargs[current] = kwargs.pop(legacy)
        response = self.session.request('GET', f'{self.url}/file', params=dict(kwargs))
        # NOTE(review): the response status is not checked before decoding.
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                url=entry['url'],
                local_filename=entry['local_filename'],
                mimetype=entry['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        found = self.find_files(*args, **kwargs)
        return list(found)

    def add_agent(self, *args, **kwargs):
        """
        POST an agent built from the keyword arguments to the server and
        return the HTTP response. Positional arguments are not used.
        """
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from the server's
        ``/agent`` response.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in response.json()['agents']]

    @property
    def unique_identifier(self):
        """
        Text body of the server's ``/unique_identifier`` response.
        """
        response = self.session.request('GET', f'{self.url}/unique_identifier')
        return response.text

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the server's ``/file_groups`` response.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        POST a new file entry to the server and return a
        :py:class:`ClientSideOcrdFile` describing it.

        ``content`` and any extra keyword arguments are accepted but not
        used by this method.
        """
        form_data = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
            local_filename=local_filename,
        ).dict()
        # NOTE(review): the server's response is discarded, so a rejected
        # request is not noticed here.
        self.session.request('POST', f'{self.url}/file', data=form_data)
        return ClientSideOcrdFile(
            None,
            ID=file_id,
            fileGrp=file_grp,
            url=url,
            pageId=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        )

    def save(self):
        """
        Send a ``PUT`` request to the server's base URL.
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send a ``DELETE`` request to the server's base URL.
        """
        self.session.request('DELETE', self.url)
173
174
#
175
# Server
176
#
177
178
class OcrdMetsServer():
    """
    HTTP server exposing operations on the METS of a workspace.
    """

    def __init__(self, workspace, url):
        """
        Remember the ``workspace`` to serve and the ``url`` to listen on.
        """
        self.workspace = workspace
        self.url = url
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Define the FastAPI application with all its routes and run it with
        uvicorn.

        A ``url`` starting with ``http`` is served on its host and port;
        any other value is passed to uvicorn as a Unix domain socket.
        """
        ws = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # The following exceptions are all reported as HTTP 400 responses.

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        # NOTE: endpoint names, parameter names and docstrings are left as
        # they are, since FastAPI uses them for the generated API schema.

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            return OcrdFileListModel.create(
                ws.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            )

        # Persist the METS; deliberately a plain (non-async) function.
        @app.put('/')
        def save():
            return ws.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : Union[str, None] = Form(),
            local_filename : Union[str, None] = Form(),
        ):
            """
            Add a file
            """
            # Validate by building the model first ...
            file_resource = OcrdFileModel.create(
                file_grp=file_grp,
                file_id=file_id,
                page_id=page_id,
                mimetype=mimetype,
                url=url,
                local_filename=local_filename,
            )
            # ... then hand its fields to the workspace.
            ws.add_file(**file_resource.dict())
            return file_resource

        # List the file groups of the METS.
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': ws.mets.file_groups}

        # Add the posted agent to the METS and echo it back.
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            ws.mets.add_agent(**agent.dict())
            return agent

        # List the agents of the METS.
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(ws.mets.agents)

        # Plain-text unique identifier of the METS.
        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=ws.mets.unique_identifier, media_type='text/plain')

        # Plain-text directory of the workspace.
        @app.get('/workspace_path', response_model=str)
        async def workspace_path():
            return Response(content=ws.directory, media_type="text/plain")

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            ws.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        if not self.url.startswith('http'):
            uvicorn.run(app, uds=self.url)
            return
        target = urlparse(self.url)
        uvicorn.run(app, host=target.hostname, port=target.port)
283