Passed
Pull Request — master (#966)
by Konstantin
02:30
created

ocrd.mets_server.ClientSideOcrdFile.__init__()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 19
rs 10
c 0
b 0
f 0
cc 1
nop 10

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, Response
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACKS TODO
22
#
23
# NOTE: executed at import time - merely importing this module calls
# initLogging() as a side effect (see the "XXX HACKS TODO" banner above).
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    # Schema of a single file entry as exchanged between METS server and
    # client (request form of ``POST /file`` and element of the file list).
    # Documented with comments rather than a docstring so that the generated
    # schema is left untouched.
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Optional[str] = Field()
    local_filename : str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from plain values.

        Args:
            file_grp (string): value for the ``file_grp`` field
            file_id (string): value for the ``file_id`` field
            page_id (string): value for the ``page_id`` field, may be ``None``
            local_filename (string): value for the ``local_filename`` field
            mimetype (string): value for the ``mimetype`` field
        Returns:
            the validated :py:class:`OcrdFileModel`
        """
        return OcrdFileModel(
            file_grp=file_grp,
            file_id=file_id,
            mimetype=mimetype,
            page_id=page_id,
            local_filename=local_filename,
        )
39
40
class OcrdAgentModel(BaseModel):
    # Schema of a single agent entry as exchanged between METS server and
    # client (body of ``POST /agent`` and element of the agent list).
    name : str = Field()
    # NOTE(review): pydantic gives special treatment to attribute names with a
    # leading underscore - confirm that ``_type`` really is validated and
    # serialized like the other fields.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from plain values.

        Every argument is forwarded unchanged to the keyword of the same name.

        Returns:
            the validated :py:class:`OcrdAgentModel`
        """
        return OcrdAgentModel(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
51
52
53
class OcrdFileListModel(BaseModel):
    # Response schema of ``GET /file``: a list of file entries.
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert file objects into their transferable representation.

        Args:
            files (list): objects providing ``fileGrp``, ``ID``, ``mimetype``,
                ``pageId`` and ``local_filename`` attributes
        Returns:
            an :py:class:`OcrdFileListModel` holding one
            :py:class:`OcrdFileModel` per input file, in input order
        """
        entries = [
            OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                local_filename=ocrd_file.local_filename,
            )
            for ocrd_file in files
        ]
        return OcrdFileListModel(files=entries)
61
62
class OcrdFileGroupListModel(BaseModel):
    # Response schema of ``GET /file_groups``: a list of file group names.
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names.

        Args:
            file_groups (list): the names to transfer
        Returns:
            the validated :py:class:`OcrdFileGroupListModel`
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
68
69
class OcrdAgentListModel(BaseModel):
    # Response schema of ``GET /agent``: a list of agent entries.
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert agent objects into their transferable representation.

        Args:
            agents (list): objects providing ``name``, ``type``, ``role``,
                ``otherrole``, ``othertype`` and ``notes`` attributes
        Returns:
            an :py:class:`OcrdAgentListModel` holding one
            :py:class:`OcrdAgentModel` per input agent, in input order
        """
        entries = [
            OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            )
            for agent in agents
        ]
        return OcrdAgentListModel(agents=entries)
77
78
#
79
# Client
80
#
81
82
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): name of the file group this ``mets:file`` belongs to
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        Render the file as ``<OcrdFile key=value, ...]/>``.

        Attributes that are missing or falsy are shown as ``---``. The ``url``
        attribute is never set by the constructor, so it is looked up with a
        default instead of being assumed to exist.
        """
        def render(key):
            value = getattr(self, key, None)
            # Coerce to str so that non-string values (e.g. a path object as
            # local_filename) cannot make the representation raise TypeError.
            return f'{key}={value if value else "---"}'

        props = ', '.join(
            render(key)
            for key in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']
        )
        # NOTE(review): the opening '[' appears to be missing from this format
        # string; left unchanged because callers may rely on the exact text.
        return '<OcrdFile %s]/>' % (props)
115
116
class ClientSideOcrdAgent():
    """
    Provides the same interface as :py:class:`ocrd_models.OcrdAgent`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, name=None, _type=None, othertype=None, role=None, otherrole=None,
                 notes=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string): stored as ``name``
            _type (string): stored as ``type``
            othertype (string): stored as ``othertype``
            role (string): stored as ``role``
            otherrole (string): stored as ``otherrole``
            notes (list): stored as ``notes``; :py:class:`OcrdAgentModel` transfers
                them as a list of ``(attributes dict, optional text)`` tuples
        """
        self.name = name
        self.type = _type
        self.othertype = othertype
        self.role = role
        self.otherrole = otherrole
        self.notes = notes

    def __str__(self):
        """
        Render the agent as ``<OcrdAgent [key=value, ...]/>``.

        Falsy attributes are shown as ``---``.
        """
        def render(key):
            value = getattr(self, key)
            # Coerce to str so that a non-string value cannot make the
            # representation raise TypeError.
            return f'{key}={value if value else "---"}'

        props = ', '.join(
            render(key)
            for key in ['type', 'othertype', 'role', 'otherrole', 'name']
        )
        return f'<OcrdAgent [{props}]/>'
149
150
151
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which
    forwards a subset of its API via HTTP to a
    :py:class:`ocrd.mets_server.OcrdMetsServer`:

    * :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`
    * :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`
    * :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`
    * :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`
    * :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`

    Any attribute not provided here raises ``NotImplementedError``.
    """

    def __init__(self, host, port, socket):
        """
        Args:
            host (string): server host, used when ``socket`` is falsy
            port (): server port, used when ``socket`` is falsy
            socket (string): path of a Unix domain socket; takes precedence over host/port
        """
        transport = 'uds' if socket else 'tcp'
        self.log = getLogger('ocrd.mets_client.%s' % transport)
        if not socket:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()
        else:
            # slashes of the socket path are percent-encoded to form the URL
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    def __getattr__(self, name):
        # only reached for attributes that regular lookup did not find
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query ``GET /file`` and yield one :py:class:`ClientSideOcrdFile` per hit.

        Keyword arguments are sent as query parameters; the legacy names
        ``pageId``, ``ID`` and ``fileGrp`` are renamed to ``page_id``,
        ``file_id`` and ``file_grp`` first. Being a generator, the request is
        only issued once iteration starts.
        """
        renames = (('pageId', 'page_id'), ('ID', 'file_id'), ('fileGrp', 'file_grp'))
        for legacy, current in renames:
            if legacy in kwargs:
                kwargs[current] = kwargs.pop(legacy)
        response = self.session.request('GET', f'{self.url}/file', params=dict(kwargs))
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                local_filename=entry['local_filename'],
                mimetype=entry['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        return [*self.find_files(*args, **kwargs)]

    def add_agent(self, *args, **kwargs):
        """
        Send the agent described by the keyword arguments via ``POST /agent``.

        Positional arguments are not used. Returns the HTTP response.
        """
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from ``GET /agent``.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        return [ClientSideOcrdAgent(None, **fields) for fields in response.json()['agents']]

    @property
    def unique_identifier(self):
        """
        Text body of ``GET /unique_identifier``.
        """
        response = self.session.request('GET', f'{self.url}/unique_identifier')
        return response.text

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the JSON returned by ``GET /file_groups``.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Send a file entry as form data via ``POST /file``.

        Only ``file_grp``, ``file_id``, ``page_id``, ``mimetype`` and
        ``local_filename`` are transmitted; ``content`` and any further
        keyword arguments are not used. Returns the HTTP response.
        """
        form = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        ).dict()
        return self.session.request('POST', f'{self.url}/file', data=form)

    def save(self):
        """
        Issue ``PUT`` on the server root.
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Issue ``DELETE`` on the server root.
        """
        self.session.request('DELETE', self.url)
228
229
#
230
# Server
231
#
232
233
class OcrdMetsServer():
    """
    FastAPI application, run by uvicorn, that exposes the METS of a workspace
    over HTTP - bound either to a TCP host/port or to a Unix domain socket.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): object whose ``mets``, ``add_file`` and ``save_mets`` the request handlers use
            host (string): TCP host to bind; must not be combined with ``socket``
            port (): TCP port to bind; required together with ``host``
            socket (string): Unix domain socket to bind, as alternative to host/port
        Raises:
            ValueError: if both ``socket`` and ``host`` are given, or if
                ``socket`` is missing and ``host``/``port`` are incomplete
        """
        self.workspace = workspace
        if socket:
            if host:
                raise ValueError("Expecting either socket or host/port")
        elif not host or not port:
            raise ValueError("Expecting both host and port")
        self.host, self.port, self.socket = host, port, socket
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Assemble the FastAPI application and serve it with uvicorn.

        Handler names, parameter names and docstrings are part of what FastAPI
        exposes, so they are deliberately kept stable; explanatory notes are
        given as comments instead.
        """
        # captured by the handler closures below
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # --- error mapping: all of these are answered with HTTP 400 ---

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        # --- routes ---

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            matches = workspace.mets.find_all_files(
                fileGrp=file_grp,
                ID=file_id,
                pageId=page_id,
                mimetype=mimetype,
            )
            return OcrdFileListModel.create(matches)

        # persist the METS via the workspace
        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # building the model validates the submitted form values
            file_resource = OcrdFileModel.create(
                file_grp=file_grp,
                file_id=file_id,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename,
            )
            # the model's fields double as keyword arguments for the workspace
            workspace.add_file(**file_resource.dict())
            return file_resource

        # list the file groups of the METS
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        # register an agent in the METS and echo it back
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            workspace.mets.add_agent(**agent.dict())
            return agent

        # list the agents of the METS
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        # answered as text/plain rather than JSON
        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
334