Passed
Pull Request — master (#966)
by Konstantin
02:39
created

ocrd.mets_server.ClientSideOcrdMets.add_file()   A

Complexity

Conditions 1

Size

Total Lines 13
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 13
rs 9.75
c 0
b 0
f 0
cc 1
nop 8

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, UploadFile
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACK / TODO: logging is initialized as a side effect of importing this module
22
#
23
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    # Schema of a single file entry as exchanged between the METS server
    # and its clients: file group, file ID, MIME type, page ID and URL.
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Union[str, None] = Field()
    url : str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], url : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from the individual field values.

        Args:
            file_grp (string): value for the ``file_grp`` field
            file_id (string): value for the ``file_id`` field
            page_id (string): value for the ``page_id`` field, may be ``None``
            url (string): value for the ``url`` field
            mimetype (string): value for the ``mimetype`` field
        """
        field_values = dict(
            file_grp=file_grp,
            file_id=file_id,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
        )
        return OcrdFileModel(**field_values)
39
40
class OcrdAgentModel(BaseModel):
    # Schema of a single agent entry as exchanged between the METS server
    # and its clients.
    name : str = Field()
    # NOTE(review): pydantic may treat underscore-prefixed attributes
    # specially -- confirm that ``_type`` is really serialized as a field.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from the individual field values.

        Every argument is passed to the model constructor under its own name.
        """
        field_values = {
            'name': name,
            '_type': _type,
            'role': role,
            'otherrole': otherrole,
            'othertype': othertype,
            'notes': notes,
        }
        return OcrdAgentModel(**field_values)
51
52
53
class OcrdFileListModel(BaseModel):
    # Schema of a list of file entries (response body of ``GET /file``).
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert file objects into an :py:class:`OcrdFileListModel`.

        Args:
            files (list): objects exposing ``fileGrp``, ``ID``, ``mimetype``,
                ``pageId`` and ``url`` attributes
        """
        entries = []
        for ocrd_file in files:
            entry = OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                url=ocrd_file.url,
            )
            entries.append(entry)
        return OcrdFileListModel(files=entries)
61
62
class OcrdFileGroupListModel(BaseModel):
    # Schema of a list of file group names (response body of
    # ``GET /file_groups``).
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names in an :py:class:`OcrdFileGroupListModel`.
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
68
69
class OcrdAgentListModel(BaseModel):
    # Schema of a list of agent entries (response body of ``GET /agent``).
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert agent objects into an :py:class:`OcrdAgentListModel`.

        Args:
            agents (list): objects exposing ``name``, ``type``, ``role``,
                ``otherrole``, ``othertype`` and ``notes`` attributes
        """
        entries = []
        for agent in agents:
            entry = OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            )
            entries.append(entry)
        return OcrdAgentListModel(agents=entries)
77
78
#
79
# Client
80
#
81
82
class ClientSideOcrdFile:
    """
    Plain value object mirroring the interface of
    :py:class:`ocrd_models.ocrd_file.OcrdFile`. It is not attached to any
    :py:class:`ocrd_models.ocrd_mets.OcrdMets`; it merely holds the values
    found in a response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            local_filename (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): ``@xlink:href`` of this ``mets:file``
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): stored as the ``fileGrp`` attribute
        """
        # ``el``, ``mets`` and ``local_filename`` are accepted for signature
        # compatibility only and are not stored on the instance.
        self.ID, self.mimetype, self.url = ID, mimetype, url
        self.loctype, self.pageId, self.fileGrp = loctype, pageId, fileGrp
108
109
class ClientSideOcrdAgent():
    """
    Plain value object mirroring the interface of
    :py:class:`ocrd_models.OcrdAgent`. It is not attached to any
    :py:class:`ocrd_models.ocrd_mets.OcrdMets`; it merely holds the values
    found in a response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, name=None, _type=None, othertype=None, role=None, otherrole=None,
                 notes=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string): stored as ``name``
            _type (string): stored as ``type`` (without the underscore)
            othertype (string): stored as ``othertype``
            role (string): stored as ``role``
            otherrole (string): stored as ``otherrole``
            notes (): stored as ``notes``, unchanged
        """
        self.name, self.type, self.othertype = name, _type, othertype
        self.role, self.otherrole, self.notes = role, otherrole, notes
135
136
class ClientSideOcrdMets():
    """
    Stand-in for :py:class:`ocrd_models.ocrd_mets.OcrdMets` that talks HTTP to a
    :py:class:`ocrd.mets_server.OcrdMetsServer` instead of operating on a
    local METS. It provides replacements for

    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`,
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`,
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`.
    """

    def __init__(self, host, port, socket):
        """
        Args:
            host (): host of the server, used when ``socket`` is falsy
            port (): port of the server, used when ``socket`` is falsy
            socket (): path of the server's Unix domain socket; when truthy
                it takes precedence over ``host``/``port``
        """
        transport = 'uds' if socket else 'tcp'
        self.log = getLogger('ocrd.mets_client.%s' % transport)
        if not socket:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()
        else:
            # The socket path is embedded in the URL with its slashes escaped
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query ``GET /file`` with ``kwargs`` as query parameters and yield one
        :py:class:`ClientSideOcrdFile` per entry of the response's ``files`` list.

        The legacy keywords ``pageId``, ``ID`` and ``fileGrp`` are renamed to
        ``page_id``, ``file_id`` and ``file_grp`` before the request is sent.
        """
        legacy_names = (
            ('pageId', 'page_id'),
            ('ID', 'file_id'),
            ('fileGrp', 'file_grp'),
        )
        for legacy, current in legacy_names:
            if legacy in kwargs:
                kwargs[current] = kwargs.pop(legacy)
        response = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                url=entry['url'],
                mimetype=entry['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        matches = self.find_files(*args, **kwargs)
        return list(matches)

    def add_agent(self, *args, **kwargs):
        """
        ``POST`` an agent, built from ``kwargs`` via
        :py:meth:`OcrdAgentModel.create`, to ``/agent`` and return the response.

        Positional arguments are accepted but not used.
        """
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        Agents reported by ``GET /agent``, each wrapped in a
        :py:class:`ClientSideOcrdAgent`.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        wrapped = []
        for agent_dict in response.json()['agents']:
            wrapped.append(ClientSideOcrdAgent(None, **agent_dict))
        return wrapped

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the ``GET /file_groups`` response.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, mimetype=None, page_id=None, **kwargs):
        """
        ``POST`` a file description to ``/file`` and return the response.

        The description is sent as form data built via
        :py:meth:`OcrdFileModel.create`; ``content`` is sent as the upload
        named ``data``. Additional keyword arguments are accepted but not used.
        """
        form_fields = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
        ).dict()
        return self.session.request(
            'POST',
            f'{self.url}/file',
            data=form_fields,
            files={'data': content},
        )

    def save(self):
        """
        Send ``PUT`` to the server's base URL.
        """
        self.session.request('PUT', self.url)
202
203
204
#
205
# Server
206
#
207
208
class OcrdMetsServer():
    """
    HTTP front-end for the METS of a workspace, served with uvicorn either
    on a TCP host/port or on a Unix domain socket.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): workspace whose METS is exposed
            host (): host to listen on (mutually exclusive with ``socket``)
            port (): port to listen on (required together with ``host``)
            socket (): Unix domain socket to listen on
        Raises:
            ValueError: if both ``socket`` and ``host`` are given, or if
                neither ``socket`` nor the pair ``host``/``port`` is given
        """
        self.workspace = workspace
        if socket and host:
            raise ValueError("Expecting either socket or host/port")
        if not (socket or (host and port)):
            raise ValueError("Expecting both host and port")
        self.host, self.port, self.socket = host, port, socket
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application with all routes and run it with uvicorn.
        """
        # XXX HACK
        # circumventing dependency injection like this is bad and
        # needs to be refactored once it's all running
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # Each of the following errors is reported to the client as HTTP 400.

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            matches = workspace.mets.find_all_files(
                fileGrp=file_grp,
                ID=file_id,
                pageId=page_id,
                mimetype=mimetype,
            )
            return OcrdFileListModel.create(matches)

        # PUT / writes the METS via the workspace
        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : str = Form(),
        ):
            """
            Add a file
            """
            # Run the form values through the model first
            file_resource = OcrdFileModel.create(
                file_grp=file_grp,
                file_id=file_id,
                page_id=page_id,
                mimetype=mimetype,
                url=url,
            )
            # Translate the model fields into workspace.add_file() arguments:
            # the page ID is taken from the form value and the URL is passed
            # on as ``local_filename``
            add_args = file_resource.dict()
            add_args['page_id'] = page_id
            add_args['local_filename'] = add_args.pop('url')
            workspace.add_file(**add_args)
            return file_resource

        # GET /file_groups lists the file groups of the METS
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            groups = workspace.mets.file_groups
            return {'file_groups': groups}

        # POST /agent adds the posted agent to the METS and echoes it back
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            agent_args = agent.dict()
            workspace.mets.add_agent(**agent_args)
            return agent

        # GET /agent lists the agents of the METS
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # XXX HACK os._exit to not trigger SystemExit caught by uvicorn with sys.exit
            _exit(0)

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
311
312
313