Passed
Pull Request — master (#966)
by Konstantin
04:11 queued 01:47
created

ocrd.mets_server.ClientSideOcrdFile.__init__()   A

Complexity

Conditions 1

Size

Total Lines 18
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 18
rs 10
c 0
b 0
f 0
cc 1
nop 10

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand, but their parameters also often become inconsistent when you need more or different data.

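There are several approaches to avoid long parameter lists: replace a parameter with a method call, preserve the whole object, or introduce a parameter object.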

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, UploadFile
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACKS TODO
22
#
23
# Called at module level, i.e. this runs as a side effect of importing this module.
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    # Description of a single file as exchanged between METS server and client.
    # (Documented with comments rather than a docstring so that the generated
    # schema stays exactly as it was.)
    file_grp: str = Field()
    file_id: str = Field()
    mimetype: str = Field()
    page_id: Union[str, None] = Field()
    local_filename: str = Field()

    @staticmethod
    def create(file_grp: str, file_id: str, page_id: Union[str, None], local_filename: str, mimetype: str):
        """
        Build an :py:class:`OcrdFileModel` from its individual field values.
        """
        field_values = dict(
            file_grp=file_grp,
            file_id=file_id,
            page_id=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        )
        return OcrdFileModel(**field_values)
39
40
class OcrdAgentModel(BaseModel):
    # Description of a single agent as exchanged between METS server and client.
    name: str = Field()
    # NOTE(review): pydantic may not treat underscore-prefixed names as model
    # fields - confirm that `_type` is really validated and serialized.
    _type: str = Field()
    role: str = Field()
    otherrole: Optional[str] = Field()
    othertype: str = Field()
    notes: Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name: str, _type: str, role: str, otherrole: str, othertype: str, notes: List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from its individual field values.
        """
        field_values = dict(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
        return OcrdAgentModel(**field_values)
51
52
53
class OcrdFileListModel(BaseModel):
    # Wrapper holding a list of file descriptions under the key ``files``.
    files: List[OcrdFileModel] = Field()

    @staticmethod
    def create(files: List[OcrdFile]):
        """
        Convert each given file object into an :py:class:`OcrdFileModel`
        (reading its ``fileGrp``, ``ID``, ``mimetype``, ``pageId`` and
        ``local_filename`` attributes) and wrap the results in a list model.
        """
        converted = []
        for ocrd_file in files:
            converted.append(
                OcrdFileModel.create(
                    file_grp=ocrd_file.fileGrp,
                    file_id=ocrd_file.ID,
                    mimetype=ocrd_file.mimetype,
                    page_id=ocrd_file.pageId,
                    local_filename=ocrd_file.local_filename,
                )
            )
        return OcrdFileListModel(files=converted)
61
62
class OcrdFileGroupListModel(BaseModel):
    # Wrapper holding a list of file group names under the key ``file_groups``.
    file_groups: List[str] = Field()

    @staticmethod
    def create(file_groups: List[str]):
        """
        Wrap the given list of file group names in a list model.
        """
        payload = {'file_groups': file_groups}
        return OcrdFileGroupListModel(**payload)
68
69
class OcrdAgentListModel(BaseModel):
    # Wrapper holding a list of agent descriptions under the key ``agents``.
    agents: List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents: List[OcrdAgent]):
        """
        Convert each given agent object into an :py:class:`OcrdAgentModel`
        (reading its ``name``, ``type``, ``role``, ``otherrole``, ``othertype``
        and ``notes`` attributes) and wrap the results in a list model.
        """
        converted = []
        for agent in agents:
            converted.append(
                OcrdAgentModel(
                    name=agent.name,
                    _type=agent.type,
                    role=agent.role,
                    otherrole=agent.otherrole,
                    othertype=agent.othertype,
                    notes=agent.notes,
                )
            )
        return OcrdAgentListModel(agents=converted)
77
78
#
79
# Client
80
#
81
82
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            url (string): URL of this ``mets:file``, stored as given
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): name of the file group this ``mets:file`` belongs to
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        # Previously accepted but silently discarded; keep it so that code
        # reading ``.url`` does not fail with an AttributeError.
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp
107
108
class ClientSideOcrdAgent():
    """
    Plain value object mirroring the attributes of :py:class:`ocrd_models.OcrdAgent`
    (``name``, ``type``, ``othertype``, ``role``, ``otherrole``, ``notes``) without
    being attached to a :py:class:`ocrd_models.ocrd_mets.OcrdMets`; used for the
    agents reported by the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(
        self,
        el,
        name=None,
        _type=None,
        othertype=None,
        role=None,
        otherrole=None,
        notes=None,
    ):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string): stored as ``self.name``
            _type (string): stored as ``self.type``
            othertype (string): stored as ``self.othertype``
            role (string): stored as ``self.role``
            otherrole (string): stored as ``self.otherrole``
            notes (): stored as ``self.notes``
        """
        # Note the rename: the ``_type`` argument ends up in the ``type`` attribute.
        self.name, self.type = name, _type
        self.role, self.otherrole = role, otherrole
        self.othertype = othertype
        self.notes = notes
134
135
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which provides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, host, port, socket):
        """
        Set up the HTTP session and base URL for talking to the server.

        Args:
            host (string): host name for TCP access (only used if ``socket`` is falsy)
            port (): port for TCP access (only used if ``socket`` is falsy)
            socket (string): path of a Unix domain socket; takes precedence over host/port
        """
        self.log = getLogger('ocrd.mets_client.%s' % ('uds' if socket else 'tcp'))
        if socket:
            # The socket path is embedded in the URL, so slashes must be percent-encoded
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()
        else:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()

    def __getattr__(self, name):
        """
        Reject access to anything this client does not implement.

        Python only calls this after normal attribute lookup has failed, so
        there is nothing left to look up here. (Calling ``hasattr(self, name)``
        at this point would re-enter this method and recurse without end.)

        Raises:
            AttributeError: always
        """
        raise AttributeError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server (``GET /file``) and yield one
        :py:class:`ClientSideOcrdFile` per entry of the response.

        Keyword arguments are sent as query parameters; the legacy names
        ``pageId``, ``ID`` and ``fileGrp`` are renamed to ``page_id``,
        ``file_id`` and ``file_grp`` first.

        This is a generator: the request is only sent once iteration starts.
        """
        if 'pageId' in kwargs:
            kwargs['page_id'] = kwargs.pop('pageId')
        if 'ID' in kwargs:
            kwargs['file_id'] = kwargs.pop('ID')
        if 'fileGrp' in kwargs:
            kwargs['file_grp'] = kwargs.pop('fileGrp')
        r = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], local_filename=f['local_filename'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Same as :py:meth:`find_files`, but returns the results as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Send a new agent to the server (``POST /agent``) and return the HTTP response.

        Only keyword arguments are used (passed to :py:meth:`OcrdAgentModel.create`);
        positional arguments are ignored.
        """
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel.create(**kwargs).dict())

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent`, one per agent reported by ``GET /agent``.
        """
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in self.session.request('GET', f'{self.url}/agent').json()['agents']]

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the server's ``GET /file_groups`` response.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Send a new file entry to the server (``POST /file``, form-encoded) and
        return the HTTP response.

        Only ``file_grp``, ``file_id``, ``page_id``, ``mimetype`` and
        ``local_filename`` are transmitted; ``content`` and any further
        keyword arguments are accepted but not sent.
        """
        return self.session.request(
            'POST',
            f'{self.url}/file',
            data=OcrdFileModel.create(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename).dict(),
        )

    def save(self):
        """
        Send ``PUT`` to the server's base URL.
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send ``DELETE`` to the server's base URL.
        """
        self.session.request('DELETE', self.url)
206
207
#
208
# Server
209
#
210
211
class OcrdMetsServer():
    """
    HTTP server (FastAPI application run by uvicorn) exposing the METS of a
    workspace, reachable either via host/port or via a Unix domain socket.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): workspace whose METS is to be served
            host (string): host to bind to (TCP mode)
            port (): port to bind to (TCP mode)
            socket (string): path of the Unix domain socket (UDS mode)

        Raises:
            ValueError: if both ``socket`` and ``host`` are given, or if
                ``socket`` is not given and ``host``/``port`` are incomplete
        """
        self.workspace = workspace
        if socket and host:
            raise ValueError("Expecting either socket or host/port")
        if not socket and not(host and port):
            raise ValueError("Expecting both host and port")
        self.host = host
        self.port = port
        self.socket = socket
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit code 0.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application with all routes and run it with uvicorn.
        """

        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # The following handlers all map the respective exception to an HTTP 400 response

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            # Default of None makes this form field optional, in line with its
            # type; a bare Form() would reject requests that carry no page_id.
            page_id : Union[str, None] = Form(None),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            # Pass page_id explicitly, even when it is None
            kwargs['page_id'] = page_id
            workspace.add_file(**kwargs)
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
309