Passed
Pull Request — master (#966)
by Konstantin
04:31 queued 02:00
created

ocrd.mets_server.ClientSideOcrdAgent.__init__()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 8
dl 0
loc 19
rs 10
c 0
b 0
f 0
cc 1
nop 8

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, UploadFile
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACKS TODO
22
#
23
# NOTE: initLogging() runs at import time, i.e. merely importing this module
# triggers it (flagged as a hack by the comment above).
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    # Schema of a single file entry as exchanged between METS client and
    # METS server. (Kept as comments rather than a class docstring so the
    # generated schema description is not altered.)
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Union[str, None] = Field()
    local_filename : str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from the individual file attributes.

        All values are handed to the model constructor unchanged.
        """
        attributes = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**attributes)
39
40
class OcrdAgentModel(BaseModel):
    # Schema of a single agent entry as exchanged between METS client and
    # METS server.
    name : str = Field()
    # NOTE(review): pydantic handles attribute names with a leading underscore
    # specially (they are not regular model fields), so `_type` is presumably
    # not validated/serialised like the other fields - confirm against the
    # pydantic version in use before relying on it.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from the individual agent attributes.

        All values are handed to the model constructor unchanged.
        """
        return OcrdAgentModel(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
51
52
53
class OcrdFileListModel(BaseModel):
    # Schema wrapping a list of file entries (response of ``GET /file``).
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert an iterable of file objects into an :py:class:`OcrdFileListModel`.

        Reads ``fileGrp``, ``ID``, ``mimetype``, ``pageId`` and
        ``local_filename`` from every element.
        """
        file_models = [
            OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                local_filename=ocrd_file.local_filename,
            )
            for ocrd_file in files
        ]
        return OcrdFileListModel(files=file_models)
61
62
class OcrdFileGroupListModel(BaseModel):
    # Schema wrapping a list of file group names (response of ``GET /file_groups``).
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names in an :py:class:`OcrdFileGroupListModel`.
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
68
69
class OcrdAgentListModel(BaseModel):
    # Schema wrapping a list of agent entries (response of ``GET /agent``).
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert an iterable of agent objects into an :py:class:`OcrdAgentListModel`.

        Reads ``name``, ``type``, ``role``, ``otherrole``, ``othertype`` and
        ``notes`` from every element.
        """
        agent_models = [
            OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            )
            for agent in agents
        ]
        return OcrdAgentListModel(agents=agent_models)
77
78
#
79
# Client
80
#
81
82
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            url (string): stored unchanged as ``self.url``
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): stored unchanged as ``self.fileGrp``
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        # Keep the URL instead of silently discarding the argument, so that
        # callers passing ``url=...`` can read it back from the instance.
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp
107
108
class ClientSideOcrdAgent():
    """
    Lightweight stand-in for :py:class:`ocrd_models.OcrdAgent` that is not
    attached to an :py:class:`ocrd_models.OcrdMets`; it only carries the
    values passed to the constructor, e.g. those found in a response of the
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(
        self,
        el,
        name=None,
        _type=None,
        othertype=None,
        role=None,
        otherrole=None,
        notes=None,
    ):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (): stored as ``self.name``
            _type (): stored as ``self.type`` (note the attribute has no underscore)
            othertype (): stored as ``self.othertype``
            role (): stored as ``self.role``
            otherrole (): stored as ``self.otherrole``
            notes (): stored as ``self.notes``
        """
        self.name, self.type = name, _type
        self.role, self.otherrole = role, otherrole
        self.othertype = othertype
        self.notes = notes
134
135
class ClientSideOcrdMets():
    """
    Stand-in for :py:class:`ocrd_models.ocrd_mets.OcrdMets` whose
    ``find_files``, ``find_all_files``, ``add_agent``, ``agents``,
    ``file_groups``, ``add_file``, ``save`` and ``stop`` are implemented as
    HTTP requests against a :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, host, port, socket):
        """
        Args:
            host (): TCP host, only used when ``socket`` is falsy
            port (): TCP port, only used when ``socket`` is falsy
            socket (): path of a unix domain socket; takes precedence when truthy
        """
        transport = 'uds' if socket else 'tcp'
        self.log = getLogger('ocrd.mets_client.%s' % transport)
        if not socket:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()
        else:
            # Slashes of the socket path are percent-encoded to form the URL
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query ``GET /file`` and lazily yield one
        :py:class:`ClientSideOcrdFile` per entry of the ``files`` list in the
        JSON response. The request is only sent once iteration starts.

        Keyword arguments are sent as query parameters.
        """
        # Rename legacy camel-case keywords to the snake_case names that the
        # server's ``GET /file`` endpoint declares.
        for legacy_name, current_name in (('pageId', 'page_id'), ('ID', 'file_id'), ('fileGrp', 'file_grp')):
            if legacy_name in kwargs:
                kwargs[current_name] = kwargs.pop(legacy_name)
        response = self.session.request('GET', f'{self.url}/file', params=dict(kwargs))
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                local_filename=entry['local_filename'],
                mimetype=entry['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Same as :py:meth:`find_files` but returns a fully materialised list.
        """
        return [*self.find_files(*args, **kwargs)]

    def add_agent(self, *args, **kwargs):
        """
        ``POST`` the agent described by the keyword arguments to ``/agent``
        as JSON and return the response object. Positional arguments are
        not used.
        """
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from the ``agents``
        entries of the ``GET /agent`` JSON response.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in response.json()['agents']]

    @property
    def file_groups(self):
        """
        The ``file_groups`` value of the ``GET /file_groups`` JSON response.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        ``POST`` a file description to ``/file`` as form data and return the
        response object.

        ``content`` and any extra keyword arguments are accepted but not used.
        """
        file_model = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        )
        return self.session.request('POST', f'{self.url}/file', data=file_model.dict())

    def save(self):
        """
        Send ``PUT`` to the server's base URL (the server's save endpoint).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send ``DELETE`` to the server's base URL (the server's stop endpoint).
        """
        self.session.request('DELETE', self.url)
202
203
#
204
# Server
205
#
206
207
class OcrdMetsServer():
    """
    HTTP server (FastAPI app run by uvicorn) exposing file, file group and
    agent operations of a workspace's METS, reachable either via TCP
    host/port or via a unix domain socket.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): object whose ``mets``, ``add_file`` and ``save_mets`` are used by the endpoints
            host (): TCP host to listen on (mutually exclusive with ``socket``)
            port (): TCP port to listen on
            socket (): unix domain socket to listen on
        Raises:
            ValueError: if both ``socket`` and ``host`` are given, or if
                neither ``socket`` nor both ``host`` and ``port`` are given
        """
        self.workspace = workspace
        if socket and host:
            raise ValueError("Expecting either socket or host/port")
        if not socket and not(host and port):
            raise ValueError("Expecting both host and port")
        self.host = host
        self.port = port
        self.socket = socket
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the server process immediately.
        """
        # XXX HACK os._exit to not trigger SystemExit caught by uvicorn with sys.exit
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application with all endpoints and run it with
        uvicorn on the configured host/port or unix domain socket.
        """

        # XXX HACK
        # circumventing dependency injection like this is bad and
        # needs to be refactored once it's all running
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # The handlers below turn the respective exception into a 400 response

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            kwargs['page_id'] = page_id
            workspace.add_file(**kwargs)
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # Single exit path: reuse shutdown() instead of duplicating the
            # os._exit call here.
            self.shutdown()

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
309
310
311