Passed
Pull Request — master (#966)
by Konstantin
02:36
created

ocrd.mets_server.ClientSideOcrdAgent.__init__()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 8
dl 0
loc 19
rs 10
c 0
b 0
f 0
cc 1
nop 8

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
22
# XXX HACKS TODO
23
#
24
# NOTE(review): executed at import time, i.e. merely importing this module
# calls ``initLogging()`` (presumably configuring OCR-D logging) - confirm
# this side effect is intended, cf. the XXX/TODO marker above.
initLogging()
25
26
#
27
# Models
28
#
29
30
class OcrdFileModel(BaseModel):
    # Wire representation of a single file entry as exchanged between the
    # METS server and its clients (see the ``/file`` endpoints).
    # Kept as ``#`` comments on purpose: a class docstring would become part
    # of the generated schema description.
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Optional[str] = Field()
    local_filename : str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Optional[str], local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from the individual field values.

        Args:
            file_grp (string): value for the ``file_grp`` field
            file_id (string): value for the ``file_id`` field
            page_id (string or None): value for the ``page_id`` field
            local_filename (string): value for the ``local_filename`` field
            mimetype (string): value for the ``mimetype`` field
        """
        field_values = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**field_values)
40
41
class OcrdAgentModel(BaseModel):
    # Wire representation of a single agent as exchanged between the METS
    # server and its clients (see the ``/agent`` endpoints).
    # NOTE(review): pydantic gives underscore-prefixed names such as ``_type``
    # special treatment instead of handling them like ordinary fields -
    # confirm that the agent type really survives (de)serialization.
    name : str = Field()
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from the individual agent properties.

        Args:
            name (string): value for ``name``
            _type (string): value passed on as ``_type``
            role (string): value for ``role``
            otherrole (string): value for ``otherrole``
            othertype (string): value for ``othertype``
            notes (list): value for ``notes``, a list of ``(dict, text-or-None)`` tuples
        """
        return OcrdAgentModel(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
52
53
54
class OcrdFileListModel(BaseModel):
    # Wire representation of a list of files (response of ``GET /file``).
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert file objects into an :py:class:`OcrdFileListModel`.

        Args:
            files (list): objects providing ``fileGrp``, ``ID``, ``mimetype``,
                ``pageId`` and ``local_filename`` attributes
        """
        file_models = []
        for ocrd_file in files:
            file_models.append(OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                local_filename=ocrd_file.local_filename,
            ))
        return OcrdFileListModel(files=file_models)
62
63
class OcrdFileGroupListModel(BaseModel):
    # Wire representation of the list of file group names
    # (response of ``GET /file_groups``).
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names into an :py:class:`OcrdFileGroupListModel`.

        Args:
            file_groups (list): the file group names
        """
        group_list_model = OcrdFileGroupListModel(file_groups=file_groups)
        return group_list_model
69
70
class OcrdAgentListModel(BaseModel):
    # Wire representation of a list of agents (response of ``GET /agent``).
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert agent objects into an :py:class:`OcrdAgentListModel`.

        Args:
            agents (list): objects providing ``name``, ``type``, ``role``,
                ``otherrole``, ``othertype`` and ``notes`` attributes
        """
        agent_models = []
        for agent in agents:
            agent_models.append(OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            ))
        return OcrdAgentListModel(agents=agent_models)
78
79
#
80
# Client
81
#
82
83
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): name of the file group this file belongs to
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        Return a short human-readable summary of this file.

        Unset or empty properties are rendered as ``---``. ``url`` is never
        stored by the constructor, hence the ``getattr`` default. Values are
        formatted via an f-string so that non-string values (e.g. a path
        object as ``local_filename``) do not raise ``TypeError``.
        """
        props = ', '.join(
            f'{k}={getattr(self, k, None) or "---"}'
            for k in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']
        )
        # balanced brackets, consistent with ClientSideOcrdAgent.__str__
        return '<OcrdFile [%s]/>' % props
116
117
class ClientSideOcrdAgent():
    """
    Provides the same interface as :py:class:`ocrd_models.OcrdAgent`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, name=None, _type=None, othertype=None, role=None, otherrole=None,
                 notes=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string): stored as ``name``
            _type (string): stored as ``type``
            othertype (string): stored as ``othertype``
            role (string): stored as ``role``
            otherrole (string): stored as ``otherrole``
            notes (list): stored as ``notes``; presumably a list of
                ``(attributes-dict, text)`` tuples as declared by
                :py:class:`OcrdAgentModel` - TODO confirm
        """
        self.name = name
        self.type = _type
        self.othertype = othertype
        self.role = role
        self.otherrole = otherrole
        self.notes = notes

    def __str__(self):
        """
        Return a short human-readable summary of this agent.

        Unset or empty properties are rendered as ``---``. Values are
        formatted via an f-string so that non-string values do not raise
        ``TypeError``.
        """
        props = ', '.join(
            f'{k}={getattr(self, k) or "---"}'
            for k in ['type', 'othertype', 'role', 'otherrole', 'name']
        )
        return '<OcrdAgent [' + props + ']/>'
150
151
152
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which provides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, url):
        """
        Args:
            url (string): either an ``http://`` / ``https://`` URL (TCP) or,
                for anything else, the path of a UNIX domain socket
        """
        # https:// must count as TCP too, otherwise such a URL would be
        # mangled into a bogus http+unix:// socket address below
        protocol = 'tcp' if url.startswith(('http://', 'https://')) else 'uds'
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        if protocol == 'tcp':
            self.url = url
            self.session = requests_session()
        else:
            # the socket path goes into the host part of the http+unix:// URL,
            # hence its slashes are percent-encoded
            self.url = f'http+unix://{url.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    def __getattr__(self, name):
        """
        Reject access to any attribute this client does not provide.

        Only invoked when regular attribute lookup fails.

        Raises:
            NotImplementedError: always
        """
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server (``GET /file``) for files matching the given criteria.

        Keyword Args:
            **kwargs: sent as query parameters; the legacy names ``pageId``,
                ``ID`` and ``fileGrp`` are renamed to ``page_id``,
                ``file_id`` and ``file_grp``
        Yields:
            :py:class:`ClientSideOcrdFile` for every entry of the response.
            Since this is a generator, the request is only sent once
            iteration starts.
        """
        if 'pageId' in kwargs:
            kwargs['page_id'] = kwargs.pop('pageId')
        if 'ID' in kwargs:
            kwargs['file_id'] = kwargs.pop('ID')
        if 'fileGrp' in kwargs:
            kwargs['file_grp'] = kwargs.pop('fileGrp')
        r = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], local_filename=f['local_filename'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Register an agent on the server (``POST /agent``).

        Positional arguments are ignored; the keyword arguments are those of
        :py:meth:`OcrdAgentModel.create`.

        Returns:
            the response object of the request
        """
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel.create(**kwargs).dict())

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from ``GET /agent``.
        """
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in self.session.request('GET', f'{self.url}/agent').json()['agents']]

    @property
    def unique_identifier(self):
        """
        Response text of ``GET /unique_identifier``.
        """
        return self.session.request('GET', f'{self.url}/unique_identifier').text

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the ``GET /file_groups`` response.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Add a file on the server (``POST /file``, sent as form data).

        Args:
            file_grp (string): file group of the new file
        Keyword Args:
            content (): not transmitted
            file_id (string): ID of the new file
            local_filename (string): local filename of the new file
            mimetype (string): media type of the new file
            page_id (string): page ID of the new file
            **kwargs: not transmitted
        Returns:
            the response object of the request
        """
        return self.session.request(
            'POST',
            f'{self.url}/file',
            data=OcrdFileModel.create(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename).dict(),
        )

    def save(self):
        """
        Ask the server to save (``PUT`` on the base URL).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Ask the server to stop (``DELETE`` on the base URL).
        """
        self.session.request('DELETE', self.url)
226
227
#
228
# Server
229
#
230
231
class OcrdMetsServer():
    """
    Serves the METS of a workspace over HTTP via a FastAPI application
    handed to ``uvicorn.run``.
    """

    def __init__(self, workspace, url):
        """
        Args:
            workspace (): workspace whose ``mets`` is queried, modified and saved
            url (string): where to listen; a value starting with ``http`` is
                parsed into host and port, anything else is passed to uvicorn
                as ``uds``
        """
        self.url = url
        self.workspace = workspace
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Set up the FastAPI application with all endpoints and run it with uvicorn.
        """
        # the endpoint closures below capture the workspace
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # Each of the following exception types is answered with HTTP 400

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Optional[str] = None,
            file_id : Optional[str] = None,
            page_id : Optional[str] = None,
            mimetype : Optional[str] = None,
        ):
            """
            Find files in the mets
            """
            matches = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(matches)

        # Save the METS of the workspace
        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Optional[str] = Form(),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Building the model validates the form values
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, local_filename=local_filename)
            # The validated values become the keyword arguments for the workspace
            workspace.add_file(**file_resource.dict())
            return file_resource

        # List the file groups of the METS
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        # Add an agent to the METS, echoing the received model back
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            workspace.mets.add_agent(**agent.dict())
            return agent

        # List the agents of the METS
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        # Return the unique identifier of the METS as plain text
        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        # Listen on a UNIX domain socket unless the URL starts with 'http'
        if not self.url.startswith('http'):
            run_options = {'uds': self.url}
        else:
            address = urlparse(self.url)
            run_options = {'host': address.hostname, 'port': address.port}
        uvicorn.run(app, **run_options)
331