Passed
Pull Request — master (#966)
by Konstantin
02:35
created

ocrd.mets_server.OcrdFileModel.create()   A

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
nop 5
1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, Response
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACKS TODO: initLogging() below runs as a side effect of importing this module
22
#
23
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    # Wire representation of a single file entry exchanged between the
    # METS server and its client. The field names are the snake_case
    # counterparts of the OcrdFile attributes fileGrp / ID / mimetype /
    # pageId / local_filename.
    file_grp: str = Field()
    file_id: str = Field()
    mimetype: str = Field()
    page_id: Union[str, None] = Field()
    local_filename: str = Field()

    @staticmethod
    def create(
        file_grp: str,
        file_id: str,
        page_id: Union[str, None],
        local_filename: str,
        mimetype: str,
    ):
        """
        Build an :py:class:`OcrdFileModel` from its individual values.

        Args:
            file_grp (string): value for the ``file_grp`` field
            file_id (string): value for the ``file_id`` field
            page_id (string or None): value for the ``page_id`` field
            local_filename (string): value for the ``local_filename`` field
            mimetype (string): value for the ``mimetype`` field
        Returns:
            the new :py:class:`OcrdFileModel` instance
        """
        values = dict(
            file_grp=file_grp,
            file_id=file_id,
            page_id=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        )
        return OcrdFileModel(**values)
39
40
class OcrdAgentModel(BaseModel):
    # Wire representation of a single agent entry exchanged between the
    # METS server and its client.
    #
    # NOTE(review): pydantic gives special treatment to attribute names with
    # a leading underscore, so `_type` may not end up as a regular model
    # field (and would then be missing from `.dict()`) - TODO confirm
    # against the pydantic version in use.
    name: str = Field()
    _type: str = Field()
    role: str = Field()
    otherrole: Optional[str] = Field()
    othertype: str = Field()
    notes: Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(
        name: str,
        _type: str,
        role: str,
        otherrole: str,
        othertype: str,
        notes: List[Tuple[Dict[str, str], Optional[str]]],
    ):
        """
        Build an :py:class:`OcrdAgentModel` from its individual values.

        Every argument is forwarded unchanged as the keyword argument of
        the same name.

        Returns:
            the new :py:class:`OcrdAgentModel` instance
        """
        values = dict(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
        return OcrdAgentModel(**values)
51
52
53
class OcrdFileListModel(BaseModel):
    # Wire representation of a list of file entries
    # (used as the response of ``GET /file``).
    files: List[OcrdFileModel] = Field()

    @staticmethod
    def create(files: List[OcrdFile]):
        """
        Convert file objects into an :py:class:`OcrdFileListModel`.

        Args:
            files (list): objects exposing ``fileGrp``, ``ID``, ``mimetype``,
                ``pageId`` and ``local_filename`` attributes
        Returns:
            a model holding one :py:class:`OcrdFileModel` per input file,
            in input order
        """
        file_models = [
            OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                local_filename=ocrd_file.local_filename,
            )
            for ocrd_file in files
        ]
        return OcrdFileListModel(files=file_models)
61
62
class OcrdFileGroupListModel(BaseModel):
    # Wire representation of a list of file group names
    # (used as the response of ``GET /file_groups``).
    file_groups: List[str] = Field()

    @staticmethod
    def create(file_groups: List[str]):
        """
        Wrap a list of file group names in an :py:class:`OcrdFileGroupListModel`.

        Args:
            file_groups (list): the file group names
        Returns:
            the new :py:class:`OcrdFileGroupListModel` instance
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
68
69
class OcrdAgentListModel(BaseModel):
    # Wire representation of a list of agent entries
    # (used as the response of ``GET /agent``).
    agents: List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents: List[OcrdAgent]):
        """
        Convert agent objects into an :py:class:`OcrdAgentListModel`.

        Args:
            agents (list): objects exposing ``name``, ``type``, ``role``,
                ``otherrole``, ``othertype`` and ``notes`` attributes
        Returns:
            a model holding one :py:class:`OcrdAgentModel` per input agent,
            in input order
        """
        agent_models = [
            OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            )
            for agent in agents
        ]
        return OcrdAgentListModel(agents=agent_models)
77
78
#
79
# Client
80
#
81
82
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): stored as given - XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): file group this ``mets:file`` belongs to
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        # Keep the attribute available even when no URL was given, so that
        # reading ``.url`` never raises AttributeError.
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        String representation of this file; unset or empty properties are
        rendered as ``---``.
        """
        # Formatting through an f-string coerces non-string values instead
        # of failing the way ``str.join`` would.
        props = ', '.join(
            f"{k}={getattr(self, k, None) or '---'}"
            for k in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']
        )
        return '<OcrdFile [%s]/>' % props
115
116
class ClientSideOcrdAgent():
    """
    Stand-in for :py:class:`ocrd_models.OcrdAgent` that is not attached to an
    :py:class:`ocrd_models.OcrdMets`; it merely carries the values returned
    by the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, name=None, _type=None, othertype=None, role=None, otherrole=None,
                 notes=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string): stored as ``name``
            _type (string): stored as ``type``
            othertype (string): stored as ``othertype``
            role (string): stored as ``role``
            otherrole (string): stored as ``otherrole``
            notes (): stored as ``notes``
        """
        self.name, self.type = name, _type
        self.othertype, self.role = othertype, role
        self.otherrole, self.notes = otherrole, notes

    def __str__(self):
        """
        String representation of this agent; unset or empty properties are
        rendered as ``---``.
        """
        shown = ('type', 'othertype', 'role', 'otherrole', 'name')
        pairs = []
        for attr in shown:
            value = getattr(self, attr)
            pairs.append('='.join([attr, value or '---']))
        return '<OcrdAgent [%s]/>' % ', '.join(pairs)
149
150
151
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which provides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, host, port, socket):
        """
        Args:
            host (): host name of the server; only used when ``socket`` is falsy
            port (): TCP port of the server; only used when ``socket`` is falsy
            socket (string): path of a Unix domain socket; takes precedence
                over ``host``/``port`` when truthy
        """
        self.log = getLogger('ocrd.mets_client.%s' % ('uds' if socket else 'tcp'))
        if socket:
            # Slashes of the socket path are percent-encoded so that the
            # whole path fits into the host part of the URL.
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()
        else:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()

    def __getattr__(self, name):
        """
        Reject every attribute this class does not define.

        Python only calls this hook after regular attribute lookup failed.

        Raises:
            NotImplementedError: always
        """
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        """
        String representation showing the server URL.
        """
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server for files (``GET /file``) and yield the matches.

        This is a generator: the HTTP request is only sent once iteration
        starts.

        Keyword Args:
            **kwargs: sent as query parameters; the legacy names ``pageId``,
                ``ID`` and ``fileGrp`` are renamed to ``page_id``,
                ``file_id`` and ``file_grp`` first
        Yields:
            one :py:class:`ClientSideOcrdFile` per entry of the ``files``
            list in the JSON response
        """
        if 'pageId' in kwargs:
            kwargs['page_id'] = kwargs.pop('pageId')
        if 'ID' in kwargs:
            kwargs['file_id'] = kwargs.pop('ID')
        if 'fileGrp' in kwargs:
            kwargs['file_grp'] = kwargs.pop('fileGrp')
        r = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], local_filename=f['local_filename'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the matches as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Register an agent on the server (``POST /agent``).

        Positional arguments are not used; the keyword arguments are those
        of :py:meth:`OcrdAgentModel.create`.

        Returns:
            the response object of the HTTP request
        """
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel.create(**kwargs).dict())

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from the ``agents``
        list of the server's ``GET /agent`` JSON response.
        """
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in self.session.request('GET', f'{self.url}/agent').json()['agents']]

    @property
    def unique_identifier(self):
        """
        Body text of the server's ``GET /unique_identifier`` response.
        """
        return self.session.request('GET', f'{self.url}/unique_identifier').text

    @property
    def file_groups(self):
        """
        The ``file_groups`` list of the server's ``GET /file_groups`` JSON response.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Register a file on the server (``POST /file``, sent as form data).

        Args:
            file_grp (string): file group of the new file
        Keyword Args:
            content (): not used
            file_id (string): ID of the new file
            local_filename (string): local file name of the new file
            mimetype (string): MIME type of the new file
            page_id (string): page ID of the new file
            **kwargs: not used
        Returns:
            the response object of the HTTP request
        """
        return self.session.request(
            'POST',
            f'{self.url}/file',
            data=OcrdFileModel.create(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename).dict(),
        )

    def save(self):
        """
        Ask the server to save the METS (``PUT /``).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Ask the server to save the METS and shut down (``DELETE /``).
        """
        # Imported here so this method only relies on the ``requests``
        # package the module already depends on.
        from requests.exceptions import ConnectionError as RequestsConnectionError
        try:
            self.session.request('DELETE', self.url)
        except RequestsConnectionError:
            # Expected: the server's DELETE handler terminates its process
            # via os._exit() and therefore never sends a response.
            self.log.debug('Connection to %s closed while stopping the server', self.url)
228
229
#
230
# Server
231
#
232
233
class OcrdMetsServer():
    """
    HTTP front end (FastAPI application run by uvicorn) for the METS of a
    workspace, reachable either via TCP host/port or via a Unix domain socket.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): workspace whose METS is served
            host (): host to bind to (TCP mode)
            port (): port to bind to (TCP mode)
            socket (): Unix domain socket to bind to (UDS mode)
        Raises:
            ValueError: if both ``socket`` and ``host`` are given, or if
                ``socket`` is not given and ``host``/``port`` are incomplete
        """
        if socket:
            if host:
                raise ValueError("Expecting either socket or host/port")
        elif not (host and port):
            raise ValueError("Expecting both host and port")
        self.workspace = workspace
        self.host, self.port, self.socket = host, port, socket
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the current process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application with all routes and run it with
        uvicorn on the configured host/port or Unix domain socket.
        """
        ws = self.workspace

        api = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # --- error handlers: all three answer with HTTP status 400 ---

        @api.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @api.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @api.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        # --- routes ---
        # Handler names, parameters and docstrings are left exactly as they
        # were because FastAPI derives the published API description from them.

        @api.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            matches = ws.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(matches)

        # PUT / : save the METS of the workspace
        @api.put('/')
        def save():
            return ws.save_mets()

        @api.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Building the model validates the submitted form values
            file_resource = OcrdFileModel.create(
                file_grp=file_grp,
                file_id=file_id,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename,
            )
            # Hand the validated values over to the workspace
            add_file_kwargs = file_resource.dict()
            add_file_kwargs['page_id'] = page_id
            ws.add_file(**add_file_kwargs)
            return file_resource

        # GET /file_groups : file groups of the METS
        @api.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return dict(file_groups=ws.mets.file_groups)

        # POST /agent : add the agent described by the JSON request body
        @api.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            agent_kwargs = agent.dict()
            ws.mets.add_agent(**agent_kwargs)
            return agent

        # GET /agent : agents of the METS
        @api.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(ws.mets.agents)

        # GET /unique_identifier : unique identifier of the METS as plain text
        @api.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=ws.mets.unique_identifier, media_type='text/plain')

        @api.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            ws.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        uvicorn.run(api, host=self.host, port=self.port, uds=self.socket)
335