Passed
Pull Request — master (#966)
by Konstantin
02:36
created

ocrd.mets_server.ClientSideOcrdMets.stop()   A

Complexity

Conditions 1

Size

Total Lines 2
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 2
rs 10
c 0
b 0
f 0
cc 1
nop 1
1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, OcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
22
# XXX HACKS TODO
23
#
24
# NOTE(review): this call runs as a side effect of importing the module
# (it is a top-level statement); presumably it sets up the OCR-D logging
# configuration -- confirm against ocrd_utils before relying on it.
initLogging()
25
26
#
27
# Models
28
#
29
30
class OcrdFileModel(BaseModel):
    # Serializable description of a single file entry as exchanged between
    # the METS server endpoints and ClientSideOcrdMets.
    file_grp: str = Field()
    file_id: str = Field()
    mimetype: str = Field()
    page_id: Union[str, None] = Field()
    local_filename: str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from its individual field values.
        """
        field_values = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**field_values)
40
41
class OcrdAgentModel(BaseModel):
    # Serializable description of a single agent entry as exchanged between
    # the METS server endpoints and ClientSideOcrdMets.
    name: str = Field()
    # NOTE(review): pydantic gives leading-underscore attribute names special
    # treatment -- confirm that ``_type`` really round-trips through
    # ``.dict()`` and request parsing as a regular field.
    _type: str = Field()
    role: str = Field()
    otherrole: Optional[str] = Field()
    othertype: str = Field()
    notes: Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from its individual field values.
        """
        field_values = {
            'name': name,
            '_type': _type,
            'role': role,
            'otherrole': otherrole,
            'othertype': othertype,
            'notes': notes,
        }
        return OcrdAgentModel(**field_values)
52
53
54
class OcrdFileListModel(BaseModel):
    # Response envelope holding a list of file models.
    files: List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert file objects (read via ``fileGrp``, ``ID``, ``mimetype``,
        ``pageId`` and ``local_filename``) into an :py:class:`OcrdFileListModel`.
        """
        file_models = []
        for ocrd_file in files:
            file_models.append(OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                local_filename=ocrd_file.local_filename,
            ))
        return OcrdFileListModel(files=file_models)
62
63
class OcrdFileGroupListModel(BaseModel):
    # Response envelope holding the names of the file groups.
    file_groups: List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names in an :py:class:`OcrdFileGroupListModel`.
        """
        wrapped = OcrdFileGroupListModel(file_groups=file_groups)
        return wrapped
69
70
class OcrdAgentListModel(BaseModel):
    # Response envelope holding a list of agent models.
    agents: List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert agent objects (read via ``name``, ``type``, ``role``,
        ``otherrole``, ``othertype`` and ``notes``) into an
        :py:class:`OcrdAgentListModel`.
        """
        agent_models = []
        for agent in agents:
            agent_models.append(OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            ))
        return OcrdAgentListModel(agents=agent_models)
78
79
#
80
# Client
81
#
82
83
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): file group of this ``mets:file``
        """
        # ``el``, ``mets`` and ``url`` are accepted for signature compatibility
        # only; none of them is stored on the instance.
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        Return a one-line summary of the file's properties.

        Missing or falsy properties (including ``url``, which is never set)
        are rendered as ``---``.
        """
        # f-string formatting stringifies every value, so a non-string
        # attribute (e.g. a ``pathlib.Path`` in ``local_filename``) no longer
        # raises ``TypeError`` as the former ``'='.join`` did.
        props = ', '.join(
            f"{key}={getattr(self, key, None) or '---'}"
            for key in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']
        )
        # The opening bracket was previously missing, producing an unbalanced
        # '<OcrdFile ...]/>'; now matches the ClientSideOcrdAgent format.
        return '<OcrdFile [%s]/>' % props
116
117
class ClientSideOcrdAgent():
    """
    Provides the same interface as :py:class:`ocrd_models.OcrdAgent`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, name=None, _type=None, othertype=None, role=None, otherrole=None,
                 notes=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            name (string):
            _type (string): stored as attribute ``type``
            othertype (string):
            role (string):
            otherrole (string):
            notes (list): stored as-is
        """
        self.name = name
        self.type = _type
        self.othertype = othertype
        self.role = role
        self.otherrole = otherrole
        self.notes = notes

    def __str__(self):
        """
        Return a one-line summary of the agent's properties, rendering
        missing or falsy values as ``---``.
        """
        # f-string formatting stringifies every value, so a truthy non-string
        # attribute no longer raises ``TypeError`` as ``'='.join`` did.
        props = ', '.join(
            f"{key}={getattr(self, key) or '---'}"
            for key in ['type', 'othertype', 'role', 'otherrole', 'name']
        )
        return '<OcrdAgent [' + props + ']/>'
150
151
152
class ClientSideOcrdMets():
    """
    Partial substitute for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which provides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, url):
        """
        Args:
            url (string): either an ``http://`` / ``https://`` base URL of the
                METS server (TCP), or the path of a Unix domain socket
        """
        # Accept both URL schemes: with only 'http://' recognised, an
        # 'https://' URL was mistaken for a socket path and mangled below.
        protocol = 'tcp' if url.startswith(('http://', 'https://')) else 'uds'
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        if protocol == 'tcp':
            self.url = url
            self.session = requests_session()
        else:
            # The socket path becomes the percent-encoded "host" part of an
            # http+unix:// URL.
            self.url = f'http+unix://{url.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    def __getattr__(self, name):
        # Only invoked for attributes not found by normal lookup, i.e. for
        # anything this partial substitute does not implement.
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query ``GET /file`` with ``kwargs`` as query parameters and yield one
        :py:class:`ClientSideOcrdFile` per entry of the response's ``files``.
        """
        # Map any remaining legacy keyword names to the server's parameter names.
        for legacy_name, current_name in (('pageId', 'page_id'), ('ID', 'file_id'), ('fileGrp', 'file_grp')):
            if legacy_name in kwargs:
                kwargs[current_name] = kwargs.pop(legacy_name)
        r = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=f['file_id'],
                pageId=f['page_id'],
                fileGrp=f['file_grp'],
                local_filename=f['local_filename'],
                mimetype=f['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        ``POST /agent`` with the keyword arguments validated through
        :py:class:`OcrdAgentModel`; positional arguments are ignored.

        Returns the HTTP response object.
        """
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel.create(**kwargs).dict())

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from ``GET /agent``.
        """
        agent_dicts = self.session.request('GET', f'{self.url}/agent').json()['agents']
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in agent_dicts]

    @property
    def unique_identifier(self):
        """
        Body text of the ``GET /unique_identifier`` response.
        """
        return self.session.request('GET', f'{self.url}/unique_identifier').text

    @property
    def file_groups(self):
        """
        The ``file_groups`` list of the ``GET /file_groups`` response.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        ``POST /file`` with the file's properties as form data.

        ``content`` and any extra keyword arguments are accepted but not sent.
        Returns the HTTP response object.
        """
        return self.session.request(
            'POST',
            f'{self.url}/file',
            data=OcrdFileModel.create(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                local_filename=local_filename).dict(),
        )

    def save(self):
        """
        Send ``PUT`` to the server's base URL.
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send ``DELETE`` to the server's base URL.
        """
        self.session.request('DELETE', self.url)
226
227
#
228
# Server
229
#
230
231
class OcrdMetsServer():
    """
    Serves the METS of one workspace over HTTP (FastAPI application run by
    uvicorn), either on a TCP host/port or on a Unix domain socket.
    """

    def __init__(self, workspace, url):
        """
        Args:
            workspace (): workspace whose ``mets`` is queried and modified
            url (string): ``http://`` / ``https://`` URL to bind to (TCP), or
                otherwise the path of a Unix domain socket
        """
        self.workspace = workspace
        self.url = url
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately via ``os._exit(0)``; nothing is saved.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application with all endpoints and run it with
        uvicorn (blocks until the process exits).
        """

        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # The following exceptions are answered with HTTP 400.
        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        # Save the METS of the workspace.
        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            workspace.add_file(**kwargs)
            return file_resource

        # List the file groups of the METS.
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        # Add an agent to the METS; the validated model is echoed back.
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        # List the agents of the METS.
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        # Return the unique identifier of the METS as plain text.
        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)

        # Require an explicit URL scheme: a bare 'http' prefix would also
        # match a socket path that merely begins with those letters
        # (e.g. 'httpd.sock') and send it through urlparse.
        if self.url.startswith(('http://', 'https://')):
            parsed = urlparse(self.url)
            uvicorn_kwargs = {'host': parsed.hostname, 'port': parsed.port}
        else:
            uvicorn_kwargs = {'uds': self.url}
        uvicorn.run(app, **uvicorn_kwargs)
331