Passed
Pull Request — master (#966)
by Konstantin
03:22
created

ocrd.mets_server.ClientSideOcrdMets.find_files()   A

Complexity

Conditions 2

Size

Total Lines 4
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 2
nop 2
1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, UploadFile
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, constr, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
# XXX HACKS TODO
#
# NOTE(review): logging is initialised here as an import-time side effect of
# this module -- TODO move this into the server/client entry points.
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    """
    Description of a single file as exchanged between METS client and server.
    """
    # file group the file belongs to
    file_grp: str = Field()
    # identifier of the file
    file_id: str = Field()
    # MIME type of the file
    mimetype: str = Field()
    # identifier of the page the file is assigned to (may be ``None``)
    page_id: Union[str, None] = Field()
    # location of the file
    url: str = Field()
35
36
class OcrdAgentModel(BaseModel):
    """
    Description of an agent as posted to the ``/agent`` route.

    The server passes all fields as keyword arguments to
    ``workspace.mets.add_agent``.
    """
    name: str = Field()
    # NOTE(review): pydantic gives special treatment to attribute names with
    # a leading underscore (they are not handled like regular fields) --
    # TODO confirm that ``_type`` is really validated and serialized.
    _type: str = Field()
    role: str = Field()
    otherrole: str = Field()
    othertype: str = Field()
    # list of ``(attributes, text)`` pairs; the text part may be ``None``
    notes: List[Tuple[Dict[str, str], Optional[str]]] = Field()
43
44
class OcrdFileListModel(BaseModel):
    """
    Response body of the file search route: a list of file descriptions.
    """
    files: List[OcrdFileModel] = Field()
46
47
class OcrdFileGroupListModel(BaseModel):
    """
    Response body of the ``/file_groups`` route: a list of file group names.
    """
    file_groups: List[str] = Field()
49
50
#
51
# Client
52
#
53
54
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdWorkspaceServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            local_filename (): ignored
            url (string): ``@xlink:href`` of this ``mets:file``
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): file group this ``mets:file`` belongs to
        """
        # ``el``, ``mets`` and ``local_filename`` are accepted only for
        # signature compatibility and deliberately not stored.
        self.ID = ID
        self.mimetype = mimetype
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp
80
81
class ClientSideOcrdMets:
    """
    Replacement for :py:class:`ocrd_models.ocrd_mets.OcrdMets` with overrides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    # OcrdMets-style keyword names mapped to the query parameter names
    # that the server's file search route reads.
    _QUERY_ALIASES = {'ID': 'file_id', 'fileGrp': 'file_grp', 'pageId': 'page_id'}

    def __init__(self, host, port, socket):
        """
        Args:
            host (string): host name of the server (used if ``socket`` is falsy)
            port (int): TCP port of the server (used if ``socket`` is falsy)
            socket (string): path of a Unix domain socket; takes precedence
                over ``host``/``port`` when set
        """
        self.log = getLogger('ocrd.workspace_client')
        if socket:
            # the socket path is embedded in the URL, so slashes are percent-encoded
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()
        else:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()

    def find_files(self, **kwargs):
        """
        Search for files on the server and yield them as
        :py:class:`ClientSideOcrdFile`.

        This is a generator, so the HTTP request is only sent once iteration
        starts.

        Keyword Args:
            **kwargs: search criteria, sent as query parameters. The
                OcrdMets-style names ``ID``, ``fileGrp`` and ``pageId`` are
                translated to ``file_id``, ``file_grp`` and ``page_id``; if
                both spellings are given, the latter ones win.
        """
        params = dict(kwargs)
        for legacy_name, query_name in self._QUERY_ALIASES.items():
            if legacy_name in params:
                legacy_value = params.pop(legacy_name)
                params.setdefault(query_name, legacy_value)
        r = self.session.request('GET', self.url, params=params)
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], url=f['url'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return a list instead of a generator.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Validate the keyword arguments as :py:class:`OcrdAgentModel` and post
        them to the server's ``/agent`` route.

        Positional arguments are ignored.

        Returns:
            the response of the HTTP request
        """
        # The server route takes the agent as a JSON body, hence ``json=``
        # with a plain dict rather than ``data=`` with the model object.
        agent = OcrdAgentModel(**kwargs)
        return self.session.request('POST', f'{self.url}/agent', json=agent.dict())

    @property
    def file_groups(self):
        """
        List of file groups as reported by the server's ``/file_groups`` route.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, mimetype=None, page_id=None, **kwargs):
        """
        Post a new file to the server.

        The metadata is validated as :py:class:`OcrdFileModel` and sent as
        form fields, ``content`` as the uploaded file ``data``.

        Args:
            file_grp (string): file group to add the file to
        Keyword Args:
            content: content of the file
            file_id (string): identifier of the file
            url (string): location of the file
            mimetype (string): MIME type of the file
            page_id (string): identifier of the page the file belongs to
            **kwargs: ignored
        """
        self.session.request(
            'POST',
            self.url,
            data=OcrdFileModel(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                url=url).dict(),
            files={'data': content}
        )

    def save(self):
        """
        Ask the server to save the METS.
        """
        self.session.request('PUT', self.url)
131
132
133
#
134
# Server
135
#
136
137
class OcrdMetsServer:
    """
    HTTP server giving access to the METS of a single workspace.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace: workspace whose METS is served
            host (string): host name passed to uvicorn
            port (int): TCP port passed to uvicorn
            socket (string): Unix domain socket path passed to uvicorn as ``uds``
        """
        self.workspace = workspace
        self.host = host
        self.port = port
        self.socket = socket
        # NOTE(review): same logger name as the client -- TODO confirm intended
        self.log = getLogger('ocrd.workspace_client')

    def startup(self):
        """
        Build the FastAPI application with all routes and run it with uvicorn.

        Routes:
            * ``GET /``: find files
            * ``PUT /``: save the METS
            * ``POST /``: add a file
            * ``GET /file_groups``: list file groups
            * ``POST /agent``: add an agent
            * ``DELETE /``: save the METS and terminate the process
        """

        # XXX HACK
        # circumventing dependency injection like this is bad and
        # needs to be refactored once it's all running
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # Each handler gets its own name so that none of the definitions
        # shadows another one in this scope.
        @app.exception_handler(ValidationError)
        async def handle_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def handle_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def handle_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/", response_model=OcrdFileListModel)
        async def find_files(
            file_grp: Union[str, None] = None,
            file_id: Union[str, None] = None,
            page_id: Union[str, None] = None,
            mimetype: Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel(
                files=[OcrdFileModel(file_grp=of.fileGrp, file_id=of.ID, mimetype=of.mimetype, page_id=of.pageId, url=of.url) for of in found]
            )

        @app.put('/')
        def save():
            """
            Save the mets
            """
            return workspace.save_mets()

        @app.post('/', response_model=OcrdFileModel)
        async def add_file(
            data: bytes = File(),
            file_grp: str = Form(),
            file_id: str = Form(),
            page_id: Union[str, None] = Form(),
            mimetype: str = Form(),
            url: str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url)
            # Add to workspace
            kwargs = file_resource.dict()
            kwargs['content'] = data
            # the submitted url is handed to the workspace as local_filename
            kwargs['local_filename'] = kwargs.pop('url')
            workspace.add_file(**kwargs)
            workspace.save_mets()
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            """
            List the file groups of the mets
            """
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent: OcrdAgentModel):
            """
            Add an agent to the mets
            """
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            workspace.save_mets()
            return agent

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # XXX HACK os._exit to not trigger SystemExit caught by uvicorn with sys.exit
            _exit(0)

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
236
237
238