Passed
Pull Request — master (#966)
by Konstantin
04:23 queued 01:53
created

ocrd.mets_server.OcrdMetsServer.startup()   B

Complexity

Conditions 2

Size

Total Lines 91
Code Lines 61

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 61
dl 0
loc 91
rs 8.2763
c 0
b 0
f 0
cc 2
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
22
# XXX HACK / TODO: initLogging() below runs as an import-time side effect of this module
23
#
24
initLogging()
25
26
#
27
# Models
28
#
29
30
class OcrdFileModel(BaseModel):
    # Wire representation of a single file entry. It is used by
    # ClientSideOcrdMets.add_file() as request payload and by the
    # POST /file route of OcrdMetsServer as response model.
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    page_id : Optional[str] = Field()
    url : str = Field()
    local_filename : str = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Optional[str], url : str, local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from the individual file attributes.
        """
        attributes = {
            'file_grp': file_grp,
            'file_id': file_id,
            'page_id': page_id,
            'mimetype': mimetype,
            'url': url,
            'local_filename': local_filename,
        }
        return OcrdFileModel(**attributes)
41
42
class OcrdAgentModel(BaseModel):
    # Wire representation of a single agent. It is used by
    # ClientSideOcrdMets.add_agent() as JSON payload and by the
    # POST /agent route of OcrdMetsServer as request body and response model.
    name : str = Field()
    # NOTE(review): pydantic gives names with a leading underscore special
    # treatment - confirm that `_type` is really validated and serialized
    # like the other fields.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    # Each note is a pair of (attribute dict, optional text).
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from the individual agent attributes.
        """
        attributes = {
            'name': name,
            '_type': _type,
            'role': role,
            'otherrole': otherrole,
            'othertype': othertype,
            'notes': notes,
        }
        return OcrdAgentModel(**attributes)
53
54
55
class OcrdFileListModel(BaseModel):
    # List wrapper around OcrdFileModel; response model of the GET /file route.
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert ``files`` into an :py:class:`OcrdFileListModel`.

        For every file the attributes ``fileGrp``, ``ID``, ``mimetype``,
        ``pageId``, ``url`` and ``local_filename`` are read.
        """
        entries = []
        for ocrd_file in files:
            entries.append(OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                url=ocrd_file.url,
                local_filename=ocrd_file.local_filename,
            ))
        return OcrdFileListModel(files=entries)
63
64
class OcrdFileGroupListModel(BaseModel):
    # List of file group names; response model of the GET /file_groups route.
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap a list of file group names in an :py:class:`OcrdFileGroupListModel`.
        """
        model = OcrdFileGroupListModel(file_groups=file_groups)
        return model
70
71
class OcrdAgentListModel(BaseModel):
    # List wrapper around OcrdAgentModel; response model of the GET /agent route.
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert ``agents`` into an :py:class:`OcrdAgentListModel`.

        For every agent the attributes ``name``, ``type``, ``role``,
        ``otherrole``, ``othertype`` and ``notes`` are read.
        """
        entries = []
        for agent in agents:
            entries.append(OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            ))
        return OcrdAgentListModel(agents=entries)
79
80
#
81
# Client
82
#
83
84
85
class ClientSideOcrdMets():
    """
    HTTP client standing in for :py:class:`ocrd_models.ocrd_mets.OcrdMets`.

    Only part of the ``OcrdMets`` interface is offered; every call is
    forwarded to a :py:class:`ocrd.mets_server.OcrdMetsServer`:

    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`

    In addition there are ``unique_identifier``, ``file_groups``,
    :py:meth:`save` and :py:meth:`stop`. Access to any other attribute raises
    ``NotImplementedError``.
    """

    def __init__(self, url):
        """
        Args:
            url: address of the METS server. A value starting with ``http://``
                is used as is (TCP); anything else is taken as the path of a
                Unix domain socket.
        """
        # NOTE(review): `https://` URLs end up in the Unix socket branch - confirm this is intended.
        is_tcp = url.startswith('http://')
        protocol = 'tcp' if is_tcp else 'uds'
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        if is_tcp:
            self.url = url
            self.session = requests_session()
        else:
            # Slashes of the socket path are percent-encoded for the http+unix:// URL
            self.url = f'http+unix://{url.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()

    def __getattr__(self, name):
        """
        Reject every attribute that is not explicitly provided by this class.
        """
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query ``GET /file`` and yield one ``ClientSideOcrdFile`` per result.

        The keyword arguments are sent as query parameters; the legacy names
        ``pageId``, ``ID`` and ``fileGrp`` are renamed to ``page_id``,
        ``file_id`` and ``file_grp`` first. Since this is a generator, the
        request is only sent once iteration starts.
        """
        legacy_names = (('pageId', 'page_id'), ('ID', 'file_id'), ('fileGrp', 'file_grp'))
        for legacy_name, current_name in legacy_names:
            if legacy_name in kwargs:
                kwargs[current_name] = kwargs.pop(legacy_name)
        response = self.session.request('GET', f'{self.url}/file', params=dict(kwargs))
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                url=entry['url'],
                local_filename=entry['local_filename'],
                mimetype=entry['mimetype'],
            )

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        matches = self.find_files(*args, **kwargs)
        return list(matches)

    def add_agent(self, *args, **kwargs):
        """
        Send the agent described by ``kwargs`` via ``POST /agent`` and return
        the HTTP response. Positional arguments are not used.
        """
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        List of ``ClientSideOcrdAgent`` built from the result of ``GET /agent``.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        return [ClientSideOcrdAgent(None, **agent_dict) for agent_dict in response.json()['agents']]

    @property
    def unique_identifier(self):
        """
        Body text of the ``GET /unique_identifier`` response.
        """
        response = self.session.request('GET', f'{self.url}/unique_identifier')
        return response.text

    @property
    def file_groups(self):
        """
        The ``file_groups`` entry of the ``GET /file_groups`` JSON response.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Send the file description as form data via ``POST /file`` and return a
        ``ClientSideOcrdFile`` with the same attributes.

        ``content`` and additional keyword arguments are accepted but not
        used here; the HTTP response is not inspected.
        """
        form_data = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
            local_filename=local_filename,
        ).dict()
        self.session.request('POST', f'{self.url}/file', data=form_data)
        return ClientSideOcrdFile(
            None,
            ID=file_id,
            fileGrp=file_grp,
            url=url,
            pageId=page_id,
            mimetype=mimetype,
            local_filename=local_filename,
        )

    def save(self):
        """
        Send ``PUT`` to the server URL (the server's save route).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send ``DELETE`` to the server URL (the server's stop route).
        """
        self.session.request('DELETE', self.url)
169
170
#
171
# Server
172
#
173
174
class OcrdMetsServer():
    """
    HTTP server exposing the METS of a single workspace.

    The routes are defined on a FastAPI application which is run by uvicorn,
    listening either on a TCP host/port or on a Unix domain socket,
    depending on ``url``.
    """

    def __init__(self, workspace, url):
        """
        Args:
            workspace: workspace to serve; the routes use its ``mets``
                attribute and its ``add_file()`` and ``save_mets()`` methods.
            url: a value starting with ``http`` is parsed as URL to obtain
                host and port; anything else is used as the path of a Unix
                domain socket.
        """
        self.workspace = workspace
        self.url = url
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the process immediately with exit status 0.
        """
        _exit(0)

    def startup(self):
        """
        Create the application, register handlers and routes, and run it
        with uvicorn on the address derived from ``self.url``.
        """
        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )
        self._register_exception_handlers(app)
        self._register_routes(app, self.workspace)
        uvicorn.run(app, **self._uvicorn_kwargs())

    def _uvicorn_kwargs(self):
        """
        Translate ``self.url`` into the keyword arguments for ``uvicorn.run``.
        """
        if self.url.startswith('http'):
            parsed = urlparse(self.url)
            return {'host': parsed.hostname, 'port': parsed.port}
        return {'uds': self.url}

    @staticmethod
    def _register_exception_handlers(app):
        """
        Map validation, duplicate-file and regex errors to HTTP 400 responses.
        """

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

    @staticmethod
    def _register_routes(app, workspace):
        """
        Define all routes on ``app``; the handlers operate on ``workspace``.
        """
        # Handler names, parameter names and handler docstrings are kept
        # unchanged, and new explanations are plain comments, because the
        # web framework may use all of these when describing the API.

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        # Save the METS of the workspace
        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : str = Form(),
            local_filename : str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            workspace.add_file(**kwargs)
            return file_resource

        # List the file groups of the METS
        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        # Add the agent given in the request body to the METS
        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        # List the agents of the METS
        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        # Return the unique identifier of the METS as plain text
        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)
275