Passed
Push — master ( 5d6273...744804 )
by Konstantin
02:34
created

ocrd.mets_server.OcrdMetsServer.startup()   B

Complexity

Conditions 2

Size

Total Lines 95
Code Lines 64

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 64
dl 0
loc 95
rs 8.1781
c 0
b 0
f 0
cc 2
nop 1

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

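Commonly applied refactorings include: Extract Method (move a commented or self-contained part of the body into its own well-named method).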

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
from urllib.parse import urlparse
9
10
from fastapi import FastAPI, Request, File, Form, Response
11
from fastapi.responses import JSONResponse
12
from requests import request, Session as requests_session
13
from requests_unixsocket import Session as requests_unixsocket_session
14
from pydantic import BaseModel, Field, ValidationError
15
16
import uvicorn
17
18
from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent
19
from ocrd_utils import initLogging, getLogger, deprecated_alias
20
21
#
22
# XXX HACKS TODO
23
#
24
# Module-level side effect: ocrd_utils.initLogging() is called as soon as this module is imported.
initLogging()
25
26
#
27
# Models
28
#
29
30
# Pydantic model describing one file entry as exchanged between the METS
# server and its client. Documented with comments rather than a class
# docstring because pydantic exposes model docstrings in the generated schema.
class OcrdFileModel(BaseModel):
    # file group, file ID and MIME type are plain strings
    file_grp : str = Field()
    file_id : str = Field()
    mimetype : str = Field()
    # page ID, URL and local filename may each be None
    page_id : Union[str, None] = Field()
    url : Union[str, None] = Field()
    local_filename : Union[str, None] = Field()

    @staticmethod
    def create(file_grp : str, file_id : str, page_id : Union[str, None], url : str, local_filename : str, mimetype : str):
        """
        Build an :py:class:`OcrdFileModel` from the individual file attributes.
        """
        field_values = dict(
            file_grp=file_grp,
            file_id=file_id,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
            local_filename=local_filename,
        )
        return OcrdFileModel(**field_values)
41
42
# Pydantic model describing one agent entry as exchanged between the METS
# server and its client. Documented with comments rather than a class
# docstring because pydantic exposes model docstrings in the generated schema.
class OcrdAgentModel(BaseModel):
    name : str = Field()
    # NOTE(review): pydantic gives special treatment to attribute names with a
    # leading underscore -- confirm that `_type` is really validated and
    # serialized as a regular field with the pydantic version in use.
    _type : str = Field()
    role : str = Field()
    otherrole : Optional[str] = Field()
    othertype : str = Field()
    # optional list of (attribute dict, optional text) pairs
    notes : Optional[List[Tuple[Dict[str, str], Optional[str]]]] = Field()

    @staticmethod
    def create(name : str, _type : str, role : str, otherrole : str, othertype : str, notes : List[Tuple[Dict[str, str], Optional[str]]]):
        """
        Build an :py:class:`OcrdAgentModel` from the individual agent attributes.
        """
        field_values = dict(
            name=name,
            _type=_type,
            role=role,
            otherrole=otherrole,
            othertype=othertype,
            notes=notes,
        )
        return OcrdAgentModel(**field_values)
53
54
55
# Pydantic model wrapping a list of OcrdFileModel entries under the key "files".
class OcrdFileListModel(BaseModel):
    files : List[OcrdFileModel] = Field()

    @staticmethod
    def create(files : List[OcrdFile]):
        """
        Convert each given file object into an :py:class:`OcrdFileModel`
        (reading its ``fileGrp``, ``ID``, ``mimetype``, ``pageId``, ``url``
        and ``local_filename`` attributes) and wrap the result.
        """
        file_models = []
        for ocrd_file in files:
            file_models.append(OcrdFileModel.create(
                file_grp=ocrd_file.fileGrp,
                file_id=ocrd_file.ID,
                mimetype=ocrd_file.mimetype,
                page_id=ocrd_file.pageId,
                url=ocrd_file.url,
                local_filename=ocrd_file.local_filename,
            ))
        return OcrdFileListModel(files=file_models)
63
64
# Pydantic model wrapping a list of file group names under the key "file_groups".
class OcrdFileGroupListModel(BaseModel):
    file_groups : List[str] = Field()

    @staticmethod
    def create(file_groups : List[str]):
        """
        Wrap the given list of file group names in a model instance.
        """
        group_list_model = OcrdFileGroupListModel(file_groups=file_groups)
        return group_list_model
70
71
# Pydantic model wrapping a list of OcrdAgentModel entries under the key "agents".
class OcrdAgentListModel(BaseModel):
    agents : List[OcrdAgentModel] = Field()

    @staticmethod
    def create(agents : List[OcrdAgent]):
        """
        Convert each given agent object into an :py:class:`OcrdAgentModel`
        (reading its ``name``, ``type``, ``role``, ``otherrole``,
        ``othertype`` and ``notes`` attributes) and wrap the result.
        """
        agent_models = []
        for agent in agents:
            agent_models.append(OcrdAgentModel(
                name=agent.name,
                _type=agent.type,
                role=agent.role,
                otherrole=agent.otherrole,
                othertype=agent.othertype,
                notes=agent.notes,
            ))
        return OcrdAgentListModel(agents=agent_models)
79
80
#
81
# Client
82
#
83
84
85
class ClientSideOcrdMets():
    """
    Client-side stand-in for :py:class:`ocrd_models.ocrd_mets.OcrdMets` which
    forwards a subset of its API via HTTP to a
    :py:class:`ocrd.mets_server.OcrdMetsServer`:

    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_agent`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.agents`
    - :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file`

    Any attribute not defined here raises :py:class:`NotImplementedError`.
    """

    def __init__(self, url):
        """
        Set up logger, request URL and HTTP session for the server at ``url``.

        A ``url`` starting with ``http://`` is used as-is over TCP; anything
        else is taken as the path of a Unix domain socket and converted into
        a ``http+unix://`` URL with the slashes percent-encoded.
        """
        if url.startswith('http://'):
            protocol = 'tcp'
            endpoint = url
            session_factory = requests_session
        else:
            protocol = 'uds'
            endpoint = f'http+unix://{url.replace("/", "%2F")}'
            session_factory = requests_unixsocket_session
        self.log = getLogger(f'ocrd.mets_client.{protocol}')
        self.url = endpoint
        self.session = session_factory()

    def __getattr__(self, name):
        # Only reached for attributes that are not defined on this class or instance.
        raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")

    def __str__(self):
        return f'<ClientSideOcrdMets[url={self.url}]>'

    @property
    def workspace_path(self):
        """
        Body text of the server's ``GET /workspace_path`` response.
        """
        response = self.session.request('GET', f'{self.url}/workspace_path')
        return response.text

    @deprecated_alias(ID="file_id")
    @deprecated_alias(pageId="page_id")
    @deprecated_alias(fileGrp="file_grp")
    def find_files(self, **kwargs):
        """
        Query the server's ``GET /file`` endpoint with ``kwargs`` as query
        parameters and yield one :py:class:`ClientSideOcrdFile` per result.

        The legacy keyword names ``pageId``, ``ID`` and ``fileGrp`` are
        renamed to ``page_id``, ``file_id`` and ``file_grp`` before the request.
        """
        legacy_names = (('pageId', 'page_id'), ('ID', 'file_id'), ('fileGrp', 'file_grp'))
        for legacy, current in legacy_names:
            if legacy in kwargs:
                kwargs[current] = kwargs.pop(legacy)
        response = self.session.request('GET', f'{self.url}/file', params={**kwargs})
        for entry in response.json()['files']:
            yield ClientSideOcrdFile(
                None,
                ID=entry['file_id'],
                pageId=entry['page_id'],
                fileGrp=entry['file_grp'],
                url=entry['url'],
                local_filename=entry['local_filename'],
                mimetype=entry['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Like :py:meth:`find_files`, but return the results as a list.
        """
        matches = self.find_files(*args, **kwargs)
        return list(matches)

    def add_agent(self, *args, **kwargs):
        """
        Send the agent described by ``kwargs`` to the server's ``POST /agent``
        endpoint as JSON and return the HTTP response object.
        """
        # NOTE(review): positional arguments are accepted but not used.
        payload = OcrdAgentModel.create(**kwargs).dict()
        return self.session.request('POST', f'{self.url}/agent', json=payload)

    @property
    def agents(self):
        """
        List of :py:class:`ClientSideOcrdAgent` built from the server's
        ``GET /agent`` response.
        """
        response = self.session.request('GET', f'{self.url}/agent')
        agent_list = []
        for agent_dict in response.json()['agents']:
            agent_list.append(ClientSideOcrdAgent(None, **agent_dict))
        return agent_list

    @property
    def unique_identifier(self):
        """
        Body text of the server's ``GET /unique_identifier`` response.
        """
        response = self.session.request('GET', f'{self.url}/unique_identifier')
        return response.text

    @property
    def file_groups(self):
        """
        The ``file_groups`` list from the server's ``GET /file_groups`` response.
        """
        response = self.session.request('GET', f'{self.url}/file_groups')
        return response.json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, local_filename=None, mimetype=None, page_id=None, **kwargs):
        """
        Send the file description to the server's ``POST /file`` endpoint as
        form data and return a matching :py:class:`ClientSideOcrdFile`.
        """
        # NOTE(review): `content` and any extra keyword arguments are not sent,
        # and the server's response is not inspected -- confirm this is intended.
        form_data = OcrdFileModel.create(
            file_id=file_id,
            file_grp=file_grp,
            page_id=page_id,
            mimetype=mimetype,
            url=url,
            local_filename=local_filename).dict()
        self.session.request('POST', f'{self.url}/file', data=form_data)
        return ClientSideOcrdFile(
            None,
            ID=file_id,
            fileGrp=file_grp,
            url=url,
            pageId=page_id,
            mimetype=mimetype,
            local_filename=local_filename)

    def save(self):
        """
        Send ``PUT`` to the server's base URL (handled there by saving the METS).
        """
        self.session.request('PUT', self.url)

    def stop(self):
        """
        Send ``DELETE`` to the server's base URL (handled there by saving the
        METS and exiting the server process).
        """
        self.session.request('DELETE', self.url)
173
174
#
175
# Server
176
#
177
178
class OcrdMetsServer():
    """
    FastAPI/uvicorn based HTTP server giving clients access to the METS of a
    single workspace, reachable either via TCP or via a Unix domain socket.
    """

    def __init__(self, workspace, url):
        """
        Args:
            workspace: the workspace whose METS is served
            url: either a ``http://``/``https://`` URL (host and port to bind)
                or the path of a Unix domain socket
        """
        self.workspace = workspace
        self.url = url
        self.log = getLogger('ocrd.workspace_client')

    def shutdown(self):
        """
        Terminate the current process immediately with exit code 0.
        """
        _exit(0)

    def startup(self):
        """
        Build the FastAPI application and serve it with uvicorn (blocking).
        """
        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )
        self._register_exception_handlers(app)
        self._register_routes(app, self.workspace)
        uvicorn.run(app, **self._uvicorn_kwargs())

    def _uvicorn_kwargs(self):
        """
        Translate ``self.url`` into the keyword arguments for ``uvicorn.run``.

        Only URLs with an explicit ``http://`` or ``https://`` scheme are
        treated as TCP addresses; everything else is a Unix domain socket
        path. Checking for the full scheme prefix (instead of just ``http``)
        keeps socket paths such as ``httpd.sock`` from being mis-parsed as URLs.
        """
        if self.url.startswith(('http://', 'https://')):
            parsed = urlparse(self.url)
            return {'host': parsed.hostname, 'port': parsed.port}
        return {'uds': self.url}

    @staticmethod
    def _register_exception_handlers(app):
        """
        Map validation errors, file-exists errors and invalid regexes to HTTP 400 responses.
        """

        @app.exception_handler(ValidationError)
        async def exception_handler_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def exception_handler_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def exception_handler_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

    @staticmethod
    def _register_routes(app, workspace):
        """
        Register all HTTP endpoints on ``app``; every handler operates on ``workspace``.
        """
        # Handler names and docstrings are left exactly as they were, since
        # FastAPI uses them for the generated API documentation.

        @app.get("/file", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel.create(found)

        @app.put('/')
        def save():
            return workspace.save_mets()

        @app.post('/file', response_model=OcrdFileModel)
        async def add_file(
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : Union[str, None] = Form(),
            local_filename : Union[str, None] = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel.create(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url, local_filename=local_filename)
            # Add to workspace
            kwargs = file_resource.dict()
            workspace.add_file(**kwargs)
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            return agent

        @app.get('/agent', response_model=OcrdAgentListModel)
        async def agents():
            return OcrdAgentListModel.create(workspace.mets.agents)

        @app.get('/unique_identifier', response_model=str)
        async def unique_identifier():
            return Response(content=workspace.mets.unique_identifier, media_type='text/plain')

        @app.get('/workspace_path', response_model=str)
        async def workspace_path():
            return Response(content=workspace.directory, media_type="text/plain")

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # os._exit because uvicorn catches SystemExit raised by sys.exit
            _exit(0)
283