Passed
Pull Request — master (#966)
by Konstantin
03:22
created

ocrd.mets_server.ClientSideOcrdFile.__init__()   A

Complexity

Conditions 1

Size

Total Lines 19
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 19
rs 10
c 0
b 0
f 0
cc 1
nop 10

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
# METS server functionality
3
"""
4
import re
5
from os import environ, _exit
6
from io import BytesIO
7
from typing import Any, Dict, Optional, Union, List, Tuple
8
9
from fastapi import FastAPI, Request, File, Form, UploadFile
10
from fastapi.responses import JSONResponse
11
from requests import request, Session as requests_session
12
from requests_unixsocket import Session as requests_unixsocket_session
13
from pydantic import BaseModel, Field, constr, ValidationError
14
15
import uvicorn
16
17
from ocrd_models import OcrdMets
18
from ocrd_utils import initLogging, getLogger, deprecated_alias
19
20
#
21
# XXX HACKS TODO
22
#
23
# NOTE(review): this call runs at import time, so merely importing this module
# presumably (re)configures OCR-D logging -- confirm this is intended (see the
# "HACKS TODO" banner).
initLogging()
24
25
#
26
# Models
27
#
28
29
class OcrdFileModel(BaseModel):
    """
    Description of a single file as exchanged between the METS server and
    its clients (request form of ``POST /``, items of the ``GET /`` response).
    """
    # file group the file belongs to (``fileGrp`` on the OcrdFile side)
    file_grp: str = Field()
    # identifier of the file (``ID`` on the OcrdFile side)
    file_id: str = Field()
    # media type of the file
    mimetype: str = Field()
    # page the file is attached to (``pageId`` on the OcrdFile side); may be None
    page_id: Optional[str] = Field()
    # location of the file
    url: str = Field()
35
36
class OcrdAgentModel(BaseModel):
    """
    Description of an agent as accepted and echoed by ``POST /agent``; the
    server forwards its ``dict()`` as keyword arguments to ``mets.add_agent``.
    """
    name: str = Field()
    # NOTE(review): pydantic may treat underscore-prefixed attributes as
    # private/non-field, in which case ``_type`` would be neither validated
    # nor serialized by ``dict()`` -- confirm against the pydantic version in use.
    _type: str = Field()
    role: str = Field()
    otherrole: str = Field()
    othertype: str = Field()
    # list of (attribute dict, optional text) pairs
    notes: List[Tuple[Dict[str, str], Optional[str]]] = Field()
43
44
class OcrdFileListModel(BaseModel):
    """
    Response body of ``GET /``: the files matching a query.
    """
    files: List[OcrdFileModel] = Field()
46
47
class OcrdFileGroupListModel(BaseModel):
    """
    Response body of ``GET /file_groups``: the names of the file groups.
    """
    file_groups: List[str] = Field()
49
50
#
51
# Client
52
#
53
54
class ClientSideOcrdFile:
    """
    Plain value object mirroring the attribute interface of
    :py:class:`ocrd_models.ocrd_file.OcrdFile`.

    Unlike the original it is not backed by an
    :py:class:`ocrd_models.ocrd_mets.OcrdMets` document; instances are built
    from the JSON returned by :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): accepted for signature compatibility, not stored
        Keyword Args:
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            local_filename (): accepted for signature compatibility, not stored
            mets (): accepted for signature compatibility, not stored
            url (string): ``@xlink:href`` of this ``mets:file``
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): file group this ``mets:file`` belongs to
        """
        # Identity and location of the file ...
        self.ID, self.mimetype, self.url = ID, mimetype, url
        # ... followed by its placement within the document.
        self.loctype, self.pageId, self.fileGrp = loctype, pageId, fileGrp
80
81
class ClientSideOcrdMets():
    """
    Replacement for :py:class:`ocrd_models.ocrd_mets.OcrdMets` with overrides for
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_files`,
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.find_all_files`, and
    :py:meth:`ocrd_models.ocrd_mets.OcrdMets.add_file` to query via HTTP a
    :py:class:`ocrd.mets_server.OcrdMetsServer`.
    """

    def __init__(self, host, port, socket):
        """
        Args:
            host (string): host of the server, used only when ``socket`` is falsy
            port (): port of the server, used only when ``socket`` is falsy
            socket (string): path of a UNIX domain socket; when truthy it takes
                precedence over ``host``/``port``
        """
        self.log = getLogger('ocrd.workspace_client')
        if socket:
            # The socket path is embedded in the URL host part, so slashes are percent-encoded
            self.url = f'http+unix://{socket.replace("/", "%2F")}'
            self.session = requests_unixsocket_session()
        else:
            self.url = f'http://{host}:{port}'
            self.session = requests_session()

    def find_files(self, **kwargs):
        """
        Query the server for files and yield one
        :py:class:`ClientSideOcrdFile` per entry of the response.

        Keyword Args:
            **kwargs: search criteria sent as query parameters. The
                OcrdMets-style names ``fileGrp``, ``ID`` and ``pageId`` are
                translated to the names the server reads (``file_grp``,
                ``file_id``, ``page_id``); an explicitly passed snake_case
                name wins over its translated counterpart.
        """
        # The server only knows the snake_case parameter names; without this
        # translation OcrdMets-style filters would never reach its query.
        for mets_name, query_name in (('fileGrp', 'file_grp'), ('ID', 'file_id'), ('pageId', 'page_id')):
            if mets_name in kwargs:
                kwargs.setdefault(query_name, kwargs.pop(mets_name))
        r = self.session.request('GET', self.url, params={**kwargs})
        for f in r.json()['files']:
            yield ClientSideOcrdFile(None, ID=f['file_id'], pageId=f['page_id'], fileGrp=f['file_grp'], url=f['url'], mimetype=f['mimetype'])

    def find_all_files(self, *args, **kwargs):
        """
        Same as :py:meth:`find_files`, but return the results as a list.
        """
        return list(self.find_files(*args, **kwargs))

    def add_agent(self, *args, **kwargs):
        """
        Validate ``kwargs`` as an :py:class:`OcrdAgentModel` and POST it to
        the ``/agent`` endpoint. Positional arguments are ignored.

        Returns:
            the response object of the request
        """
        # The endpoint takes the agent as a JSON request body, so send the
        # model as JSON rather than as form data.
        return self.session.request('POST', f'{self.url}/agent', json=OcrdAgentModel(**kwargs).dict())

    @property
    def file_groups(self):
        """
        List of file group names as reported by the ``/file_groups`` endpoint.
        """
        return self.session.request('GET', f'{self.url}/file_groups').json()['file_groups']

    @deprecated_alias(pageId="page_id")
    @deprecated_alias(ID="file_id")
    def add_file(self, file_grp, content=None, file_id=None, url=None, mimetype=None, page_id=None, **kwargs):
        """
        Validate the file description as an :py:class:`OcrdFileModel` and POST
        it, together with ``content``, to the server.

        Args:
            file_grp (string): file group to add the file to
        Keyword Args:
            content (): file content, sent as the multipart part ``data``
            file_id (string): ID of the file
            url (string): location of the file
            mimetype (string): media type of the file
            page_id (string): page the file belongs to
            **kwargs: ignored
        """
        self.session.request(
            'POST',
            self.url,
            data=OcrdFileModel(
                file_id=file_id,
                file_grp=file_grp,
                page_id=page_id,
                mimetype=mimetype,
                url=url).dict(),
            files={'data': content}
        )

    def save(self):
        """
        Ask the server to save the METS (``PUT`` on the base URL).
        """
        self.session.request('PUT', self.url)
131
132
133
#
134
# Server
135
#
136
137
class OcrdMetsServer():
    """
    Serves the METS of a workspace over HTTP through a FastAPI application
    run by uvicorn.
    """

    def __init__(self, workspace, host, port, socket):
        """
        Args:
            workspace (): object providing ``mets``, ``add_file()`` and
                ``save_mets()``, which the request handlers operate on
            host (): passed to ``uvicorn.run`` as ``host``
            port (): passed to ``uvicorn.run`` as ``port``
            socket (): passed to ``uvicorn.run`` as ``uds``
        """
        self.workspace = workspace
        self.host = host
        self.port = port
        self.socket = socket
        # NOTE(review): same logger name as the client side uses -- confirm intended
        self.log = getLogger('ocrd.workspace_client')

    def startup(self):
        """
        Build the FastAPI application, register all routes and exception
        handlers, and hand the application to ``uvicorn.run``.

        Routes:
            ``GET /``: find files
            ``PUT /``: save the METS
            ``POST /``: add a file and save the METS
            ``GET /file_groups``: list file groups
            ``POST /agent``: add an agent and save the METS
            ``DELETE /``: save the METS and terminate the process
        """

        # XXX HACK
        # circumventing dependency injection like this is bad and
        # needs to be refactored once it's all running
        workspace = self.workspace

        app = FastAPI(
            title="OCR-D METS Server",
            description="Providing simultaneous write-access to mets.xml for OCR-D",
        )

        # Each handler gets its own name so none of them shadows another;
        # registration is keyed on the exception class given to the decorator.
        @app.exception_handler(ValidationError)
        async def handle_validation_error(request: Request, exc: ValidationError):
            return JSONResponse(status_code=400, content=exc.errors())

        @app.exception_handler(FileExistsError)
        async def handle_file_exists(request: Request, exc: FileExistsError):
            return JSONResponse(status_code=400, content=str(exc))

        @app.exception_handler(re.error)
        async def handle_invalid_regex(request: Request, exc: re.error):
            return JSONResponse(status_code=400, content=f'invalid regex: {exc}')

        @app.get("/", response_model=OcrdFileListModel)
        async def find_files(
            file_grp : Union[str, None] = None,
            file_id : Union[str, None] = None,
            page_id : Union[str, None] = None,
            mimetype : Union[str, None] = None,
        ):
            """
            Find files in the mets
            """
            found = workspace.mets.find_all_files(fileGrp=file_grp, ID=file_id, pageId=page_id, mimetype=mimetype)
            return OcrdFileListModel(
                files=[OcrdFileModel(file_grp=of.fileGrp, file_id=of.ID, mimetype=of.mimetype, page_id=of.pageId, url=of.url) for of in found]
            )

        @app.put('/')
        def save():
            """
            Save the mets
            """
            return workspace.save_mets()

        @app.post('/', response_model=OcrdFileModel)
        async def add_file(
            data : bytes = File(),
            file_grp : str = Form(),
            file_id : str = Form(),
            page_id : Union[str, None] = Form(),
            mimetype : str = Form(),
            url : str = Form(),
        ):
            """
            Add a file
            """
            # Validate
            file_resource = OcrdFileModel(file_grp=file_grp, file_id=file_id, page_id=page_id, mimetype=mimetype, url=url)
            # Add to workspace
            kwargs = file_resource.dict()
            kwargs['content'] = data
            # The workspace receives the submitted url as the local filename
            kwargs['local_filename'] = kwargs.pop('url')
            workspace.add_file(**kwargs)
            workspace.save_mets()
            return file_resource

        @app.get('/file_groups', response_model=OcrdFileGroupListModel)
        async def file_groups():
            """
            List the file groups of the mets
            """
            return {'file_groups': workspace.mets.file_groups}

        @app.post('/agent', response_model=OcrdAgentModel)
        async def add_agent(agent : OcrdAgentModel):
            """
            Add an agent to the mets
            """
            kwargs = agent.dict()
            workspace.mets.add_agent(**kwargs)
            workspace.save_mets()
            return agent

        @app.delete('/')
        async def stop():
            """
            Stop the server
            """
            getLogger('ocrd_models.ocrd_mets').info('Shutting down')
            workspace.save_mets()
            # XXX HACK os._exit to not trigger SystemExit caught by uvicorn with sys.exit
            _exit(0)

        uvicorn.run(app, host=self.host, port=self.port, uds=self.socket)
236
237
238