ocrd_models.ocrd_file.OcrdFile.__init__()   A
last analyzed

Complexity

Conditions 5

Size

Total Lines 27
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 14
dl 0
loc 27
rs 9.2333
c 0
b 0
f 0
cc 5
nop 9

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
"""
2
API to ``mets:file``
3
"""
4
from pathlib import Path
5
from typing import Any, List, Optional, Union
6
7
from ocrd_utils import deprecation_warning
8
9
from .ocrd_xml_base import ET  # type: ignore
10
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT
11
12
13
class OcrdFile():
    """
    Represents a single ``mets:file/mets:FLocat`` (METS file entry).

    The instance only holds the wrapped ``mets:file`` element and a reference
    to the containing METS object; every property reads from / writes to the
    element (or delegates to the METS object for page assignment).
    """

    # XPath expressions (relative to the ``mets:file``) selecting the two kinds
    # of ``mets:FLocat`` children this class manages.
    _XPATH_FLOCAT_URL = 'mets:FLocat[@LOCTYPE="URL"]'
    _XPATH_FLOCAT_LOCAL = 'mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]'

    def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=None, url=None, ID=None, loctype=None):
        """
        Args:
            el (LxmlElement): etree Element of the ``mets:file`` this represents (required)
        Keyword Args:
            mets (OcrdMets): Containing :py:class:`ocrd_models.ocrd_mets.OcrdMets`.
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            url (string): original ``@xlink:href`` of this ``mets:file``
            local_filename (string): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
            ID (string): ``@ID`` of this ``mets:file``
            loctype (string): DEPRECATED do not use
        Raises:
            ValueError: if ``el`` is ``None``
            Exception: if ``ID`` or ``pageId`` is given while ``mets`` is ``None``
        """
        if el is None:
            raise ValueError("Must provide mets:file element this OcrdFile represents")
        if loctype:
            deprecation_warning("'loctype' is not supported in OcrdFile anymore, use 'url' or 'local_filename'")
        self._el = el
        # ``mets`` has to be assigned before ``ID`` and ``pageId``:
        # their setters delegate to it.
        self.mets = mets
        self.ID = ID
        self.mimetype = mimetype
        self.pageId = pageId

        # Only touch the FLocat children when a (truthy) value was passed,
        # so existing children of ``el`` are left alone otherwise.
        if local_filename:
            self.local_filename = local_filename
        if url:
            self.url = url

    def __str__(self):
        """
        String representation of this ``mets:file``.
        """
        parts = []
        for key in ('ID', 'mimetype', 'url', 'local_filename'):
            value = getattr(self, key)
            parts.append('='.join([key, str(value) if value else '---']))
        try:
            fileGrp = self.fileGrp
        except ValueError:
            # element is not attached to a parent (see :py:attr:`fileGrp`)
            fileGrp = '---'
        # NOTE: the stray ']' and the trailing blank are kept on purpose so the
        # output stays identical for anything that matches on it.
        return '<OcrdFile fileGrp=%s %s]/> ' % (fileGrp, ', '.join(parts))

    def __eq__(self, other):
        """
        Two files are equal if ``ID``, ``url`` and ``local_filename`` agree.

        ``mimetype`` and ``fileGrp`` are deliberately not part of the comparison.
        Objects that do not expose these three attributes (e.g. ``None``) make
        this return ``NotImplemented``, so ``==`` evaluates to ``False``
        instead of raising :py:class:`AttributeError`.

        As no ``__hash__`` is defined alongside, instances remain unhashable.
        """
        try:
            other_key = (other.ID, other.url, other.local_filename)
        except AttributeError:
            return NotImplemented
        return (self.ID, self.url, self.local_filename) == other_key

    def _require_mets(self):
        """
        Return :py:attr:`mets`, raising if this file is not attached to one.
        """
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        return self.mets

    @property
    def basename(self) -> str:
        """
        Get the ``.name`` of the local file (empty string if there is no local file).
        """
        if not self.local_filename:
            return ''
        return Path(self.local_filename).name

    @property
    def extension(self) -> str:
        """
        Get all suffixes of the local file joined together (e.g. ``.tar.gz``),
        or the empty string if there is no local file or it has no suffix.
        """
        if not self.local_filename:
            return ''
        return ''.join(Path(self.local_filename).suffixes)

    @property
    def basename_without_extension(self) -> str:
        """
        Get the ``os.path.basename`` of the local file, if any, with extension removed.
        """
        if not self.local_filename:
            return ''
        name = Path(self.local_filename).name
        # Slice by absolute position: ``name[:-len(ext)]`` would yield '' for
        # files without any suffix, since ``name[:-0]`` is ``name[:0]``.
        return name[:len(name) - len(self.extension)]

    @property
    def ID(self) -> str:
        """
        Get the ``@ID`` of the ``mets:file``.
        """
        return self._el.get('ID')

    @ID.setter
    def ID(self, ID: Optional[str]) -> None:
        """
        Set the ``@ID`` of the ``mets:file`` to :py:attr:`ID`.

        ``None`` is a no-op. Otherwise a containing METS is required.
        """
        if ID is None:
            return
        mets = self._require_mets()
        old_id = self.ID
        self._el.set('ID', ID)
        # also update the references in the physical structmap: drop the
        # pointers to the old ID and re-assign every page that is returned
        # (presumably the pages which referenced ``old_id`` -- see OcrdMets)
        for pageId in mets.remove_physical_page_fptr(fileId=old_id):
            self.pageId = pageId

    @property
    def pageId(self) -> str:
        """
        Get the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
        (physical page manifestation).
        """
        return self._require_mets().get_physical_page_for_file(self)

    @pageId.setter
    def pageId(self, pageId: Optional[str]) -> None:
        """
        Set the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
        (physical page manifestation) to :py:attr:`pageId`.

        ``None`` is a no-op. Otherwise a containing METS is required.
        """
        if pageId is None:
            return
        self._require_mets().set_physical_page_for_file(pageId, self)

    @property
    def loctypes(self) -> List[str]:
        """
        Get the ``@LOCTYPE``s of the ``mets:file``
        (one entry per ``mets:FLocat`` child).
        """
        return [x.get('LOCTYPE') for x in self._el.findall('mets:FLocat', NS)]

    @property
    def mimetype(self) -> str:
        """
        Get the ``@MIMETYPE`` of the ``mets:file``.
        """
        return self._el.get('MIMETYPE')

    @mimetype.setter
    def mimetype(self, mimetype: Optional[str]) -> None:
        """
        Set the ``@MIMETYPE`` of the ``mets:file`` to :py:attr:`mimetype`.

        ``None`` is a no-op.
        """
        if mimetype is None:
            return
        self._el.set('MIMETYPE', mimetype)

    @property
    def fileGrp(self) -> str:
        """
        The ``@USE`` of the containing ``mets:fileGrp``

        Raises:
            ValueError: if the element has no parent
        """
        parent = self._el.getparent()
        if parent is None:
            raise ValueError("OcrdFile not related to METS")
        return parent.get('USE')

    @property
    def url(self) -> str:
        """
        Get the remote/original URL ``@xlink:href`` of this ``mets:file``
        (empty string if there is no ``mets:FLocat`` with ``@LOCTYPE="URL"``).
        """
        el_FLocat = self._el.find(self._XPATH_FLOCAT_URL, NS)
        if el_FLocat is None:
            return ''
        return el_FLocat.get("{%s}href" % NS["xlink"])

    @url.setter
    def url(self, url: Optional[str]) -> None:
        """
        Set the remote/original URL ``@xlink:href`` of this ``mets:file`` to :py:attr:`url`.

        ``None`` removes the corresponding ``mets:FLocat``, if present.
        """
        el_FLocat = self._el.find(self._XPATH_FLOCAT_URL, NS)
        if url is None:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], url)
        el_FLocat.set("LOCTYPE", "URL")

    @property
    def local_filename(self) -> Optional[str]:
        """
        Get the local/cached ``@xlink:href`` of this ``mets:file``
        (``None`` if there is no ``mets:FLocat`` with
        ``@LOCTYPE="OTHER" @OTHERLOCTYPE="FILE"``).
        """
        el_FLocat = self._el.find(self._XPATH_FLOCAT_LOCAL, NS)
        if el_FLocat is None:
            return None
        return el_FLocat.get("{%s}href" % NS["xlink"])

    @local_filename.setter
    def local_filename(self, fname: Optional[Union[Path, str]]):
        """
        Set the local/cached ``@xlink:href`` of this ``mets:file`` to :py:attr:`local_filename`.

        Any falsy value (``None``, ``''``) removes the corresponding
        ``mets:FLocat``, if present. Paths are stored as strings.
        """
        el_FLocat = self._el.find(self._XPATH_FLOCAT_LOCAL, NS)
        if not fname:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], str(fname))
        el_FLocat.set("LOCTYPE", "OTHER")
        el_FLocat.set("OTHERLOCTYPE", "FILE")
226
227
228
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.

    All values are stored as plain instance attributes.
    """

    def __init__(
        self,
        el,  # pylint: disable=unused-argument
        mimetype: str = '',
        pageId: str = '',
        loctype: str = 'OTHER',
        local_filename: Optional[str] = None,
        mets: Any = None,  # pylint: disable=unused-argument
        url: str = '',
        ID: str = '',
        fileGrp: str = ''
    ):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string):  ``@xlink:href`` of this ``mets:file`` (if ``@LOCTYPE==URL``)
            local_filename (): ``@xlink:href`` of this ``mets:file`` (if ``@LOCTYPE==OTHER @OTHERLOCTYPE==FILE``)
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): ``@USE`` of the containing ``mets:fileGrp``
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        String representation; unset or empty attributes are shown as ``---``.
        """
        parts = []
        for key in ('fileGrp', 'ID', 'mimetype', 'url', 'local_filename'):
            value = getattr(self, key, None)
            # stringify explicitly: ``str.join`` raises TypeError on any
            # non-str value (e.g. a ``Path`` given as ``local_filename``)
            parts.append('='.join([key, str(value) if value else '---']))
        return '<ClientSideOcrdFile %s]/>' % ', '.join(parts)
273
274
275
# Type alias accepting either flavour of file object: the element-backed
# OcrdFile or the attribute-only ClientSideOcrdFile.
OcrdFileType = Union[OcrdFile, ClientSideOcrdFile]
276