ocrd_models.ocrd_file.OcrdFile.local_filename()   A
last analyzed

Complexity

Conditions 4

Size

Total Lines 9
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 9
rs 9.75
c 0
b 0
f 0
cc 4
nop 2
1
"""
2
API to ``mets:file``
3
"""
4
from pathlib import Path
5
from typing import Any, List, Optional, Union
6
7
from ocrd_utils import deprecation_warning
8
9
from .ocrd_xml_base import ET  # type: ignore
10
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT
11
12
13
class OcrdFile():
    """
    Represents a single ``mets:file/mets:FLocat`` (METS file entry).

    All state lives in the wrapped ``mets:file`` element (``self._el``);
    the properties below read from and write to that element directly.
    """

    def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=None, url=None, ID=None, loctype=None):
        """
        Args:
            el (LxmlElement): etree Element of the ``mets:file`` this represents. Must not be ``None``.
        Keyword Args:
            mets (OcrdMets): Containing :py:class:`ocrd_models.ocrd_mets.OcrdMets`.
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            url (string): original ``@xlink:href`` of this ``mets:file``
            local_filename (string): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
            ID (string): ``@ID`` of this ``mets:file``
            loctype (string): DEPRECATED do not use

        Raises:
            ValueError: if ``el`` is ``None``.
            Exception: if ``ID`` or ``pageId`` is given while ``mets`` is ``None``.
        """
        if el is None:
            raise ValueError("Must provide mets:file element this OcrdFile represents")
        if loctype:
            deprecation_warning("'loctype' is not supported in OcrdFile anymore, use 'url' or 'local_filename'")
        self._el = el
        # ``mets`` has to be assigned before ``ID`` and ``pageId``: both
        # setters need it whenever they receive a non-``None`` value.
        self.mets = mets
        self.ID = ID
        self.mimetype = mimetype
        self.pageId = pageId

        # Only touch the FLocat children when a truthy value was passed,
        # so that existing entries of ``el`` are left alone otherwise.
        if local_filename:
            self.local_filename = local_filename
        if url:
            self.url = url

    def __str__(self):
        """
        String representation of this ``mets:file``.

        Unset (falsy) properties are rendered as ``---``.
        """
        parts = []
        for key in ['ID', 'mimetype', 'url', 'local_filename']:
            value = getattr(self, key)
            parts.append('='.join([key, str(value) if value else '---']))
        props = ', '.join(parts)
        try:
            fileGrp = self.fileGrp
        except ValueError:
            # element is not attached to a parent ``mets:fileGrp``
            fileGrp = '---'
        return '<OcrdFile fileGrp=%s %s]/> ' % (fileGrp, props)

    def __eq__(self, other):
        """
        Two files are equal if ``ID``, ``url`` and ``local_filename`` match.

        ``mimetype`` and ``fileGrp`` are not part of the comparison.
        Objects that do not expose all three compared attributes are
        reported as not comparable (``NotImplemented``) instead of
        raising ``AttributeError``.
        """
        if not all(hasattr(other, attr) for attr in ('ID', 'url', 'local_filename')):
            return NotImplemented
        return (self.ID == other.ID and
                self.url == other.url and
                self.local_filename == other.local_filename)

    @property
    def basename(self) -> str:
        """
        Get the ``.name`` of the local file, or ``''`` if there is no local file.
        """
        if not self.local_filename:
            return ''
        return Path(self.local_filename).name

    @property
    def extension(self) -> str:
        """
        Get all suffixes of the local file joined together (e.g. ``.tar.gz``),
        or ``''`` if there is no local file.
        """
        if not self.local_filename:
            return ''
        return ''.join(Path(self.local_filename).suffixes)

    @property
    def basename_without_extension(self) -> str:
        """
        Get the ``os.path.basename`` of the local file, if any, with extension removed.
        """
        if not self.local_filename:
            return ''
        name = Path(self.local_filename).name
        ext = self.extension
        # Guard the suffix-less case: ``name[:-0]`` would be the empty string.
        if not ext:
            return name
        return name[:-len(ext)]

    @property
    def ID(self) -> str:
        """
        Get the ``@ID`` of the ``mets:file``.
        """
        return self._el.get('ID')

    @ID.setter
    def ID(self, ID: Optional[str]) -> None:
        """
        Set the ``@ID`` of the ``mets:file`` to :py:attr:`ID`.

        ``None`` is a no-op. Raises ``Exception`` if :py:attr:`mets` is unset.
        """
        if ID is None:
            return
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        old_id = self.ID
        self._el.set('ID', ID)
        # also update the references in the physical structmap:
        # drop the pointers to the old ID and re-assign each returned page
        for pageId in self.mets.remove_physical_page_fptr(fileId=old_id):
            self.pageId = pageId

    @property
    def pageId(self) -> str:
        """
        Get the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
        (physical page manifestation).

        Raises ``Exception`` if :py:attr:`mets` is unset.
        """
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        return self.mets.get_physical_page_for_file(self)

    @pageId.setter
    def pageId(self, pageId: Optional[str]) -> None:
        """
        Set the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
        (physical page manifestation) to :py:attr:`pageId`.

        ``None`` is a no-op. Raises ``Exception`` if :py:attr:`mets` is unset.
        """
        if pageId is None:
            return
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        self.mets.set_physical_page_for_file(pageId, self)

    @property
    def loctypes(self) -> List[str]:
        """
        Get the ``@LOCTYPE``s of the ``mets:file``.
        """
        return [x.get('LOCTYPE') for x in self._el.findall('mets:FLocat', NS)]

    @property
    def mimetype(self) -> str:
        """
        Get the ``@MIMETYPE`` of the ``mets:file``.
        """
        return self._el.get('MIMETYPE')

    @mimetype.setter
    def mimetype(self, mimetype: Optional[str]) -> None:
        """
        Set the ``@MIMETYPE`` of the ``mets:file`` to :py:attr:`mimetype`.

        ``None`` is a no-op.
        """
        if mimetype is None:
            return
        self._el.set('MIMETYPE', mimetype)

    @property
    def fileGrp(self) -> str:
        """
        The ``@USE`` of the containing ``mets:fileGrp``

        Raises ``ValueError`` if the element has no parent.
        """
        parent = self._el.getparent()
        if parent is None:
            raise ValueError("OcrdFile not related to METS")
        return parent.get('USE')

    @property
    def url(self) -> str:
        """
        Get the remote/original URL ``@xlink:href`` of this ``mets:file``.

        Returns ``''`` if there is no ``mets:FLocat`` with ``@LOCTYPE="URL"``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if el_FLocat is not None:
            return el_FLocat.get("{%s}href" % NS["xlink"])
        return ''

    @url.setter
    def url(self, url: Optional[str]) -> None:
        """
        Set the remote/original URL ``@xlink:href`` of this ``mets:file`` to :py:attr:`url`.

        Passing ``None`` removes an existing URL ``mets:FLocat``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if url is None:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], url)
        el_FLocat.set("LOCTYPE", "URL")

    @property
    def local_filename(self) -> Optional[str]:
        """
        Get the local/cached ``@xlink:href`` of this ``mets:file``.

        Returns ``None`` if there is no ``mets:FLocat`` with
        ``@LOCTYPE="OTHER"`` and ``@OTHERLOCTYPE="FILE"``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if el_FLocat is not None:
            return el_FLocat.get("{%s}href" % NS["xlink"])
        return None

    @local_filename.setter
    def local_filename(self, fname: Optional[Union[Path, str]]):
        """
        Set the local/cached ``@xlink:href`` of this ``mets:file`` to :py:attr:`local_filename`.

        Any falsy value (``None``, ``''``) removes an existing local-file
        ``mets:FLocat``. Other values are stored as ``str(fname)``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if not fname:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        fname = str(fname)
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], fname)
        el_FLocat.set("LOCTYPE", "OTHER")
        el_FLocat.set("OTHERLOCTYPE", "FILE")
226
227
228
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.

    All values are kept as plain instance attributes.
    """

    def __init__(
        self,
        el,  # pylint: disable=unused-argument
        mimetype: str = '',
        pageId: str = '',
        loctype: str = 'OTHER',
        local_filename: Optional[str] = None,
        mets: Any = None,  # pylint: disable=unused-argument
        url: str = '',
        ID: str = '',
        fileGrp: str = ''
    ):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string):  ``@xlink:href`` of this ``mets:file`` (if ``@LOCTYPE==URL``)
            local_filename (): ``@xlink:href`` of this ``mets:file`` (if ``@LOCTYPE==OTHER @OTHERLOCTYPE==FILE``)
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): ``@USE`` of the containing ``mets:fileGrp``
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        String representation of this file entry.

        Unset (missing or falsy) attributes are rendered as ``---``.
        """
        parts = []
        for key in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']:
            value = getattr(self, key, None)
            # str() keeps non-string values (e.g. a Path) from breaking join()
            parts.append('='.join([key, str(value) if value else '---']))
        props = ', '.join(parts)
        return '<ClientSideOcrdFile %s]/>' % (props)
273
274
275
# Type alias covering both file representations defined in this module:
# the element-backed OcrdFile and the plain-attribute ClientSideOcrdFile.
OcrdFileType = Union[OcrdFile, ClientSideOcrdFile]
276