Passed
Pull Request — master (#1157)
by Konstantin
02:59
created

ocrd_models.ocrd_file.OcrdFile.contentids()   A

Complexity

Conditions 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 8
rs 10
c 0
b 0
f 0
cc 2
nop 1
1
"""
2
API to ``mets:file``
3
"""
4
from os.path import splitext, basename
5
from pathlib import Path
6
7
from ocrd_utils import deprecation_warning
8
9
from .ocrd_xml_base import ET
10
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT, TAG_METS_FILE
11
12
class OcrdFile():
    """
    Represents a single ``mets:file/mets:FLocat`` (METS file entry).

    All state lives in the wrapped ``mets:file`` element; the properties of
    this class read from and write to that element (and, for page-related
    properties, delegate to the containing METS object in ``self.mets``).
    """

    def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=None, url=None, ID=None, loctype=None):
        """
        Args:
            el (LxmlElement): etree Element of the ``mets:file`` this represents. Required.
        Keyword Args:
            mets (OcrdMets): Containing :py:class:`ocrd_models.ocrd_mets.OcrdMets`.
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            url (string): original ``@xlink:href`` of this ``mets:file``
            local_filename (Path): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
            ID (string): ``@ID`` of this ``mets:file``
            loctype (string): DEPRECATED do not use
        Raises:
            ValueError: if ``el`` is ``None``.
            Exception: if ``ID`` or ``pageId`` is given but ``mets`` is ``None``.
        """
        if el is None:
            raise ValueError("Must provide mets:file element this OcrdFile represents")
        if loctype:
            deprecation_warning("'loctype' is not supported in OcrdFile anymore, use 'url' or 'local_filename'")
        # ``_el`` and ``mets`` must be assigned first: the property setters
        # below operate on the element and on the containing METS.
        self._el = el
        self.mets = mets
        self.ID = ID
        self.mimetype = mimetype
        self.pageId = pageId

        if local_filename:
            self.local_filename = Path(local_filename)
        if url:
            self.url = url

    def _require_mets(self):
        """
        Return ``self.mets``, raising if this file is not attached to a METS.

        Raises:
            Exception: if ``self.mets`` is ``None``.
        """
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        return self.mets

    def __str__(self):
        """
        String representation of this ``mets:file``.
        """
        parts = []
        for key in ('ID', 'mimetype', 'url', 'local_filename'):
            # Read each property only once; every access is an XML lookup.
            value = getattr(self, key)
            parts.append('%s=%s' % (key, value if value else '---'))
        try:
            fileGrp = self.fileGrp
        except ValueError:
            # Element is not attached to a ``mets:fileGrp``
            fileGrp = '---'
        return '<OcrdFile fileGrp=%s %s]/> ' % (fileGrp, ', '.join(parts))

    def __eq__(self, other):
        """
        Two files are equal if ``ID``, ``url`` and ``local_filename`` match.

        ``mimetype`` and ``fileGrp`` are not part of the comparison.
        Objects lacking any of the compared attributes are not comparable
        (``NotImplemented``), so ``file == None`` is ``False`` rather than
        an ``AttributeError``.
        """
        if not all(hasattr(other, attr) for attr in ('ID', 'url', 'local_filename')):
            return NotImplemented
        return self.ID == other.ID \
           and self.url == other.url \
           and self.local_filename == other.local_filename

    @property
    def basename(self):
        """
        Get the ``.name`` of the local file, or ``None`` if there is no local file.
        """
        if not self.local_filename:
            return None
        return self.local_filename.name

    @property
    def extension(self):
        """
        Get all suffixes of the local file joined together (e.g. ``.tar.gz``),
        or ``None`` if there is no local file.
        """
        if not self.local_filename:
            return None
        return ''.join(self.local_filename.suffixes)

    @property
    def basename_without_extension(self):
        """
        Get the ``.name`` of the local file, if any, with :py:attr:`extension` removed.
        """
        if not self.local_filename:
            return None
        name = self.local_filename.name
        ext = self.extension
        # Guard the no-suffix case: ``name[:-0]`` would yield an empty string.
        return name[:-len(ext)] if ext else name

    @property
    def ID(self):
        """
        Get the ``@ID`` of the ``mets:file``.
        """
        return self._el.get('ID')

    @ID.setter
    def ID(self, ID):
        """
        Set the ``@ID`` of the ``mets:file`` to :py:attr:`ID`.

        Does nothing if ``ID`` is ``None``.

        Raises:
            Exception: if ``self.mets`` is ``None``.
        """
        if ID is None:
            return
        mets = self._require_mets()
        old_id = self.ID
        self._el.set('ID', ID)
        # also update the references in the physical structmap
        for pageId in mets.remove_physical_page_fptr(fileId=old_id):
            self.pageId = pageId

    @property
    def pageId(self):
        """
        Get the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation).

        Raises:
            Exception: if ``self.mets`` is ``None``.
        """
        return self._require_mets().get_physical_page_for_file(self)

    @pageId.setter
    def pageId(self, pageId):
        """
        Set the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation) to :py:attr:`pageId`.

        Does nothing if ``pageId`` is ``None``.

        Raises:
            Exception: if ``self.mets`` is ``None``.
        """
        if pageId is None:
            return
        self._require_mets().set_physical_page_for_file(pageId, self)

    @property
    def contentids(self):
        """
        Get the ``@CONTENTIDS`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``.

        Raises:
            Exception: if ``self.mets`` is ``None``.
        """
        return self._require_mets().get_contentids_for_file(self)

    @property
    def loctypes(self):
        """
        Get the ``@LOCTYPE``s of the ``mets:file``, one per ``mets:FLocat`` child.
        """
        return [x.get('LOCTYPE') for x in self._el.findall('mets:FLocat', NS)]

    @property
    def mimetype(self):
        """
        Get the ``@MIMETYPE`` of the ``mets:file``.
        """
        return self._el.get('MIMETYPE')

    @mimetype.setter
    def mimetype(self, mimetype):
        """
        Set the ``@MIMETYPE`` of the ``mets:file`` to :py:attr:`mimetype`.

        Does nothing if ``mimetype`` is ``None``.
        """
        if mimetype is None:
            return
        self._el.set('MIMETYPE', mimetype)

    @property
    def fileGrp(self):
        """
        The ``@USE`` of the containing ``mets:fileGrp``

        Raises:
            ValueError: if the ``mets:file`` element has no parent element.
        """
        parent = self._el.getparent()
        if parent is None:
            raise ValueError("OcrdFile not related to METS")
        return parent.get('USE')

    @property
    def url(self):
        """
        Get the remote/original URL ``@xlink:href`` of this ``mets:file``.

        Returns the empty string if there is no ``mets:FLocat[@LOCTYPE="URL"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if el_FLocat is not None:
            return el_FLocat.get("{%s}href" % NS["xlink"])
        return ''

    @url.setter
    def url(self, url):
        """
        Set the remote/original URL ``@xlink:href`` of this ``mets:file`` to :py:attr:`url`.

        Passing ``None`` removes an existing ``mets:FLocat[@LOCTYPE="URL"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if url is None:
            # Compare against None explicitly: the truth value of an lxml
            # element reflects whether it has children, and a mets:FLocat
            # normally has none, so a bare ``if el_FLocat:`` never removes it.
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], url)
        el_FLocat.set("LOCTYPE", "URL")

    @property
    def local_filename(self):
        """
        Get the local/cached ``@xlink:href`` of this ``mets:file`` as a ``Path``.

        Returns ``None`` if there is no
        ``mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if el_FLocat is not None:
            return Path(el_FLocat.get("{%s}href" % NS["xlink"]))
        return None

    @local_filename.setter
    def local_filename(self, fname):
        """
        Set the local/cached ``@xlink:href`` of this ``mets:file`` to :py:attr:`local_filename`.

        A falsy ``fname`` removes an existing
        ``mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if not fname:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], str(fname))
        el_FLocat.set("LOCTYPE", "OTHER")
        el_FLocat.set("OTHERLOCTYPE", "FILE")
230
231
232
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.

    All values are stored as plain instance attributes.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None, contentids=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): ignored XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): ``@USE`` of the ``mets:fileGrp`` this file belongs to
            contentids (string): ``@CONTENTIDS`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
        """
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp
        # Keep the value so that ``.contentids`` is readable here, too.
        self.contentids = contentids

    def __str__(self):
        """
        String representation of this file; unset or empty values are shown as ``---``.
        """
        parts = []
        for key in ('fileGrp', 'ID', 'mimetype', 'url', 'local_filename'):
            value = getattr(self, key, None)
            # ``%s`` stringifies non-str values (e.g. a ``Path`` local_filename)
            # which ``str.join`` would reject with a TypeError.
            parts.append('%s=%s' % (key, value if value else '---'))
        return '<ClientSideOcrdFile %s]/>' % ', '.join(parts)
267