Passed
Push — master ( 488518...bfe6a0 )
by Konstantin
03:11
created

ocrd_models.ocrd_file.OcrdFile.local_filename()   A

Complexity

Conditions 4

Size

Total Lines 8
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 12
dl 0
loc 8
rs 9.8
c 0
b 0
f 0
cc 4
nop 2
1
"""
2
API to ``mets:file``
3
"""
4
from os.path import splitext, basename
5
from pathlib import Path
6
7
from ocrd_utils import deprecation_warning
8
9
from .ocrd_xml_base import ET
10
from .constants import NAMESPACES as NS, TAG_METS_FLOCAT, TAG_METS_FILE
11
12
class OcrdFile():
    """
    Represents a single ``mets:file/mets:FLocat`` (METS file entry).

    The object is a thin wrapper: apart from the wrapped element (``_el``)
    and the optional parent ``mets``, every property reads from or writes to
    the underlying ``mets:file`` element (or delegates to ``mets``).
    """

    def __init__(self, el, mimetype=None, pageId=None, local_filename=None, mets=None, url=None, ID=None, loctype=None):
        """
        Args:
            el (LxmlElement): etree Element of the ``mets:file`` this represents (required)
        Keyword Args:
            mets (OcrdMets): Containing :py:class:`ocrd_models.ocrd_mets.OcrdMets`.
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            url (string): original ``@xlink:href`` of this ``mets:file``
            local_filename (string or Path): ``@xlink:href`` pointing to the locally cached version of the file in the workspace
            ID (string): ``@ID`` of this ``mets:file``
            loctype (string): DEPRECATED do not use (only triggers a deprecation warning)

        Raises:
            ValueError: if ``el`` is ``None``.
            Exception: if ``ID`` or ``pageId`` is given but ``mets`` is ``None``
                (raised by the respective property setters).
        """
        if el is None:
            raise ValueError("Must provide mets:file element this OcrdFile represents")
        if loctype:
            deprecation_warning("'loctype' is not supported in OcrdFile anymore, use 'url' or 'local_filename'")
        # _el and mets must be in place first: the property setters below rely on them
        self._el = el
        self.mets = mets
        self.ID = ID
        self.mimetype = mimetype
        self.pageId = pageId

        # Only touch the mets:FLocat children when a value was actually passed,
        # so FLocat entries already present on ``el`` are left alone.
        if local_filename:
            self.local_filename = Path(local_filename)
        if url:
            self.url = url

    def __str__(self):
        """
        String representation of this ``mets:file``.

        Empty/unset properties are rendered as ``---``.
        """
        parts = []
        for key in ['ID', 'mimetype', 'url', 'local_filename']:
            # read each property once; every access is a lookup on the element
            value = getattr(self, key)
            parts.append('='.join([key, str(value) if value else '---']))
        props = ', '.join(parts)
        try:
            fileGrp = self.fileGrp
        except ValueError:
            # element is not attached to a parent
            fileGrp = '---'
        return '<OcrdFile fileGrp=%s %s]/> ' % (fileGrp, props)

    def __eq__(self, other):
        """
        Compare by ``ID``, ``url`` and ``local_filename``.

        ``mimetype`` and ``fileGrp`` are not part of the comparison.
        """
        return (
            self.ID == other.ID
            and self.url == other.url
            and self.local_filename == other.local_filename
        )

    @property
    def basename(self):
        """
        Get the ``.name`` of the local file, or ``None`` if there is no local file.
        """
        if not self.local_filename:
            return
        return self.local_filename.name

    @property
    def extension(self):
        """
        Get all suffixes of the local file joined together (e.g. ``.tar.gz``),
        or ``None`` if there is no local file.
        """
        if not self.local_filename:
            return
        return ''.join(self.local_filename.suffixes)

    @property
    def basename_without_extension(self):
        """
        Get the ``.name`` of the local file, if any, with :py:attr:`extension` removed.

        Returns ``None`` if there is no local file.
        """
        if not self.local_filename:
            return
        name = self.local_filename.name
        ext = self.extension
        # Guard the empty extension: name[:-0] would be '' instead of the full name
        if not ext:
            return name
        return name[:-len(ext)]

    @property
    def ID(self):
        """
        Get the ``@ID`` of the ``mets:file``.
        """
        return self._el.get('ID')

    @ID.setter
    def ID(self, ID):
        """
        Set the ``@ID`` of the ``mets:file`` to :py:attr:`ID`.

        Does nothing if ``ID`` is ``None``.

        Raises:
            Exception: if this file has no parent ``mets``.
        """
        if ID is None:
            return
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        old_id = self.ID
        self._el.set('ID', ID)
        # also update the references in the physical structmap:
        # drop the pointers to the old ID and re-register under the new one
        for pageId in self.mets.remove_physical_page_fptr(fileId=old_id):
            self.pageId = pageId

    @property
    def pageId(self):
        """
        Get the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation).

        Raises:
            Exception: if this file has no parent ``mets``.
        """
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        return self.mets.get_physical_page_for_file(self)

    @pageId.setter
    def pageId(self, pageId):
        """
        Set the ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file`` (physical page manifestation) to :py:attr:`pageId`.

        Does nothing if ``pageId`` is ``None``.

        Raises:
            Exception: if this file has no parent ``mets``.
        """
        if pageId is None:
            return
        if self.mets is None:
            raise Exception("OcrdFile %s has no member 'mets' pointing to parent OcrdMets" % self)
        self.mets.set_physical_page_for_file(pageId, self)

    @property
    def loctypes(self):
        """
        Get the ``@LOCTYPE``s of the ``mets:file`` (one per ``mets:FLocat`` child).
        """
        return [x.get('LOCTYPE') for x in self._el.findall('mets:FLocat', NS)]

    @property
    def mimetype(self):
        """
        Get the ``@MIMETYPE`` of the ``mets:file``.
        """
        return self._el.get('MIMETYPE')

    @mimetype.setter
    def mimetype(self, mimetype):
        """
        Set the ``@MIMETYPE`` of the ``mets:file`` to :py:attr:`mimetype`.

        Does nothing if ``mimetype`` is ``None``.
        """
        if mimetype is None:
            return
        self._el.set('MIMETYPE', mimetype)

    @property
    def fileGrp(self):
        """
        The ``@USE`` of the containing ``mets:fileGrp``

        Raises:
            ValueError: if the element has no parent.
        """
        parent = self._el.getparent()
        if parent is not None:
            return parent.get('USE')
        raise ValueError("OcrdFile not related to METS")

    @property
    def url(self):
        """
        Get the remote/original URL ``@xlink:href`` of this ``mets:file``.

        Returns the empty string if there is no ``mets:FLocat[@LOCTYPE="URL"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if el_FLocat is not None:
            return el_FLocat.get("{%s}href" % NS["xlink"])
        return ''

    @url.setter
    def url(self, url):
        """
        Set the remote/original URL ``@xlink:href`` of this ``mets:file`` to :py:attr:`url`.

        If ``url`` is ``None``, an existing ``mets:FLocat[@LOCTYPE="URL"]`` is removed.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="URL"]', NS)
        if url is None:
            # Compare with None explicitly: an element without children is
            # falsy, so a bare truth test would never remove the FLocat.
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], url)
        el_FLocat.set("LOCTYPE", "URL")

    @property
    def local_filename(self):
        """
        Get the local/cached ``@xlink:href`` of this ``mets:file`` as a ``Path``.

        Returns ``None`` if there is no
        ``mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if el_FLocat is not None:
            return Path(el_FLocat.get("{%s}href" % NS["xlink"]))

    @local_filename.setter
    def local_filename(self, fname):
        """
        Set the local/cached ``@xlink:href`` of this ``mets:file`` to :py:attr:`local_filename`.

        A falsy ``fname`` removes an existing local ``mets:FLocat``.
        """
        el_FLocat = self._el.find('mets:FLocat[@LOCTYPE="OTHER"][@OTHERLOCTYPE="FILE"]', NS)
        if not fname:
            if el_FLocat is not None:
                self._el.remove(el_FLocat)
            return
        if el_FLocat is None:
            el_FLocat = ET.SubElement(self._el, TAG_METS_FLOCAT)
        el_FLocat.set("{%s}href" % NS["xlink"], str(fname))
        el_FLocat.set("LOCTYPE", "OTHER")
        el_FLocat.set("OTHERLOCTYPE", "FILE")
220
221
222
class ClientSideOcrdFile:
    """
    Provides the same interface as :py:class:`ocrd_models.ocrd_file.OcrdFile`
    but without attachment to :py:class:`ocrd_models.ocrd_mets.OcrdMets` since
    this represents the response of the :py:class:`ocrd.mets_server.OcrdMetsServer`.

    All properties are plain instance attributes set once in the constructor.
    """

    def __init__(self, el, mimetype=None, pageId=None, loctype='OTHER', local_filename=None, mets=None, url=None, ID=None, fileGrp=None):
        """
        Args:
            el (): ignored
        Keyword Args:
            mets (): ignored
            mimetype (string): ``@MIMETYPE`` of this ``mets:file``
            pageId (string): ``@ID`` of the physical ``mets:structMap`` entry corresponding to this ``mets:file``
            loctype (string): ``@LOCTYPE`` of this ``mets:file``
            url (string): stored as given - XXX the remote/original file once we have proper mets:FLocat bookkeeping
            local_filename (): ``@xlink:href`` of this ``mets:file`` - XXX the local file once we have proper mets:FLocat bookkeeping
            ID (string): ``@ID`` of this ``mets:file``
            fileGrp (string): stored as given; shown as ``fileGrp`` in the string representation
        """
        # ``el`` and ``mets`` are accepted only for signature compatibility
        self.ID = ID
        self.mimetype = mimetype
        self.local_filename = local_filename
        self.url = url
        self.loctype = loctype
        self.pageId = pageId
        self.fileGrp = fileGrp

    def __str__(self):
        """
        String representation of this file; empty/unset properties are rendered as ``---``.
        """
        parts = []
        for key in ['fileGrp', 'ID', 'mimetype', 'url', 'local_filename']:
            value = getattr(self, key, None)
            # str() so that non-string values (e.g. a Path) do not break the join
            parts.append('='.join([key, str(value) if value else '---']))
        props = ', '.join(parts)
        return '<ClientSideOcrdFile %s]/>' % (props)
256