Passed
Pull Request — master (#351)
by
unknown
01:59
created

elodie.external.pyexiftool.ExifTool.start()   A

Complexity

Conditions 3

Size

Total Lines 21
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 14
nop 1
dl 0
loc 21
rs 9.7
c 0
b 0
f 0
1
# -*- coding: utf-8 -*-
2
# PyExifTool <http://github.com/smarnach/pyexiftool>
3
# Copyright 2012 Sven Marnach. Enhancements by Leo Broska
4
5
# This file is part of PyExifTool.
6
#
7
# PyExifTool is free software: you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, either version 3 of the License, or
10
# (at your option) any later version.
11
#
12
# PyExifTool is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
# GNU General Public License for more details.
16
#
17
# You should have received a copy of the GNU General Public License
18
# along with PyExifTool.  If not, see <http://www.gnu.org/licenses/>.
19
20
"""
21
PyExifTool is a Python library to communicate with an instance of Phil
22
Harvey's excellent ExifTool_ command-line application.  The library
23
provides the class :py:class:`ExifTool` that runs the command-line
24
tool in batch mode and features methods to send commands to that
25
program, including methods to extract meta-information from one or
26
more image files.  Since ``exiftool`` is run in batch mode, only a
27
single instance needs to be launched and can be reused for many
28
queries.  This is much more efficient than launching a separate
29
process for every single query.
30
31
.. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/
32
33
The source code can be checked out from the github repository with
34
35
::
36
37
    git clone git://github.com/smarnach/pyexiftool.git
38
39
Alternatively, you can download a tarball_.  There haven't been any
40
releases yet.
41
42
.. _tarball: https://github.com/smarnach/pyexiftool/tarball/master
43
44
PyExifTool is licenced under GNU GPL version 3 or later.
45
46
Example usage::
47
48
    import exiftool
49
50
    files = ["a.jpg", "b.png", "c.tif"]
51
    with exiftool.ExifTool() as et:
52
        metadata = et.get_metadata_batch(files)
53
    for d in metadata:
54
        print("{:20.20} {:20.20}".format(d["SourceFile"],
55
                                         d["EXIF:DateTimeOriginal"]))
56
"""
57
58
from __future__ import unicode_literals
59
60
import sys
61
import subprocess
62
import os
63
import json
64
import warnings
65
import logging
66
import codecs
67
68
try:        # Py3k compatibility
69
    basestring
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable basestring does not seem to be defined.
Loading history...
70
except NameError:
71
    basestring = (bytes, str)
72
73
executable = "exiftool"
74
"""The name of the executable to run.
75
76
If the executable is not located in one of the paths listed in the
77
``PATH`` environment variable, the full path should be given here.
78
"""
79
80
# Sentinel indicating the end of the output of a sequence of commands.
81
# The standard value should be fine.
82
sentinel = b"{ready}"
83
84
# The block size when reading from exiftool.  The standard value
85
# should be fine, though other values might give better performance in
86
# some cases.
87
block_size = 4096
88
89
# constants related to keywords manipulations 
90
KW_TAGNAME = "IPTC:Keywords"
91
KW_REPLACE, KW_ADD, KW_REMOVE = range(3)
92
93
94
# This code has been adapted from Lib/os.py in the Python source tree
95
# (sha1 265e36e277f3)
96
def _fscodec():
    """Build and return the ``fsencode`` helper for this interpreter.

    The filesystem encoding and the error handler are resolved once, here,
    and captured by the returned closure.
    """
    fs_encoding = sys.getfilesystemencoding()

    def _pick_error_handler():
        # 'mbcs' keeps the strict handler; everywhere else prefer
        # 'surrogateescape' when this interpreter knows about it.
        if fs_encoding == "mbcs":
            return "strict"
        try:
            codecs.lookup_error("surrogateescape")
        except LookupError:
            return "strict"
        return "surrogateescape"

    error_handler = _pick_error_handler()

    def fsencode(filename):
        """
        Encode filename to the filesystem encoding with 'surrogateescape' error
        handler, return bytes unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).
        """
        if not isinstance(filename, bytes):
            return filename.encode(fs_encoding, error_handler)
        return filename

    return fsencode


# Bind the ready-made encoder at module level and drop the factory.
fsencode = _fscodec()
del _fscodec
122
123
#string helper
124
def strip_nl(s):
    """Return ``s`` folded onto one line: its lines joined by single spaces."""
    pieces = s.splitlines()
    return ' '.join(pieces)
126
127
128
# Error checking function
129
# Note: They are quite fragile, because they just parse the output text from exiftool
130
def check_ok(result):
    """Evaluates the output from a exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method.  It may be a raw
    ``bytes`` object or a text string; ``None`` counts as a failure.

    The result is True or False: True unless the output is missing or
    contains exiftool's "due to errors" marker.
    """
    if result is None:
        return False
    # Compare like with like: searching a text marker inside a bytes object
    # raises TypeError on Python 3.
    marker = b"due to errors" if isinstance(result, bytes) else "due to errors"
    return marker not in result
138
139
def format_error(result):
    """Evaluates the output from a exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method.  It may be a raw
    ``bytes`` object, a text string or ``None``.

    The result is a human readable one-line string.
    """
    if result is None:
        return "exiftool operation can't be evaluated: No result given"

    # Work on text from here on: the helpers below and the %-formatting all
    # operate on text, and mixing in bytes raises TypeError on Python 3.
    if isinstance(result, bytes):
        text = result.decode("utf-8", "replace")
    else:
        text = result

    if check_ok(text):
        return 'exiftool finished probably properly. ("%s")' % strip_nl(text)
    return 'exiftool finished with error: "%s"' % strip_nl(text)
153
154
class Singleton(type):
    """Metaclass to use the singleton [anti-]pattern.

    The first call of a class creates its instance and stores it on the
    class; later calls return that stored object and ignore their arguments.
    """

    # Default seen by a class until it has stored an instance of its own.
    instance = None

    def __call__(cls, *args, **kwargs):
        """Return the stored instance, creating it on the first call."""
        existing = cls.instance
        if existing is not None:
            return existing
        cls.instance = super(Singleton, cls).__call__(*args, **kwargs)
        return cls.instance
162
163
class ExifTool(object, metaclass=Singleton):
    """Run the `exiftool` command-line tool and communicate to it.

    You can pass two arguments to the constructor:
    - ``addedargs`` (list of strings): contains additional parameters for
      the stay-open instance of exiftool
    - ``executable`` (string): file name of the ``exiftool`` executable.
      The default value ``exiftool`` will only work if the executable
      is in your ``PATH``

    Most methods of this class are only available after calling
    :py:meth:`start()`, which will actually launch the subprocess.  To
    avoid leaving the subprocess running, make sure to call
    :py:meth:`terminate()` method when finished using the instance.
    This method will also be implicitly called when the instance is
    garbage collected, but there are circumstances when this won't ever
    happen, so you should not rely on the implicit process
    termination.  Subprocesses won't be automatically terminated if
    the parent process exits, so a leaked subprocess will stay around
    until manually killed.

    A convenient way to make sure that the subprocess is terminated is
    to use the :py:class:`ExifTool` instance as a context manager::

        with ExifTool() as et:
            ...

    .. warning:: Note that there is no error handling.  Nonsensical
       options will be silently ignored by exiftool, so there's not
       much that can be done in that regard.  You should avoid passing
       non-existent files to any of the methods, since this will lead
       to undefined behaviour.

    .. py:attribute:: running

       A Boolean value indicating whether this instance is currently
       associated with a running subprocess.
    """

    def __init__(self, executable_=None, addedargs=None):
        """Store the executable name and the extra exiftool arguments.

        ``executable_`` falls back to the module-level ``executable``
        when it is ``None``.  ``addedargs`` must be a list (or ``None``
        for no extra arguments); anything else raises ``TypeError``.
        No subprocess is launched here; see :py:meth:`start()`.
        """
        # Set first so that terminate() -- also reached through __del__ --
        # finds the attribute even if the validation below raises.
        self.running = False

        self.executable = executable if executable_ is None else executable_

        if addedargs is None:
            self.addedargs = []
        elif isinstance(addedargs, list):
            self.addedargs = addedargs
        else:
            raise TypeError("addedargs not a list of strings")

    @staticmethod
    def _encode_params(params):
        """Return ``params`` as a list of bytes, UTF-8 encoding text items."""
        return [p if isinstance(p, bytes) else p.encode('utf-8')
                for p in params]

    def start(self):
        """Start an ``exiftool`` process in batch mode for this instance.

        This method will issue a ``UserWarning`` if the subprocess is
        already running.  The process is started with the ``-G`` and
        ``-n`` as common arguments, which are automatically included
        in every command you run with :py:meth:`execute()`.  The
        ``addedargs`` given to the constructor are appended after them.
        The standard error stream of the subprocess is discarded.
        """
        if self.running:
            warnings.warn("ExifTool already running; doing nothing.")
            return
        procargs = [self.executable, "-stay_open", "True", "-@", "-",
                    "-common_args", "-G", "-n"]
        procargs.extend(self.addedargs)
        logging.debug(procargs)
        with open(os.devnull, "w") as devnull:
            self._process = subprocess.Popen(
                procargs,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=devnull)
        self.running = True

    def terminate(self):
        """Terminate the ``exiftool`` process of this instance.

        If the subprocess isn't running, this method will do nothing.
        """
        if not self.running:
            return
        # Ask exiftool to leave stay-open mode, then wait for it to exit.
        self._process.stdin.write(b"-stay_open\nFalse\n")
        self._process.stdin.flush()
        self._process.communicate()
        del self._process
        self.running = False

    def __enter__(self):
        """Start the subprocess and return this instance."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Terminate the subprocess when the ``with`` block is left."""
        self.terminate()

    def __del__(self):
        """Terminate the subprocess when the instance is collected."""
        self.terminate()

    def execute(self, *params):
        """Execute the given batch of parameters with ``exiftool``.

        This method accepts any number of parameters and sends them to
        the attached ``exiftool`` process.  The process must be
        running, otherwise ``ValueError`` is raised.  The final
        ``-execute`` necessary to actually run the batch is appended
        automatically; see the documentation of :py:meth:`start()` for
        the common options.  The ``exiftool`` output is read up to the
        end-of-output sentinel and returned as a raw ``bytes`` object,
        excluding the sentinel.

        The parameters must also be raw ``bytes``, in whatever
        encoding exiftool accepts.  For filenames, this should be the
        system's filesystem encoding.

        ``IOError`` is raised if the output stream ends before the
        sentinel has been seen.

        .. note:: This is considered a low-level method, and should
           rarely be needed by application developers.
        """
        if not self.running:
            raise ValueError("ExifTool instance not running.")
        self._process.stdin.write(b"\n".join(params + (b"-execute\n",)))
        self._process.stdin.flush()
        output = b""
        fd = self._process.stdout.fileno()
        # Only the tail needs checking: the sentinel is the last thing
        # exiftool prints for a batch.
        while not output[-32:].strip().endswith(sentinel):
            chunk = os.read(fd, block_size)
            if not chunk:
                # An empty read means end-of-file; without this check the
                # loop would spin forever waiting for a sentinel.
                raise IOError("exiftool closed its output before sending "
                              "the end-of-output sentinel")
            output += chunk
        return output.strip()[:-len(sentinel)]

    def execute_json(self, *params):
        """Execute the given batch of parameters and parse the JSON output.

        This method is similar to :py:meth:`execute()`.  It
        automatically adds the parameter ``-j`` to request JSON output
        from ``exiftool`` and parses the output.  The return value is
        a list of dictionaries, mapping tag names to the corresponding
        values.  All keys are Unicode strings with the tag names
        including the ExifTool group name in the format <group>:<tag>.
        The values can have multiple types.  All strings occurring as
        values will be Unicode strings.  Each dictionary contains the
        name of the file it corresponds to in the key ``"SourceFile"``.

        The parameters to this function must be either raw strings
        (type ``str`` in Python 2.x, type ``bytes`` in Python 3.x) or
        Unicode strings (type ``unicode`` in Python 2.x, type ``str``
        in Python 3.x).  Unicode strings will be encoded using
        system's filesystem encoding.  This behaviour means you can
        pass in filenames according to the convention of the
        respective Python version – as raw strings in Python 2.x and
        as Unicode strings in Python 3.x.
        """
        # A real list, not map(): on Python 3 a map object can only be
        # consumed once.
        encoded = [fsencode(p) for p in params]
        # Run the command once and decode the same bytes twice if needed.
        raw = self.execute(b"-j", *encoded)
        # Some latin bytes won't decode to utf-8.
        # Try utf-8 and fallback to latin.
        # http://stackoverflow.com/a/5552623/1318758
        # https://github.com/jmathai/elodie/issues/127
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            text = raw.decode("latin-1")
        return json.loads(text)

    def get_metadata_batch(self, filenames):
        """Return all meta-data for the given files.

        The return value will have the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(*filenames)

    def get_metadata(self, filename):
        """Return meta-data for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(filename)[0]

    def get_tags_batch(self, tags, filenames):
        """Return only specified tags for the given files.

        The first argument is an iterable of tags.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute_json()`.  ``TypeError`` is raised if either
        argument is a single string.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")
        params = ["-" + t for t in tags]
        params.extend(filenames)
        return self.execute_json(*params)

    def get_tags(self, tags, filename):
        """Return only specified tags for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.get_tags_batch(tags, [filename])[0]

    def get_tag_batch(self, tag, filenames):
        """Extract a single tag from the given files.

        The first argument is a single tag name, as usual in the
        format <group>:<tag>.

        The second argument is an iterable of file names.

        The return value is a list of tag values or ``None`` for
        non-existent tags, in the same order as ``filenames``.
        """
        data = self.get_tags_batch([tag], filenames)
        result = []
        for d in data:
            # Drop the file name so that the first remaining value, if
            # any, is the requested tag.
            d.pop("SourceFile")
            result.append(next(iter(d.values()), None))
        return result

    def get_tag(self, tag, filename):
        """Extract a single tag from a single file.

        The return value is the value of the specified tag, or
        ``None`` if this tag was not found in the file.
        """
        return self.get_tag_batch(tag, [filename])[0]

    def set_tags_batch(self, tags, filenames):
        """Writes the values of the specified tags for the given files.

        The first argument is a dictionary of tags and values.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be dictionary "
                            "of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        params = [u'-%s=%s' % (tag, value) for tag, value in tags.items()]
        params.extend(filenames)
        # execute() only accepts bytes.
        return self.execute(*self._encode_params(params))

    def set_tags(self, tags, filename):
        """Writes the values of the specified tags for the given file.

        This is a convenience function derived from `set_tags_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_tags_batch(tags, [filename])

    def set_keywords_batch(self, mode, keywords, filenames):
        """Modifies the keywords tag for the given files.

        The first argument is the operation mode:
        KW_REPLACE: Replace (i.e. set) the full keywords tag with `keywords`.
        KW_ADD:     Add `keywords` to the keywords tag.
                    If a keyword is present, just keep it.
        KW_REMOVE:  Remove `keywords` from the keywords tag.
                    If a keyword wasn't present, just leave it.

        The second argument is an iterable of key words.

        The third argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.
        ``KeyError`` is raised for an unknown ``mode``.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(keywords, basestring):
            raise TypeError("The argument 'keywords' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        kw_operation = {KW_REPLACE: "-%s=%s",
                        KW_ADD: "-%s+=%s",
                        KW_REMOVE: "-%s-=%s"}[mode]

        params = [kw_operation % (KW_TAGNAME, w) for w in keywords]
        params.extend(filenames)
        logging.debug(params)
        # execute() only accepts bytes; passing the text parameters
        # straight through raises TypeError on Python 3.
        return self.execute(*self._encode_params(params))

    def set_keywords(self, mode, keywords, filename):
        """Modifies the keywords tag for the given file.

        This is a convenience function derived from `set_keywords_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_keywords_batch(mode, keywords, [filename])
486