1 | # -*- coding: utf-8 -*- |
||
2 | # PyExifTool <http://github.com/smarnach/pyexiftool> |
||
3 | # Copyright 2012 Sven Marnach. Enhancements by Leo Broska |
||
4 | |||
5 | # This file is part of PyExifTool. |
||
6 | # |
||
7 | # PyExifTool is free software: you can redistribute it and/or modify |
||
8 | # it under the terms of the GNU General Public License as published by |
||
9 | # the Free Software Foundation, either version 3 of the License, or |
||
10 | # (at your option) any later version. |
||
11 | # |
||
12 | # PyExifTool is distributed in the hope that it will be useful, |
||
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
15 | # GNU General Public License for more details. |
||
16 | # |
||
17 | # You should have received a copy of the GNU General Public License |
||
18 | # along with PyExifTool. If not, see <http://www.gnu.org/licenses/>. |
||
19 | |||
20 | """ |
||
21 | PyExifTool is a Python library to communicate with an instance of Phil |
||
22 | Harvey's excellent ExifTool_ command-line application. The library |
||
23 | provides the class :py:class:`ExifTool` that runs the command-line |
||
24 | tool in batch mode and features methods to send commands to that |
||
25 | program, including methods to extract meta-information from one or |
||
26 | more image files. Since ``exiftool`` is run in batch mode, only a |
||
27 | single instance needs to be launched and can be reused for many |
||
28 | queries. This is much more efficient than launching a separate |
||
29 | process for every single query. |
||
30 | |||
31 | .. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/ |
||
32 | |||
33 | The source code can be checked out from the github repository with |
||
34 | |||
35 | :: |
||
36 | |||
37 | git clone git://github.com/smarnach/pyexiftool.git |
||
38 | |||
39 | Alternatively, you can download a tarball_. There haven't been any |
||
40 | releases yet. |
||
41 | |||
42 | .. _tarball: https://github.com/smarnach/pyexiftool/tarball/master |
||
43 | |||
44 | PyExifTool is licenced under GNU GPL version 3 or later. |
||
45 | |||
46 | Example usage:: |
||
47 | |||
48 | import exiftool |
||
49 | |||
50 | files = ["a.jpg", "b.png", "c.tif"] |
||
51 | with exiftool.ExifTool() as et: |
||
52 | metadata = et.get_metadata_batch(files) |
||
53 | for d in metadata: |
||
54 | print("{:20.20} {:20.20}".format(d["SourceFile"], |
||
55 | d["EXIF:DateTimeOriginal"])) |
||
56 | """ |
||
57 | |||
58 | from __future__ import unicode_literals |
||
59 | |||
60 | import sys |
||
61 | import subprocess |
||
62 | import os |
||
63 | import json |
||
64 | import warnings |
||
65 | import logging |
||
66 | import codecs |
||
67 | |||
68 | from future.utils import with_metaclass |
||
69 | |||
# Python 2 provides ``basestring``; Python 3 does not.  Define a stand-in so
# that ``isinstance(x, basestring)`` recognises both raw and text strings on
# either interpreter.
try:  # Py3k compatibility
    basestring
except NameError:
    basestring = (bytes, str)
||
74 | |||
# Default program launched by ExifTool instances when no explicit
# executable is passed to the constructor.
executable = "exiftool"
"""The name of the executable to run.

If the executable is not located in one of the paths listed in the
``PATH`` environment variable, the full path should be given here.
"""

# Marker that terminates the output of one batch of commands; output is
# read until this byte string shows up.  The standard value should be fine.
sentinel = b"{ready}"

# Number of bytes requested per read from exiftool's output.  The standard
# value should be fine, though other values might give better performance
# in some cases.
block_size = 4096

# Constants related to keyword manipulation: the tag that holds the
# keywords, followed by the three supported operation modes.
KW_TAGNAME = "IPTC:Keywords"
KW_REPLACE, KW_ADD, KW_REMOVE = 0, 1, 2
||
94 | |||
95 | |||
96 | # This code has been adapted from Lib/os.py in the Python source tree |
||
97 | # (sha1 265e36e277f3) |
||
def _fscodec():
    """Build the module's ``fsencode`` function.

    The filesystem encoding and the error handler are determined once,
    when this factory runs, and are captured by the returned closure.
    """
    fs_encoding = sys.getfilesystemencoding()

    def _choose_error_handler():
        # 'mbcs' always uses the strict handler.
        if fs_encoding == "mbcs":
            return "strict"
        # Use 'surrogateescape' only when this interpreter registers it.
        try:
            codecs.lookup_error("surrogateescape")
        except LookupError:
            return "strict"
        return "surrogateescape"

    error_handler = _choose_error_handler()

    def fsencode(filename):
        """
        Encode filename to the filesystem encoding with 'surrogateescape' error
        handler, return bytes unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).
        """
        if not isinstance(filename, bytes):
            return filename.encode(fs_encoding, error_handler)
        return filename

    return fsencode


# Keep only the ready-made encoder in the module namespace.
fsencode = _fscodec()
del _fscodec
||
124 | |||
# String helpers


def _to_text(result):
    """Return `result` as text.

    Raw ``bytes`` (the type returned by the ``execute`` method) are decoded
    as UTF-8, with undecodable bytes replaced, so that the helpers below can
    use ordinary text operations.  Anything else is returned unchanged.
    """
    if isinstance(result, bytes):
        return result.decode("utf-8", "replace")
    return result


def strip_nl(s):
    """Join all lines of `s` (text or bytes) into one line of text.

    Line breaks are replaced by single spaces.
    """
    return " ".join(_to_text(s).splitlines())


# Error checking functions
# Note: They are quite fragile, because they just parse the output text
# from exiftool.
def check_ok(result):
    """Evaluates the output from an exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method (bytes or text),
    or ``None``.

    The result is True or False: False if `result` is ``None`` or contains
    the phrase "due to errors", True otherwise.
    """
    if result is None:
        return False
    # Decode first: a text-in-bytes containment test raises TypeError on
    # Python 3.
    return "due to errors" not in _to_text(result)


def format_error(result):
    """Evaluates the output from an exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method (bytes or text),
    or ``None``.

    The result is a human readable one-line string.
    """
    if result is None:
        return "exiftool operation can't be evaluated: No result given"
    one_line = strip_nl(result)
    if check_ok(result):
        return 'exiftool finished probably properly. ("%s")' % one_line
    return 'exiftool finished with error: "%s"' % one_line
||
155 | |||
class Singleton(type):
    """Metaclass to use the singleton [anti-]pattern

    The first call of a class using this metaclass creates the instance
    and stores it on the class; every later call returns that stored
    instance, and the arguments of those later calls are not used.
    """
    # Fallback value seen by classes that have not stored an instance yet.
    instance = None

    def __call__(cls, *args, **kwargs):
        existing = cls.instance
        if existing is not None:
            return existing
        created = super(Singleton, cls).__call__(*args, **kwargs)
        cls.instance = created
        return created
||
164 | |||
class ExifTool(object, with_metaclass(Singleton)):
    """Run the `exiftool` command-line tool and communicate to it.

    You can pass two arguments to the constructor:
    - ``executable_`` (string): file name of the ``exiftool`` executable.
      If omitted, the module-level ``executable`` is used; its default
      value ``exiftool`` will only work if the executable is in your
      ``PATH``
    - ``addedargs`` (list of strings): contains additional parameters for
      the stay-open instance of exiftool

    The class uses the ``Singleton`` metaclass, so the constructor
    arguments only take effect the first time the class is instantiated;
    later calls return the existing instance.

    Most methods of this class are only available after calling
    :py:meth:`start()`, which will actually launch the subprocess.  To
    avoid leaving the subprocess running, make sure to call
    :py:meth:`terminate()` method when finished using the instance.
    This method will also be implicitly called when the instance is
    garbage collected, but there are circumstances when this won't ever
    happen, so you should not rely on the implicit process
    termination.  Subprocesses won't be automatically terminated if
    the parent process exits, so a leaked subprocess will stay around
    until manually killed.

    A convenient way to make sure that the subprocess is terminated is
    to use the :py:class:`ExifTool` instance as a context manager::

        with ExifTool() as et:
            ...

    .. warning:: Note that there is no error handling.  Nonsensical
       options will be silently ignored by exiftool, so there's not
       much that can be done in that regard.  You should avoid passing
       non-existent files to any of the methods, since this will lead
       to undefined behaviour.

    .. py:attribute:: running

       A Boolean value indicating whether this instance is currently
       associated with a running subprocess.
    """

    def __init__(self, executable_=None, addedargs=None):
        # Set the state flag first: terminate() (also reached through
        # __del__) reads it, and must work even if validation below raises.
        self.running = False

        self.executable = executable if executable_ is None else executable_

        if addedargs is None:
            self.addedargs = []
        elif isinstance(addedargs, list):
            self.addedargs = addedargs
        else:
            raise TypeError("addedargs not a list of strings")

    @staticmethod
    def _as_utf8(param):
        """Return `param` as bytes: text is UTF-8 encoded, bytes pass through."""
        if isinstance(param, bytes):
            return param
        return param.encode("utf-8")

    def start(self):
        """Start an ``exiftool`` process in batch mode for this instance.

        This method will issue a ``UserWarning`` if the subprocess is
        already running.  The process is started with the ``-G`` and
        ``-n`` as common arguments, which are automatically included
        in every command you run with :py:meth:`execute()`.  The
        ``addedargs`` given to the constructor are appended after them.
        """
        if self.running:
            warnings.warn("ExifTool already running; doing nothing.")
            return
        procargs = [self.executable, "-stay_open", "True", "-@", "-",
                    "-common_args", "-G", "-n"]
        procargs.extend(self.addedargs)
        logging.debug(procargs)
        # exiftool's stderr is discarded; only stdin/stdout are piped.
        with open(os.devnull, "w") as devnull:
            self._process = subprocess.Popen(
                procargs,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=devnull)
        self.running = True

    def terminate(self):
        """Terminate the ``exiftool`` process of this instance.

        If the subprocess isn't running, this method will do nothing.
        """
        # getattr: the attribute may be missing on a half-constructed
        # instance that is being garbage collected.
        if not getattr(self, "running", False):
            return
        process = self._process
        try:
            process.stdin.write(b"-stay_open\nFalse\n")
            process.stdin.flush()
        except (IOError, OSError):
            # The pipe is already closed, i.e. exiftool has exited on its
            # own.  There is nobody left to ask; just reap the process.
            pass
        process.communicate()
        del self._process
        self.running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.terminate()

    def __del__(self):
        self.terminate()

    def execute(self, *params):
        """Execute the given batch of parameters with ``exiftool``.

        This method accepts any number of parameters and sends them to
        the attached ``exiftool`` process.  The process must be
        running, otherwise ``ValueError`` is raised.  The final
        ``-execute`` necessary to actually run the batch is appended
        automatically; see the documentation of :py:meth:`start()` for
        the common options.  The ``exiftool`` output is read up to the
        end-of-output sentinel and returned as a raw ``bytes`` object,
        excluding the sentinel.

        The parameters must also be raw ``bytes``, in whatever
        encoding exiftool accepts.  For filenames, this should be the
        system's filesystem encoding.

        ``IOError`` is raised if ``exiftool`` closes its output before
        the sentinel has been received.

        .. note:: This is considered a low-level method, and should
           rarely be needed by application developers.
        """
        if not self.running:
            raise ValueError("ExifTool instance not running.")
        self._process.stdin.write(b"\n".join(params + (b"-execute\n",)))
        self._process.stdin.flush()
        output = b""
        fd = self._process.stdout.fileno()
        # Only the tail of the buffer needs to be inspected for the sentinel.
        while not output[-32:].strip().endswith(sentinel):
            chunk = os.read(fd, block_size)
            if not chunk:
                # An empty read means end-of-file: the sentinel will never
                # arrive, so fail instead of looping forever.
                raise IOError("exiftool closed its output before sending "
                              "the end-of-output sentinel")
            output += chunk
        return output.strip()[:-len(sentinel)]

    def execute_json(self, *params):
        """Execute the given batch of parameters and parse the JSON output.

        This method is similar to :py:meth:`execute()`.  It
        automatically adds the parameter ``-j`` to request JSON output
        from ``exiftool`` and parses the output.  The return value is
        a list of dictionaries, mapping tag names to the corresponding
        values.  All keys are Unicode strings with the tag names
        including the ExifTool group name in the format <group>:<tag>.
        The values can have multiple types.  All strings occurring as
        values will be Unicode strings.  Each dictionary contains the
        name of the file it corresponds to in the key ``"SourceFile"``.

        The parameters to this function must be either raw strings
        (type ``str`` in Python 2.x, type ``bytes`` in Python 3.x) or
        Unicode strings (type ``unicode`` in Python 2.x, type ``str``
        in Python 3.x).  Unicode strings will be encoded using
        system's filesystem encoding.  This behaviour means you can
        pass in filenames according to the convention of the
        respective Python version - as raw strings in Python 2.x and
        as Unicode strings in Python 3.x.
        """
        # A real list, not map(): the parameters must not be a one-shot
        # iterator.
        encoded = [fsencode(p) for p in params]
        raw = self.execute(b"-j", *encoded)
        # Some latin bytes won't decode to utf-8.
        # Try utf-8 and fallback to latin.
        # http://stackoverflow.com/a/5552623/1318758
        # https://github.com/jmathai/elodie/issues/127
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            # Decode the same output again; the command is not re-run.
            text = raw.decode("latin-1")
        return json.loads(text)

    def get_metadata_batch(self, filenames):
        """Return all meta-data for the given files.

        The return value will have the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(*filenames)

    def get_metadata(self, filename):
        """Return meta-data for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(filename)[0]

    def get_tags_batch(self, tags, filenames):
        """Return only specified tags for the given files.

        The first argument is an iterable of tags.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute_json()`.

        ``TypeError`` is raised if either argument is a single string.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")
        params = ["-" + t for t in tags]
        params.extend(filenames)
        return self.execute_json(*params)

    def get_tags(self, tags, filename):
        """Return only specified tags for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.get_tags_batch(tags, [filename])[0]

    def get_tag_batch(self, tag, filenames):
        """Extract a single tag from the given files.

        The first argument is a single tag name, as usual in the
        format <group>:<tag>.

        The second argument is an iterable of file names.

        The return value is a list of tag values or ``None`` for
        non-existent tags, in the same order as ``filenames``.
        """
        data = self.get_tags_batch([tag], filenames)
        result = []
        for d in data:
            # Drop the file name so that the first remaining value, if
            # any, is the requested tag.
            d.pop("SourceFile")
            result.append(next(iter(d.values()), None))
        return result

    def get_tag(self, tag, filename):
        """Extract a single tag from a single file.

        The return value is the value of the specified tag, or
        ``None`` if this tag was not found in the file.
        """
        return self.get_tag_batch(tag, [filename])[0]

    def set_tags_batch(self, tags, filenames):
        """Writes the values of the specified tags for the given files.

        The first argument is a dictionary of tags and values.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.

        ``TypeError`` is raised if either argument is a single string.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be dictionary "
                            "of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        params = [u'-%s=%s' % (tag, value) for tag, value in tags.items()]
        params.extend(filenames)
        # execute() needs bytes; file names that already are bytes are
        # passed through unchanged.
        return self.execute(*[self._as_utf8(p) for p in params])

    def set_tags(self, tags, filename):
        """Writes the values of the specified tags for the given file.

        This is a convenience function derived from `set_tags_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_tags_batch(tags, [filename])

    def set_keywords_batch(self, mode, keywords, filenames):
        """Modifies the keywords tag for the given files.

        The first argument is the operation mode:
        KW_REPLACE: Replace (i.e. set) the full keywords tag with `keywords`.
        KW_ADD:     Add `keywords` to the keywords tag.
                    If a keyword is present, just keep it.
        KW_REMOVE:  Remove `keywords` from the keywords tag.
                    If a keyword wasn't present, just leave it.

        The second argument is an iterable of key words.

        The third argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.

        ``TypeError`` is raised if `keywords` or `filenames` is a single
        string; ``KeyError`` is raised for an unknown `mode`.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(keywords, basestring):
            raise TypeError("The argument 'keywords' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        kw_operation = {KW_REPLACE: "-%s=%s",
                        KW_ADD: "-%s+=%s",
                        KW_REMOVE: "-%s-=%s"}[mode]

        params = [kw_operation % (KW_TAGNAME, w) for w in keywords]
        params.extend(filenames)
        logging.debug(params)
        # execute() needs bytes, so encode the same way set_tags_batch()
        # does.
        return self.execute(*[self._as_utf8(p) for p in params])

    def set_keywords(self, mode, keywords, filename):
        """Modifies the keywords tag for the given file.

        This is a convenience function derived from `set_keywords_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_keywords_batch(mode, keywords, [filename])