# -*- coding: utf-8 -*-
# PyExifTool <http://github.com/smarnach/pyexiftool>
# Copyright 2012 Sven Marnach. Enhancements by Leo Broska

# This file is part of PyExifTool.
#
# PyExifTool is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# PyExifTool is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyExifTool.  If not, see <http://www.gnu.org/licenses/>.
19 | |||
"""
PyExifTool is a Python library to communicate with an instance of Phil
Harvey's excellent ExifTool_ command-line application.  The library
provides the class :py:class:`ExifTool` that runs the command-line
tool in batch mode and features methods to send commands to that
program, including methods to extract meta-information from one or
more image files.  Since ``exiftool`` is run in batch mode, only a
single instance needs to be launched and can be reused for many
queries.  This is much more efficient than launching a separate
process for every single query.

.. _ExifTool: http://www.sno.phy.queensu.ca/~phil/exiftool/

The source code can be checked out from the github repository with

::

    git clone git://github.com/smarnach/pyexiftool.git

Alternatively, you can download a tarball_.  There haven't been any
releases yet.

.. _tarball: https://github.com/smarnach/pyexiftool/tarball/master

PyExifTool is licensed under GNU GPL version 3 or later.

Example usage::

    import exiftool

    files = ["a.jpg", "b.png", "c.tif"]
    with exiftool.ExifTool() as et:
        metadata = et.get_metadata_batch(files)
    for d in metadata:
        print("{:20.20} {:20.20}".format(d["SourceFile"],
                                         d["EXIF:DateTimeOriginal"]))
"""
||
57 | |||
from __future__ import unicode_literals

import sys
import subprocess
import os
import json
import warnings
import logging
import codecs
||
67 | |||
# Python 2/3 compatibility: ``basestring`` only exists on Python 2.  On
# Python 3 the name lookup raises NameError and we define a tuple that
# ``isinstance()`` accepts, covering both byte strings and text strings.
try:  # Py3k compatibility
    basestring
except NameError:
    basestring = (bytes, str)
||
72 | |||
executable = "exiftool"
"""The name of the executable to run.

If the executable is not located in one of the paths listed in the
``PATH`` environment variable, the full path should be given here.
"""

# Sentinel indicating the end of the output of a sequence of commands.
# The standard value should be fine.
sentinel = b"{ready}"

# The block size (in bytes) when reading from exiftool.  The standard
# value should be fine, though other values might give better
# performance in some cases.
block_size = 4096

# Constants related to keyword manipulation.
# KW_TAGNAME is the tag that the keyword helpers read and write.
KW_TAGNAME = "IPTC:Keywords"
# Operation modes accepted by the keyword helpers (values 0, 1 and 2).
KW_REPLACE, KW_ADD, KW_REMOVE = range(3)
||
92 | |||
93 | |||
# This code has been adapted from Lib/os.py in the Python source tree
# (sha1 265e36e277f3)
def _fscodec():
    """Build and return the module's ``fsencode`` function.

    The filesystem encoding and the error handler are determined once,
    here, and captured by the returned closure.  The error handler is
    ``"surrogateescape"`` when that handler is registered with
    :py:mod:`codecs` and the filesystem encoding is not ``"mbcs"``;
    otherwise it is ``"strict"``.
    """
    encoding = sys.getfilesystemencoding()
    errors = "strict"
    if encoding != "mbcs":
        try:
            codecs.lookup_error("surrogateescape")
        except LookupError:
            # Handler not available in this interpreter: keep "strict".
            pass
        else:
            errors = "surrogateescape"

    def fsencode(filename):
        """
        Encode filename to the filesystem encoding with 'surrogateescape' error
        handler, return bytes unchanged. On Windows, use 'strict' error handler if
        the file system encoding is 'mbcs' (which is the default encoding).
        """
        if isinstance(filename, bytes):
            return filename
        return filename.encode(encoding, errors)

    return fsencode


fsencode = _fscodec()
# The factory is only needed once; remove it from the module namespace.
del _fscodec
||
122 | |||
# String helper
def strip_nl(s):
    """Collapse a multi-line string into a single line.

    The lines of ``s`` (as split by ``splitlines()``) are joined with a
    single space.  Both text strings and byte strings are accepted; the
    result has the same type as the argument.  Byte strings need their
    own separator because joining bytes with a text separator raises
    ``TypeError`` on Python 3.
    """
    separator = b" " if isinstance(s, bytes) else " "
    return separator.join(s.splitlines())
||
126 | |||
127 | |||
# Error checking functions
# Note: They are quite fragile, because they just parse the output text
# from exiftool.
def check_ok(result):
    """Evaluates the output from an exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method; it may be a
    byte string, a text string or ``None``.

    The result is True or False: ``False`` if ``result`` is ``None`` or
    contains the phrase ``due to errors``, ``True`` otherwise.
    """
    if result is None:
        return False
    # Search with a needle of the same type as the haystack: testing a
    # text needle against bytes raises TypeError on Python 3.
    marker = b"due to errors" if isinstance(result, bytes) else "due to errors"
    return marker not in result
||
138 | |||
def format_error(result):
    """Evaluates the output from an exiftool write operation (e.g. `set_tags`)

    The argument is the result from the execute method; it may be a
    byte string, a text string or ``None``.

    The result is a human readable one-line string.
    """
    if result is None:
        return "exiftool operation can't be evaluated: No result given"
    if isinstance(result, bytes):
        # Decode once up front so the helpers below only ever see text
        # and the message does not end up containing a ``b'...'`` repr.
        # "replace" keeps this best-effort: a report about an error must
        # not itself fail on undecodable bytes.
        result = result.decode("utf-8", "replace")
    if check_ok(result):
        return 'exiftool finished probably properly. ("%s")' % strip_nl(result)
    return 'exiftool finished with error: "%s"' % strip_nl(result)
||
153 | |||
154 | |||
class ExifTool(object):
    """Run the `exiftool` command-line tool and communicate with it.

    You can pass two arguments to the constructor:

    - ``addedargs`` (list of strings): contains additional parameters for
      the stay-open instance of exiftool
    - ``executable_`` (string): file name of the ``exiftool`` executable.
      The default value ``exiftool`` will only work if the executable
      is in your ``PATH``

    Most methods of this class are only available after calling
    :py:meth:`start()`, which will actually launch the subprocess.  To
    avoid leaving the subprocess running, make sure to call the
    :py:meth:`terminate()` method when finished using the instance.
    This method will also be implicitly called when the instance is
    garbage collected, but there are circumstances when this won't ever
    happen, so you should not rely on the implicit process
    termination.  Subprocesses won't be automatically terminated if
    the parent process exits, so a leaked subprocess will stay around
    until manually killed.

    A convenient way to make sure that the subprocess is terminated is
    to use the :py:class:`ExifTool` instance as a context manager::

        with ExifTool() as et:
            ...

    .. warning:: Note that there is no error handling.  Nonsensical
       options will be silently ignored by exiftool, so there's not
       much that can be done in that regard.  You should avoid passing
       non-existent files to any of the methods, since this will lead
       to undefined behaviour.

    .. py:attribute:: running

       A Boolean value indicating whether this instance is currently
       associated with a running subprocess.
    """

    def __init__(self, executable_=None, addedargs=None):
        """Store the configuration; the subprocess is not started here.

        ``executable_`` overrides the module-level ``executable`` when
        given.  ``addedargs`` must be a list (or ``None``); anything
        else raises ``TypeError``.
        """
        # Set first so that terminate() -- and therefore __del__() --
        # is safe even when the validation below raises.
        self.running = False

        if executable_ is None:
            self.executable = executable
        else:
            self.executable = executable_

        if addedargs is None:
            self.addedargs = []
        elif isinstance(addedargs, list):
            self.addedargs = addedargs
        else:
            raise TypeError("addedargs not a list of strings")

    @staticmethod
    def _encode_param(param):
        """Return ``param`` as bytes for :py:meth:`execute()`.

        Byte strings are passed through unchanged; text is encoded as
        UTF-8.
        """
        if isinstance(param, bytes):
            return param
        return param.encode("utf-8")

    def start(self):
        """Start an ``exiftool`` process in batch mode for this instance.

        This method will issue a ``UserWarning`` if the subprocess is
        already running.  The process is started with the ``-G`` and
        ``-n`` as common arguments, which are automatically included
        in every command you run with :py:meth:`execute()`.  The
        ``addedargs`` given to the constructor are appended after them.
        """
        if self.running:
            warnings.warn("ExifTool already running; doing nothing.")
            return
        # stderr of the subprocess is discarded.
        with open(os.devnull, "w") as devnull:
            procargs = [self.executable, "-stay_open", "True", "-@", "-",
                        "-common_args", "-G", "-n"]
            procargs.extend(self.addedargs)
            logging.debug(procargs)
            self._process = subprocess.Popen(
                procargs,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=devnull)
        self.running = True

    def terminate(self):
        """Terminate the ``exiftool`` process of this instance.

        If the subprocess isn't running, this method will do nothing.
        """
        # getattr: this is also reached from __del__ on instances whose
        # __init__ never ran.
        if not getattr(self, "running", False):
            return
        try:
            self._process.stdin.write(b"-stay_open\nFalse\n")
            self._process.stdin.flush()
            self._process.communicate()
        finally:
            # Always drop the process handle, even if talking to the
            # subprocess failed, so a later call does not retry on it.
            del self._process
            self.running = False

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.terminate()

    def __del__(self):
        self.terminate()

    def execute(self, *params):
        """Execute the given batch of parameters with ``exiftool``.

        This method accepts any number of parameters and sends them to
        the attached ``exiftool`` process.  The process must be
        running, otherwise ``ValueError`` is raised.  The final
        ``-execute`` necessary to actually run the batch is appended
        automatically; see the documentation of :py:meth:`start()` for
        the common options.  The ``exiftool`` output is read up to the
        end-of-output sentinel and returned as a raw ``bytes`` object,
        excluding the sentinel.

        The parameters must also be raw ``bytes``, in whatever
        encoding exiftool accepts.  For filenames, this should be the
        system's filesystem encoding.

        ``IOError`` is raised if the output stream ends before the
        sentinel has been seen.

        .. note:: This is considered a low-level method, and should
           rarely be needed by application developers.
        """
        if not self.running:
            raise ValueError("ExifTool instance not running.")
        self._process.stdin.write(b"\n".join(params + (b"-execute\n",)))
        self._process.stdin.flush()
        output = b""
        fd = self._process.stdout.fileno()
        # Only the tail of the buffer needs to be inspected for the
        # sentinel on each iteration.
        while not output[-32:].strip().endswith(sentinel):
            chunk = os.read(fd, block_size)
            if not chunk:
                # An empty read means end-of-file; without this check
                # the loop would never terminate.
                raise IOError("exiftool closed its output before the "
                              "end-of-output sentinel was received")
            output += chunk
        return output.strip()[:-len(sentinel)]

    def execute_json(self, *params):
        """Execute the given batch of parameters and parse the JSON output.

        This method is similar to :py:meth:`execute()`.  It
        automatically adds the parameter ``-j`` to request JSON output
        from ``exiftool`` and parses the output.  The return value is
        a list of dictionaries, mapping tag names to the corresponding
        values.  All keys are Unicode strings with the tag names
        including the ExifTool group name in the format <group>:<tag>.
        The values can have multiple types.  All strings occurring as
        values will be Unicode strings.  Each dictionary contains the
        name of the file it corresponds to in the key ``"SourceFile"``.

        The parameters to this function must be either raw strings
        (type ``str`` in Python 2.x, type ``bytes`` in Python 3.x) or
        Unicode strings (type ``unicode`` in Python 2.x, type ``str``
        in Python 3.x).  Unicode strings will be encoded using
        system's filesystem encoding.  This behaviour means you can
        pass in filenames according to the convention of the
        respective Python version - as raw strings in Python 2.x and
        as Unicode strings in Python 3.x.
        """
        encoded = [fsencode(p) for p in params]
        # Run the command exactly once and only retry the *decoding*.
        raw = self.execute(b"-j", *encoded)
        # Some latin bytes won't decode to utf-8.
        # Try utf-8 and fallback to latin.
        # http://stackoverflow.com/a/5552623/1318758
        # https://github.com/jmathai/elodie/issues/127
        try:
            text = raw.decode("utf-8")
        except UnicodeDecodeError:
            text = raw.decode("latin-1")
        return json.loads(text)

    def get_metadata_batch(self, filenames):
        """Return all meta-data for the given files.

        The return value will have the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(*filenames)

    def get_metadata(self, filename):
        """Return meta-data for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.execute_json(filename)[0]

    def get_tags_batch(self, tags, filenames):
        """Return only specified tags for the given files.

        The first argument is an iterable of tags.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.

        The format of the return value is the same as for
        :py:meth:`execute_json()`.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")
        params = ["-" + t for t in tags]
        params.extend(filenames)
        return self.execute_json(*params)

    def get_tags(self, tags, filename):
        """Return only specified tags for a single file.

        The returned dictionary has the format described in the
        documentation of :py:meth:`execute_json()`.
        """
        return self.get_tags_batch(tags, [filename])[0]

    def get_tag_batch(self, tag, filenames):
        """Extract a single tag from the given files.

        The first argument is a single tag name, as usual in the
        format <group>:<tag>.

        The second argument is an iterable of file names.

        The return value is a list of tag values or ``None`` for
        non-existent tags, in the same order as ``filenames``.
        """
        data = self.get_tags_batch([tag], filenames)
        result = []
        for d in data:
            # Drop the file name so that the only remaining entry (if
            # any) is the requested tag.
            d.pop("SourceFile")
            result.append(next(iter(d.values()), None))
        return result

    def get_tag(self, tag, filename):
        """Extract a single tag from a single file.

        The return value is the value of the specified tag, or
        ``None`` if this tag was not found in the file.
        """
        return self.get_tag_batch(tag, [filename])[0]

    def set_tags_batch(self, tags, filenames):
        """Writes the values of the specified tags for the given files.

        The first argument is a dictionary of tags and values.  The tag names may
        include group names, as usual in the format <group>:<tag>.

        The second argument is an iterable of file names.  Text is sent
        to exiftool UTF-8 encoded; byte strings are sent unchanged.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(tags, basestring):
            raise TypeError("The argument 'tags' must be dictionary "
                            "of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        params = [u'-%s=%s' % (tag, value) for tag, value in tags.items()]
        params.extend(filenames)
        return self.execute(*[self._encode_param(p) for p in params])

    def set_tags(self, tags, filename):
        """Writes the values of the specified tags for the given file.

        This is a convenience function derived from `set_tags_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_tags_batch(tags, [filename])

    def set_keywords_batch(self, mode, keywords, filenames):
        """Modifies the keywords tag for the given files.

        The first argument is the operation mode:

        KW_REPLACE: Replace (i.e. set) the full keywords tag with `keywords`.
        KW_ADD:     Add `keywords` to the keywords tag.
                    If a keyword is present, just keep it.
        KW_REMOVE:  Remove `keywords` from the keywords tag.
                    If a keyword wasn't present, just leave it.

        Any other mode raises ``KeyError``.

        The second argument is an iterable of key words.

        The third argument is an iterable of file names.  Text is sent
        to exiftool UTF-8 encoded; byte strings are sent unchanged.

        The format of the return value is the same as for
        :py:meth:`execute()`.

        It can be passed into `check_ok()` and `format_error()`.
        """
        # Explicitly ruling out strings here because passing in a
        # string would lead to strange and hard-to-find errors
        if isinstance(keywords, basestring):
            raise TypeError("The argument 'keywords' must be "
                            "an iterable of strings")
        if isinstance(filenames, basestring):
            raise TypeError("The argument 'filenames' must be "
                            "an iterable of strings")

        kw_operation = {KW_REPLACE: "-%s=%s",
                        KW_ADD: "-%s+=%s",
                        KW_REMOVE: "-%s-=%s"}[mode]

        params = [kw_operation % (KW_TAGNAME, w) for w in keywords]
        params.extend(filenames)
        logging.debug(params)
        # execute() joins its parameters with bytes, so text has to be
        # encoded first.
        return self.execute(*[self._encode_param(p) for p in params])

    def set_keywords(self, mode, keywords, filename):
        """Modifies the keywords tag for the given file.

        This is a convenience function derived from `set_keywords_batch()`.
        Only difference is that it takes as last argument only one file name
        as a string.
        """
        return self.set_keywords_batch(mode, keywords, [filename])
||
478 |