1
|
|
|
""" |
2
|
|
|
Technical image metadata |
3
|
|
|
""" |
4
|
|
|
|
5
|
|
|
from math import sqrt |
6
|
|
|
from io import BytesIO |
7
|
|
|
from subprocess import run, PIPE |
8
|
|
|
from distutils.spawn import find_executable as which |
9
|
|
|
from ocrd_utils import getLogger |
10
|
|
|
|
11
|
|
|
class OcrdExif(): |
12
|
|
|
"""Represents technical image metadata. |
13
|
|
|
|
14
|
|
|
Attributes: |
15
|
|
|
width (int): pixel dimensions |
16
|
|
|
height (int): pixel dimensions |
17
|
|
|
photometricInterpretation (str): pixel type/depth, e.g. \ |
18
|
|
|
|
19
|
|
|
* ``1`` for b/w, |
20
|
|
|
* ``L`` for 8-bit grayscale, |
21
|
|
|
* ``RGB`` for 24-bit truecolor, |
22
|
|
|
* ``I`` for 32-bit signed integer grayscale, |
23
|
|
|
* ``F`` for floating-point grayscale |
24
|
|
|
(see PIL concept **mode**) |
25
|
|
|
resolution (int): pixel density |
26
|
|
|
xResolution (int): pixel density |
27
|
|
|
yResolution (int): pixel density |
28
|
|
|
resolutionUnit (str): unit of measurement (either ``inches`` or ``cm``) |
29
|
|
|
""" |
30
|
|
|
|
31
|
|
|
def __init__(self, img): |
32
|
|
|
""" |
33
|
|
|
Arguments: |
34
|
|
|
img (`PIL.Image`): PIL image technical metadata is about. |
35
|
|
|
""" |
36
|
|
|
# print(img.__dict__) |
37
|
|
|
self.width = img.width |
38
|
|
|
self.height = img.height |
39
|
|
|
self.photometricInterpretation = img.mode |
40
|
|
|
self.n_frames = img.n_frames if 'n_frames' in img.__dict__ else 1 |
41
|
|
|
if which('identify'): |
42
|
|
|
self.run_identify(img) |
43
|
|
|
else: |
44
|
|
|
getLogger('ocrd_exif').warning("ImageMagick 'identify' not available, Consider installing ImageMagick for more robust pixel density estimation") |
45
|
|
|
self.run_pil(img) |
46
|
|
|
|
47
|
|
|
def run_identify(self, img): |
48
|
|
|
for prop in ['compression', 'photometric_interpretation']: |
49
|
|
|
setattr(self, prop, img.info[prop] if prop in img.info else None) |
50
|
|
|
if img.filename: |
51
|
|
|
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', img.filename], check=False, stderr=PIPE, stdout=PIPE) |
52
|
|
|
else: |
53
|
|
|
with BytesIO() as bio: |
54
|
|
|
img.save(bio, format=img.format) |
55
|
|
|
ret = run(['identify', '-format', r'%[resolution.x] %[resolution.y] %U', '/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue()) |
56
|
|
|
if ret.returncode: |
57
|
|
|
stderr = ret.stderr.decode('utf-8') |
58
|
|
|
if 'no decode delegate for this image format' in stderr: |
59
|
|
|
getLogger('ocrd_exif').warning("ImageMagick does not support the '%s' image format. ", img.format) |
60
|
|
|
else: |
61
|
|
|
getLogger('ocrd_exif').error("identify exited with non-zero %s: %s", ret.returncode, stderr) |
62
|
|
|
self.xResolution = self.yResolution = 1 |
63
|
|
|
self.resolutionUnit = 'inches' |
64
|
|
|
else: |
65
|
|
|
tokens = ret.stdout.decode('utf-8').split(' ', 3) |
66
|
|
|
self.xResolution = max(int(float(tokens[0])), 1) |
67
|
|
|
self.yResolution = max(int(float(tokens[1])), 1) |
68
|
|
|
self.resolutionUnit = 'inches' if tokens[2] == 'undefined' else \ |
69
|
|
|
'cm' if tokens[2] == 'PixelsPerCentimeter' else \ |
70
|
|
|
'inches' |
71
|
|
|
self.resolution = round(sqrt(self.xResolution * self.yResolution)) |
72
|
|
|
|
73
|
|
|
def run_pil(self, img): |
74
|
|
|
if img.format in ('TIFF', 'PNG') and 'dpi' in img.info: |
75
|
|
|
self.xResolution = int(img.info['dpi'][0]) |
76
|
|
|
self.yResolution = int(img.info['dpi'][1]) |
77
|
|
|
if img.format == 'TIFF': |
78
|
|
|
self.resolutionUnit = 'cm' if img.tag.get(296) == 3 else 'inches' |
79
|
|
|
else: |
80
|
|
|
self.resolutionUnit = 'inches' |
81
|
|
|
elif img.format == 'JPEG' and 'jfif_density' in img.info: |
82
|
|
|
self.xResolution = img.info['jfif_density'][0] |
83
|
|
|
self.yResolution = img.info['jfif_density'][1] |
84
|
|
|
self.resolutionUnit = 'cm' if img.info['jfif_unit'] == 2 else 'inches' |
85
|
|
|
elif img.format == 'PNG' and 'aspect' in img.info: |
86
|
|
|
self.xResolution = img.info['aspect'][0] |
87
|
|
|
self.yResolution = img.info['aspect'][1] |
88
|
|
|
self.resolutionUnit = 'inches' |
89
|
|
|
else: |
90
|
|
|
# if img.format == 'JPEG2000': |
91
|
|
|
# import sys |
92
|
|
|
# print('JPEG 2000 not supported yet :(', file=sys.stderr) |
93
|
|
|
self.xResolution = 1 |
94
|
|
|
self.yResolution = 1 |
95
|
|
|
self.resolutionUnit = 'inches' |
96
|
|
|
# print('format=%s type=%s' % (img.format, type(self.xResolution)) |
97
|
|
|
self.resolution = round(sqrt(self.xResolution * self.yResolution)) |
98
|
|
|
|
99
|
|
|
def to_xml(self): |
100
|
|
|
""" |
101
|
|
|
Serialize all properties as XML string. |
102
|
|
|
""" |
103
|
|
|
ret = '<exif>' |
104
|
|
|
for k in self.__dict__: |
105
|
|
|
ret += '<%s>%s</%s>' % (k, self.__dict__[k], k) |
106
|
|
|
ret += '</exif>' |
107
|
|
|
return ret |
108
|
|
|
|