Passed
Push — master ( 6d359e...f9c2b6 )
by Konstantin
02:35
created

ocrd_models.ocrd_exif   A

Complexity

Total Complexity 24

Size/Duplication

Total Lines 108
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
wmc 24
eloc 63
dl 0
loc 108
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A OcrdExif.to_xml() 0 9 2
C OcrdExif.run_identify() 0 25 9
C OcrdExif.run_pil() 0 25 10
A OcrdExif.__init__() 0 15 3
1
"""
2
Technical image metadata
3
"""
4
5
from math import sqrt
6
from io import BytesIO
7
from subprocess import run, PIPE
8
from distutils.spawn import find_executable as which
9
from ocrd_utils import getLogger
10
11
class OcrdExif():
    """Represents technical image metadata.

    Attributes:
        width (int): pixel dimensions
        height (int): pixel dimensions
        photometricInterpretation (str): pixel type/depth, e.g.

            * ``1`` for b/w,
            * ``L`` for 8-bit grayscale,
            * ``RGB`` for 24-bit truecolor,
            * ``I`` for 32-bit signed integer grayscale,
            * ``F`` for floating-point grayscale

          (see PIL concept **mode**)
        n_frames (int): number of frames of the image (``1`` if the image
            does not expose ``n_frames``)
        resolution (int): pixel density (rounded geometric mean of
            ``xResolution`` and ``yResolution``)
        xResolution (int): pixel density, always at least 1
        yResolution (int): pixel density, always at least 1
        resolutionUnit (str): unit of measurement (either ``inches`` or ``cm``)
        compression: ``img.info['compression']`` or ``None``; only set by
            :py:meth:`run_identify`
        photometric_interpretation: ``img.info['photometric_interpretation']``
            or ``None``; only set by :py:meth:`run_identify`
    """

    def __init__(self, img):
        """
        Arguments:
            img (`PIL.Image`): PIL image technical metadata is about.
        """
        self.width = img.width
        self.height = img.height
        self.photometricInterpretation = img.mode
        # ``n_frames`` may be a property rather than an instance attribute,
        # so look it up on the object instead of in ``img.__dict__``.
        self.n_frames = getattr(img, 'n_frames', 1)
        if which('identify'):
            self.run_identify(img)
        else:
            getLogger('ocrd_exif').warning("ImageMagick 'identify' not available, Consider installing ImageMagick for more robust pixel density estimation")
            self.run_pil(img)

    @staticmethod
    def _to_density(value):
        """Coerce ``value`` to an integer pixel density of at least 1.

        Anything that cannot be converted to a finite number yields 1, the
        same fallback the class uses when no density is known.
        """
        try:
            return max(int(float(value)), 1)
        except (TypeError, ValueError, OverflowError):
            return 1

    @staticmethod
    def _parse_identify_output(stdout):
        """Parse ``identify -format '%[resolution.x] %[resolution.y] %U'`` output.

        Arguments:
            stdout (str): decoded standard output of ``identify``.

        Returns:
            tuple: ``(xResolution, yResolution, resolutionUnit)``.

        Only the first three whitespace-separated tokens are used. For
        multi-frame images ``identify`` prints the format once per frame
        without a separator, so the unit token may carry trailing text from
        the next frame; hence the prefix match. Missing tokens fall back to
        a density of 1 and ``inches``.
        """
        tokens = stdout.split()
        x_raw = tokens[0] if len(tokens) > 0 else 1
        y_raw = tokens[1] if len(tokens) > 1 else 1
        unit_raw = tokens[2] if len(tokens) > 2 else ''
        unit = 'cm' if unit_raw.startswith('PixelsPerCentimeter') else 'inches'
        return OcrdExif._to_density(x_raw), OcrdExif._to_density(y_raw), unit

    def _set_resolution(self, x_res, y_res, unit):
        """Store x/y density, the unit and the derived mean ``resolution``."""
        self.xResolution = self._to_density(x_res)
        self.yResolution = self._to_density(y_res)
        self.resolutionUnit = unit
        self.resolution = round(sqrt(self.xResolution * self.yResolution))

    def run_identify(self, img):
        """Determine pixel density by calling ImageMagick ``identify``.

        Also copies ``compression`` and ``photometric_interpretation`` from
        ``img.info`` (``None`` if absent).

        Arguments:
            img (`PIL.Image`): image to inspect. If it has a non-empty
                ``filename``, that file is passed to ``identify``; otherwise
                the image is serialized in its own ``format`` and piped via
                ``/dev/stdin``. An image with neither filename nor format
                cannot be handed to ``identify`` and is processed by
                :py:meth:`run_pil` instead.

        If ``identify`` exits non-zero, the problem is logged and the density
        falls back to 1 ``inches``.
        """
        for prop in ['compression', 'photometric_interpretation']:
            setattr(self, prop, img.info.get(prop))
        cmd = ['identify', '-format', r'%[resolution.x] %[resolution.y] %U']
        # Images created in memory have no ``filename`` attribute at all.
        filename = getattr(img, 'filename', None)
        if filename:
            ret = run(cmd + [filename], check=False, stderr=PIPE, stdout=PIPE)
        elif getattr(img, 'format', None):
            with BytesIO() as bio:
                img.save(bio, format=img.format)
                ret = run(cmd + ['/dev/stdin'], check=False, stderr=PIPE, stdout=PIPE, input=bio.getvalue())
        else:
            # Nothing to give to identify: no file and no format to encode with.
            self.run_pil(img)
            return
        if ret.returncode:
            stderr = ret.stderr.decode('utf-8', errors='replace')
            if 'no decode delegate for this image format' in stderr:
                getLogger('ocrd_exif').warning("ImageMagick does not support the '%s' image format. ", img.format)
            else:
                getLogger('ocrd_exif').error("identify exited with non-zero %s: %s", ret.returncode, stderr)
            self._set_resolution(1, 1, 'inches')
        else:
            stdout = ret.stdout.decode('utf-8', errors='replace')
            self._set_resolution(*self._parse_identify_output(stdout))

    def run_pil(self, img):
        """Determine pixel density from the metadata PIL exposes in ``img.info``.

        Arguments:
            img (`PIL.Image`): image to inspect.

        Sources, in order of precedence:

        * TIFF/PNG with ``dpi``: unit is ``cm`` only for TIFF whose tag 296
          (ResolutionUnit) equals 3, else ``inches``
        * JPEG with ``jfif_density``: unit is ``cm`` if ``jfif_unit`` is 2,
          else ``inches``
        * PNG with ``aspect``: unit ``inches``
        * otherwise: density 1, unit ``inches``

        Densities are coerced to integers and clamped to at least 1, like in
        :py:meth:`run_identify`.
        """
        info = img.info
        unit = 'inches'
        if img.format in ('TIFF', 'PNG') and 'dpi' in info:
            x_res, y_res = info['dpi'][0], info['dpi'][1]
            if img.format == 'TIFF' and img.tag.get(296) == 3:
                unit = 'cm'
        elif img.format == 'JPEG' and 'jfif_density' in info:
            x_res, y_res = info['jfif_density'][0], info['jfif_density'][1]
            # ``jfif_unit`` may be absent; treat that like any non-cm unit.
            if info.get('jfif_unit') == 2:
                unit = 'cm'
        elif img.format == 'PNG' and 'aspect' in info:
            x_res, y_res = info['aspect'][0], info['aspect'][1]
        else:
            x_res = y_res = 1
        self._set_resolution(x_res, y_res, unit)

    def to_xml(self):
        """
        Serialize all properties as XML string.

        Every instance attribute becomes one child element of ``<exif>``, in
        attribute creation order. Values are XML-escaped.
        """
        from xml.sax.saxutils import escape
        elements = ''.join(
            '<%s>%s</%s>' % (key, escape(str(value)), key)
            for key, value in self.__dict__.items())
        return '<exif>%s</exif>' % elements
108