1
|
|
|
import sys |
2
|
|
|
|
3
|
|
|
import numpy as np |
4
|
|
|
from PIL import Image, ImageStat, ImageDraw, ImageChops |
5
|
|
|
|
6
|
|
|
from .logging import getLogger |
7
|
|
|
from .introspect import membername |
8
|
|
|
|
9
|
|
|
# Allow processing of images with up to 1.6bn pixels
# https://github.com/OCR-D/core/issues/735
Image.MAX_IMAGE_PIXELS = 40_000 ** 2

# Public names exported via ``from ... import *``.
# NOTE(review): crop_image, rotate_image and transpose_image are defined
# below but not listed here — confirm that is intentional.
__all__ = [
    'adjust_canvas_to_rotation',
    'adjust_canvas_to_transposition',
    'bbox_from_points',
    'bbox_from_polygon',
    'bbox_from_xywh',
    'coordinates_for_segment',
    'coordinates_of_segment',
    'image_from_polygon',
    'points_from_bbox',
    'points_from_polygon',
    'points_from_x0y0x1y1',
    'points_from_xywh',
    'points_from_y0x0y1x1',
    'polygon_from_bbox',
    'polygon_from_points',
    'polygon_from_x0y0x1y1',
    'polygon_from_xywh',
    'polygon_mask',
    'rotate_coordinates',
    'shift_coordinates',
    'scale_coordinates',
    'transform_coordinates',
    'transpose_coordinates',
    'xywh_from_bbox',
    'xywh_from_points',
    'xywh_from_polygon',
]
41
|
|
|
|
42
|
|
|
def adjust_canvas_to_rotation(size, angle):
    """Calculate the enlarged image size after rotation.

    Given a numpy array ``size`` of an original canvas (width and height),
    and a rotation angle in degrees counter-clockwise ``angle``,
    calculate the new size which is necessary to encompass the full
    image after rotation.

    Return a numpy array of the enlarged width and height.
    """
    rad = np.deg2rad(angle)
    abs_sin = np.abs(np.sin(rad))
    abs_cos = np.abs(np.cos(rad))
    # project both dimensions onto the rotated bounding-box axes:
    bounds = np.array([[abs_cos, abs_sin],
                       [abs_sin, abs_cos]])
    return bounds @ np.array(size)
58
|
|
|
|
59
|
|
|
def adjust_canvas_to_transposition(size, method):
    """Calculate the flipped image size after transposition.

    Given a numpy array ``size`` of an original canvas (width and height),
    and a transposition mode ``method`` (see ``transpose_image``),
    calculate the new size after transposition.

    Return a numpy array of the enlarged width and height.
    """
    # these four modes swap width and height:
    dimension_swapping = (Image.ROTATE_90,
                          Image.ROTATE_270,
                          Image.TRANSPOSE,
                          Image.TRANSVERSE)
    if method in dimension_swapping:
        return size[::-1]
    return size
74
|
|
|
|
75
|
|
|
def bbox_from_points(points):
    """Construct a numeric list representing a bounding box from polygon coordinates in page representation."""
    polygon = []
    for pair in points.split(' '):
        polygon.append([int(coord) for coord in pair.split(',')])
    return bbox_from_polygon(polygon)
79
|
|
|
|
80
|
|
|
def bbox_from_polygon(polygon):
    """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation."""
    # start from impossible extremes so any point narrows them
    # (an empty polygon thus yields the sentinel values unchanged):
    minx = miny = sys.maxsize
    maxx = maxy = -sys.maxsize
    for point in polygon:
        minx = min(minx, point[0])
        miny = min(miny, point[1])
        maxx = max(maxx, point[0])
        maxy = max(maxy, point[1])
    return minx, miny, maxx, maxy
96
|
|
|
|
97
|
|
|
def bbox_from_xywh(xywh):
    """Convert a bounding box from a numeric dict to a numeric list representation."""
    left = xywh['x']
    top = xywh['y']
    return (left, top, left + xywh['w'], top + xywh['h'])
105
|
|
|
|
106
|
|
|
def coordinates_of_segment(segment, parent_image, parent_coords):
    """Extract the coordinates of a PAGE segment element relative to its parent.

    Given...

    - ``segment``, a PAGE segment object in absolute coordinates
      (i.e. RegionType / TextLineType / WordType / GlyphType), and
    - ``parent_image``, the PIL.Image of its corresponding parent object
      (i.e. PageType / RegionType / TextLineType / WordType), (not used),
      along with
    - ``parent_coords``, its corresponding affine transformation,

    ...calculate the relative coordinates of the segment within the image,
    i.e. apply the given transform to the points annotated in ``segment``.

    The transform recursively encodes any cropping (shift by the upper
    left corner) and any rotation (translation to the center, pure
    rotation, and translation back to the enlarged canvas) applied to
    ``parent_image`` or any of its parents.

    Return the rounded numpy array of the resulting polygon.
    """
    # absolute points annotated on the segment:
    absolute = np.array(polygon_from_points(segment.get_Coords().points))
    # map them into the parent image's coordinate system:
    relative = transform_coordinates(absolute, parent_coords['transform'])
    return np.round(relative).astype(np.int32)
140
|
|
|
|
141
|
|
|
def polygon_from_points(points):
    """
    Convert polygon coordinates in page representation to polygon coordinates in numeric list representation.
    """
    result = []
    for pair in points.split(" "):
        coords = pair.split(",")
        result.append([float(coords[0]), float(coords[1])])
    return result
150
|
|
|
|
151
|
|
|
|
152
|
|
|
def coordinates_for_segment(polygon, parent_image, parent_coords):
    """Convert relative coordinates to absolute.

    Given...

    - ``polygon``, a numpy array of points relative to
    - ``parent_image``, a PIL.Image (not used), along with
    - ``parent_coords``, its corresponding affine transformation,

    ...calculate the absolute coordinates within the page,
    i.e. apply the given transform inversely to ``polygon``.

    The transform recursively encodes any cropping (shift by the upper
    left corner) and any rotation (translation to the center, pure
    rotation, and translation back to the enlarged canvas) applied to
    ``parent_image`` or any of its parents — here undone by inversion.

    Return the rounded numpy array of the resulting polygon.
    """
    relative = np.array(polygon, dtype=np.float32)  # avoid implicit type cast problems
    # invert the parent transform to map back into the page coordinate system:
    inverse = np.linalg.inv(parent_coords['transform'])
    absolute = transform_coordinates(relative, inverse)
    return np.round(absolute).astype(np.int32)
184
|
|
|
|
185
|
|
|
def polygon_mask(image, coordinates):
    """Create a mask image of a polygon.

    Given a PIL.Image ``image`` (merely for dimensions), and
    a numpy array ``coordinates`` of relative coordinates into the image,
    create a new image of the same size with black background, and
    fill everything inside the polygon hull with white.

    Return the new PIL.Image.
    """
    # single-channel ('L') image, initially all black:
    mask = Image.new('L', image.size, 0)
    # ImageDraw.polygon needs a list of coordinate tuples:
    coordinates = list(map(tuple, coordinates))
    ImageDraw.Draw(mask).polygon(coordinates, outline=255, fill=255)
    return mask
199
|
|
|
|
200
|
|
|
def rotate_coordinates(transform, angle, orig=np.array([0, 0])):
    """Compose an affine coordinate transformation with a passive rotation.

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, and a rotation angle in
    degrees counter-clockwise ``angle``, as well as a numpy array
    ``orig`` of the center of rotation, calculate the affine
    coordinate transform corresponding to the composition of both
    transformations. (This entails translation to the center, followed
    by pure rotation, and subsequent translation back. However, since
    rotation necessarily increases the bounding box, and thus image size,
    do not translate back the same amount, but to the enlarged offset.)

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.rotate_coordinates')
    rad = np.deg2rad(angle)
    cos_a = np.cos(rad)
    sin_a = np.sin(rad)
    # passive rotation matrix in homogeneous coordinates:
    rotation = np.array([[+cos_a, sin_a, 0],
                         [-sin_a, cos_a, 0],
                         [0, 0, 1]])
    # translate into the center of rotation:
    transform = shift_coordinates(transform, -orig)
    LOG.debug('rotating coordinates by %.2f° around %s', angle, str(orig))
    # rotate purely:
    transform = np.dot(rotation, transform)
    # translate back out — not by ``orig`` itself, because the canvas
    # (bounding box) grows with rotation, so target the new upper left:
    return shift_coordinates(transform,
                             adjust_canvas_to_rotation(orig, angle))
236
|
|
|
|
237
|
|
|
def rotate_image(image, angle, fill='background', transparency=False):
    """Rotate an image, enlarging and filling with background.

    Given a PIL.Image ``image`` and a rotation angle in degrees
    counter-clockwise ``angle``, rotate the image, increasing its
    size at the margins accordingly, and filling everything outside
    the original image according to ``fill``:

    - if ``background`` (the default),
      then use the median color of the image;
    - otherwise use the given color, e.g. ``'white'`` or (255,255,255).

    Moreover, if ``transparency`` is true, then add an alpha channel
    fully opaque (i.e. everything outside the original image will
    be transparent for those that can interpret alpha channels).
    (This is true for images which already have an alpha channel,
    regardless of the setting used.)

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.rotate_image')
    LOG.debug('rotating image by %.2f°', angle)
    if transparency and image.mode in ['RGB', 'L']:
        # ensure no information is lost by adding transparency channel
        # initialized to fully opaque (so cropping and rotation will
        # expose areas as transparent):
        image = image.copy()
        image.putalpha(255)
    if fill == 'background':
        background = ImageStat.Stat(image)
        if len(background.bands) > 1:
            background = background.median
            if image.mode in ['RGBA', 'LA']:
                background[-1] = 0  # fully transparent
            background = tuple(background)
        else:
            background = background.median[0]
    else:
        background = fill
    new_image = image.rotate(angle,
                             expand=True,
                             #resample=Image.BILINEAR,
                             fillcolor=background)
    if new_image.mode in ['LA']:
        # workaround for #1600 (bug in LA support which
        # causes areas fully transparent before rotation
        # to be filled with black here):
        image = new_image
        new_image = Image.new(image.mode, image.size, background)
        new_image.paste(image, mask=image.getchannel('A'))
    return new_image
288
|
|
|
|
289
|
|
|
|
290
|
|
|
def shift_coordinates(transform, offset):
    """Compose an affine coordinate transformation with a translation.

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, and a numpy array
    ``offset`` of the translation vector, calculate the affine
    coordinate transform corresponding to the composition of both
    transformations.

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.shift_coordinates')
    LOG.debug('shifting coordinates by %s', str(offset))
    # translation matrix in homogeneous coordinates:
    translation = np.eye(3)
    translation[0, 2], translation[1, 2] = offset[0], offset[1]
    return np.dot(translation, transform)
307
|
|
|
|
308
|
|
|
def scale_coordinates(transform, factors):
    """Compose an affine coordinate transformation with a proportional scaling.

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, and a numpy array
    ``factors`` of the scaling factors, calculate the affine
    coordinate transform corresponding to the composition of both
    transformations.

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.scale_coordinates')
    LOG.debug('scaling coordinates by %s', str(factors))
    # diagonal scaling matrix in homogeneous coordinates:
    scaling = np.eye(3)
    scaling[0, 0], scaling[1, 1] = factors[0], factors[1]
    return np.dot(scaling, transform)
324
|
|
|
|
325
|
|
|
def transform_coordinates(polygon, transform=None):
    """Apply an affine transformation to a set of points.

    Augment the 2d numpy array of points ``polygon`` with an extra
    column of ones (homogeneous coordinates), then multiply with
    the transformation matrix ``transform`` (or the identity matrix),
    and finally remove the extra column from the result.
    """
    if transform is None:
        transform = np.eye(3)
    # make 3d homogeneous coordinates:
    homogeneous = np.insert(polygon, 2, 1, axis=1)
    transformed = np.dot(transform, homogeneous.T).T
    # remove z coordinate again:
    return np.delete(transformed, 2, axis=1)
341
|
|
|
|
342
|
|
|
def transpose_coordinates(transform, method, orig=np.array([0, 0])):
    """Compose an affine coordinate transformation with a transposition (i.e. flip or rotate in 90° multiples).

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, a transposition mode ``method``,
    as well as a numpy array ``orig`` of the center of the image,
    calculate the affine coordinate transform corresponding to the composition
    of both transformations, which is respectively:

    - ``PIL.Image.FLIP_LEFT_RIGHT``:
      entails translation to the center, followed by pure reflection
      about the y-axis, and subsequent translation back
    - ``PIL.Image.FLIP_TOP_BOTTOM``:
      entails translation to the center, followed by pure reflection
      about the x-axis, and subsequent translation back
    - ``PIL.Image.ROTATE_180``:
      entails translation to the center, followed by pure reflection
      about the origin, and subsequent translation back
    - ``PIL.Image.ROTATE_90``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise, and subsequent translation back
    - ``PIL.Image.ROTATE_270``:
      entails translation to the center, followed by pure rotation
      by 270° counter-clockwise, and subsequent translation back
    - ``PIL.Image.TRANSPOSE``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise and pure reflection about the x-axis,
      and subsequent translation back
    - ``PIL.Image.TRANSVERSE``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise and pure reflection about the y-axis,
      and subsequent translation back

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.transpose_coordinates')
    LOG.debug('transposing coordinates with %s around %s', membername(Image, method), str(orig))
    # get rotation matrix for passive rotation/reflection:
    rot90 = np.array([[0, 1, 0],
                      [-1, 0, 0],
                      [0, 0, 1]])
    reflx = np.array([[1, 0, 0],
                      [0, -1, 0],
                      [0, 0, 1]])
    refly = np.array([[-1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]])
    transform = shift_coordinates(transform, -orig)
    operations = {
        Image.FLIP_LEFT_RIGHT: [refly],
        Image.FLIP_TOP_BOTTOM: [reflx],
        Image.ROTATE_180: [reflx, refly],
        Image.ROTATE_90: [rot90],
        Image.ROTATE_270: [rot90, reflx, refly],
        Image.TRANSPOSE: [rot90, reflx],
        Image.TRANSVERSE: [rot90, refly]
    }.get(method)  # no default
    for operation in operations:
        transform = np.dot(operation, transform)
    transform = shift_coordinates(
        transform,
        # the image (bounding box) may flip with transposition,
        # so we must translate back to the new upper left:
        adjust_canvas_to_transposition(orig, method))
    return transform
407
|
|
|
|
408
|
|
|
def transpose_image(image, method):
    """Transpose (i.e. flip or rotate in 90° multiples) an image.

    Given a PIL.Image ``image`` and a transposition mode ``method``,
    apply the respective operation:

    - ``PIL.Image.FLIP_LEFT_RIGHT``:
      all pixels get mirrored at half the width of the image
    - ``PIL.Image.FLIP_TOP_BOTTOM``:
      all pixels get mirrored at half the height of the image
    - ``PIL.Image.ROTATE_180``:
      all pixels get mirrored at both, half the width and half the height
      of the image,
      i.e. the image gets rotated by 180° counter-clockwise
    - ``PIL.Image.ROTATE_90``:
      rows become columns (but counted from the right) and
      columns become rows,
      i.e. the image gets rotated by 90° counter-clockwise;
      width becomes height and vice versa
    - ``PIL.Image.ROTATE_270``:
      rows become columns and
      columns become rows (but counted from the bottom),
      i.e. the image gets rotated by 270° counter-clockwise;
      width becomes height and vice versa
    - ``PIL.Image.TRANSPOSE``:
      rows become columns and vice versa,
      i.e. all pixels get mirrored at the main diagonal;
      width becomes height and vice versa
    - ``PIL.Image.TRANSVERSE``:
      rows become columns (but counted from the right) and
      columns become rows (but counted from the bottom),
      i.e. all pixels get mirrored at the opposite diagonal;
      width becomes height and vice versa

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.transpose_image')
    LOG.debug('transposing image with %s', membername(Image, method))
    return image.transpose(method)
447
|
|
|
|
448
|
|
|
def crop_image(image, box=None):
    """Crop an image to a rectangle, filling with background.

    Given a PIL.Image ``image`` and a list ``box`` of the bounding
    rectangle relative to the image, crop at the box coordinates,
    filling everything outside ``image`` with the background.
    (This covers the case where ``box`` indexes are negative or
    larger than ``image`` width/height. PIL.Image.crop would fill
    with black.) Since ``image`` is not necessarily binarized yet,
    determine the background from the median color (instead of
    white).

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.crop_image')
    if not box:
        box = (0, 0, image.width, image.height)
    elif box[0] < 0 or box[1] < 0 or box[2] > image.width or box[3] > image.height:
        # (It should be invalid in PAGE-XML to extend beyond parents.)
        LOG.warning('crop coordinates (%s) exceed image (%dx%d)',
                    str(box), image.width, image.height)
    LOG.debug('cropping image to %s', str(box))
    xywh = xywh_from_bbox(*box)
    poly = polygon_from_bbox(*box)
    # estimate the background color from the median inside the box:
    background = ImageStat.Stat(image, mask=polygon_mask(image, poly))
    if len(background.bands) > 1:
        background = tuple(background.median)
    else:
        background = background.median[0]
    new_image = Image.new(image.mode, (xywh['w'], xywh['h']),
                          background)  # or 'white'
    new_image.paste(image, (-xywh['x'], -xywh['y']))
    return new_image
481
|
|
|
|
482
|
|
|
def image_from_polygon(image, polygon, fill='background', transparency=False):
    """Mask an image with a polygon.

    Given a PIL.Image ``image`` and a numpy array ``polygon``
    of relative coordinates into the image, fill everything
    outside the polygon hull to a color according to ``fill``:

    - if ``background`` (the default),
      then use the median color of the image;
    - otherwise use the given color, e.g. ``'white'`` or (255,255,255).

    Moreover, if ``transparency`` is true, then add an alpha channel
    from the polygon mask (i.e. everything outside the polygon will
    be transparent, for those consumers that can interpret alpha channels).
    Images which already have an alpha channel will have it shrunk
    from the polygon mask (i.e. everything outside the polygon will
    be transparent, in addition to existing transparent pixels).

    Return a new PIL.Image.
    """
    mask = polygon_mask(image, polygon)
    if fill == 'background':
        background = ImageStat.Stat(image, mask=mask)
        if len(background.bands) > 1:
            background = tuple(background.median)
        else:
            background = background.median[0]
    else:
        background = fill
    new_image = Image.new(image.mode, image.size, background)
    new_image.paste(image, mask=mask)
    # ensure no information is lost by adding a transparency channel
    # initialized to fully transparent outside the polygon mask
    # (so consumers do not have to rely on background estimation,
    # which can fail on foreground-dominated segments, or white,
    # which can be inconsistent on unbinarized images):
    if image.mode in ['RGBA', 'LA']:
        # ensure transparency maximizes (i.e. parent mask AND mask):
        mask = ImageChops.darker(mask, image.getchannel('A'))  # min opaque
        new_image.putalpha(mask)
    elif transparency and image.mode in ['RGB', 'L']:
        # introduce transparency:
        new_image.putalpha(mask)
    return new_image
526
|
|
|
|
527
|
|
|
def points_from_bbox(minx, miny, maxx, maxy):
    """Construct polygon coordinates in page representation from a numeric list representing a bounding box."""
    corners = ((minx, miny), (maxx, miny), (maxx, maxy), (minx, maxy))
    return " ".join("%i,%i" % corner for corner in corners)
531
|
|
|
|
532
|
|
|
def points_from_polygon(polygon):
    """Convert polygon coordinates from a numeric list representation to a page representation."""
    pairs = ["%i,%i" % (point[0], point[1]) for point in polygon]
    return " ".join(pairs)
535
|
|
|
|
536
|
|
|
def points_from_xywh(box):
    """
    Construct polygon coordinates in page representation from numeric dict representing a bounding box.
    """
    left, top = box['x'], box['y']
    right, bottom = left + box['w'], top + box['h']
    # tesseract uses a different region representation format
    return "%i,%i %i,%i %i,%i %i,%i" % (
        left, top,
        right, top,
        right, bottom,
        left, bottom
    )
548
|
|
|
def points_from_y0x0y1x1(yxyx):
    """
    Construct a polygon representation from a rectangle described as a list [y0, x0, y1, x1]
    """
    # note the row-major (y first) input order:
    y0, x0, y1, x1 = yxyx[0], yxyx[1], yxyx[2], yxyx[3]
    return "%s,%s %s,%s %s,%s %s,%s" % (
        x0, y0, x1, y0, x1, y1, x0, y1)
562
|
|
|
|
563
|
|
|
def points_from_x0y0x1y1(xyxy):
    """
    Construct a polygon representation from a rectangle described as a list [x0, y0, x1, y1]
    """
    x0, y0, x1, y1 = xyxy[0], xyxy[1], xyxy[2], xyxy[3]
    return "%s,%s %s,%s %s,%s %s,%s" % (
        x0, y0, x1, y0, x1, y1, x0, y1)
577
|
|
|
|
578
|
|
|
def polygon_from_bbox(minx, miny, maxx, maxy):
    """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box."""
    # corners in counter-clockwise order starting at the upper left:
    top_edge = [[minx, miny], [maxx, miny]]
    bottom_edge = [[maxx, maxy], [minx, maxy]]
    return top_edge + bottom_edge
581
|
|
|
|
582
|
|
|
def polygon_from_x0y0x1y1(x0y0x1y1):
    """Construct polygon coordinates in numeric list representation from a string list representing a bounding box."""
    minx, miny, maxx, maxy = (int(x0y0x1y1[i]) for i in range(4))
    return [[minx, miny],
            [maxx, miny],
            [maxx, maxy],
            [minx, maxy]]
589
|
|
|
|
590
|
|
|
def polygon_from_xywh(xywh):
    """Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box."""
    bbox = bbox_from_xywh(xywh)
    return polygon_from_bbox(*bbox)
593
|
|
|
|
594
|
|
|
def xywh_from_bbox(minx, miny, maxx, maxy):
    """Convert a bounding box from a numeric list to a numeric dict representation."""
    return dict(x=minx,
                y=miny,
                w=maxx - minx,
                h=maxy - miny)
602
|
|
|
|
603
|
|
|
def xywh_from_points(points):
    """
    Construct a numeric dict representing a bounding box from polygon coordinates in page representation.
    """
    bbox = bbox_from_points(points)
    return xywh_from_bbox(*bbox)
608
|
|
|
|
609
|
|
|
|
610
|
|
|
def xywh_from_polygon(polygon):
    """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation."""
    bbox = bbox_from_polygon(polygon)
    return xywh_from_bbox(*bbox)
613
|
|
|
|