import sys

import numpy as np
from PIL import Image, ImageStat, ImageDraw, ImageChops

from .logging import getLogger
from .introspect import membername

__all__ = [
    'adjust_canvas_to_rotation',
    'adjust_canvas_to_transposition',
    'bbox_from_points',
    'bbox_from_polygon',
    'bbox_from_xywh',
    'coordinates_for_segment',
    'coordinates_of_segment',
    'image_from_polygon',
    'points_from_bbox',
    'points_from_polygon',
    'points_from_x0y0x1y1',
    'points_from_xywh',
    'points_from_y0x0y1x1',
    'polygon_from_bbox',
    'polygon_from_points',
    'polygon_from_x0y0x1y1',
    'polygon_from_xywh',
    'polygon_mask',
    'rotate_coordinates',
    'shift_coordinates',
    'transform_coordinates',
    'transpose_coordinates',
    'xywh_from_bbox',
    'xywh_from_points',
    'xywh_from_polygon',
]

def adjust_canvas_to_rotation(size, angle):
    """Calculate the enlarged image size after rotation.

    Given a numpy array ``size`` of an original canvas (width and height),
    and a rotation angle in degrees counter-clockwise ``angle``,
    calculate the new size which is necessary to encompass the full
    image after rotation.

    Return a numpy array of the enlarged width and height.
    """
    angle = np.deg2rad(angle)
    sin = np.abs(np.sin(angle))
    cos = np.abs(np.cos(angle))
    return np.dot(np.array([[cos, sin],
                            [sin, cos]]),
                  np.array(size))

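# Worked example for adjust_canvas_to_rotation (illustrative sketch, not part
# of the original module): rotating a 100x50 canvas by 90 degrees needs
# roughly a 50x100 canvas:
#   adjust_canvas_to_rotation(np.array([100, 50]), 90)
#   # -> approximately array([50., 100.]) (up to floating-point noise)
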
def adjust_canvas_to_transposition(size, method):
    """Calculate the flipped image size after transposition.

    Given a numpy array ``size`` of an original canvas (width and height),
    and a transposition mode ``method`` (see ``transpose_image``),
    calculate the new size after transposition.

    Return a numpy array of the transposed width and height.
    """
    if method in [Image.ROTATE_90,
                  Image.ROTATE_270,
                  Image.TRANSPOSE,
                  Image.TRANSVERSE]:
        size = size[::-1]
    return size

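# Worked example for adjust_canvas_to_transposition (illustrative sketch):
# 90-degree transpositions swap width and height, mirror transpositions
# keep them:
#   adjust_canvas_to_transposition(np.array([100, 50]), Image.ROTATE_90)
#   # -> array([50, 100])
#   adjust_canvas_to_transposition(np.array([100, 50]), Image.FLIP_LEFT_RIGHT)
#   # -> array([100, 50])
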
def bbox_from_points(points):
    """Construct a numeric list representing a bounding box from polygon coordinates in page representation."""
    xys = [[int(p) for p in pair.split(',')] for pair in points.split(' ')]
    return bbox_from_polygon(xys)

def bbox_from_polygon(polygon):
    """Construct a numeric list representing a bounding box from polygon coordinates in numeric list representation."""
    minx = sys.maxsize
    miny = sys.maxsize
    maxx = -sys.maxsize
    maxy = -sys.maxsize
    for xy in polygon:
        if xy[0] < minx:
            minx = xy[0]
        if xy[0] > maxx:
            maxx = xy[0]
        if xy[1] < miny:
            miny = xy[1]
        if xy[1] > maxy:
            maxy = xy[1]
    return minx, miny, maxx, maxy

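# Worked example for the two bounding-box constructors (illustrative sketch):
#   bbox_from_polygon([[10, 20], [40, 20], [40, 60], [10, 60]])
#   # -> (10, 20, 40, 60)
#   bbox_from_points("10,20 40,20 40,60 10,60")
#   # -> (10, 20, 40, 60)
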
def bbox_from_xywh(xywh):
    """Convert a bounding box from a numeric dict to a numeric list representation."""
    return (
        xywh['x'],
        xywh['y'],
        xywh['x'] + xywh['w'],
        xywh['y'] + xywh['h']
    )

def coordinates_of_segment(segment, parent_image, parent_coords):
    """Extract the coordinates of a PAGE segment element relative to its parent.

    Given...

    - ``segment``, a PAGE segment object in absolute coordinates
      (i.e. RegionType / TextLineType / WordType / GlyphType), and
    - ``parent_image``, the PIL.Image of its corresponding parent object
      (i.e. PageType / RegionType / TextLineType / WordType), (not used),
      along with
    - ``parent_coords``, its corresponding affine transformation,

    ...calculate the relative coordinates of the segment within the image.

    That is, apply the given transform to the points annotated in ``segment``.
    The transform encodes (recursively):

    1. Whenever ``parent_image`` or any of its parents was cropped,
       all points must be shifted by the offset
       (i.e. coordinate system gets translated by the upper left).
    2. Whenever ``parent_image`` or any of its parents was rotated,
       all points must be rotated around the center of that image
       (i.e. coordinate system gets translated by the center in
       opposite direction, rotated purely, and translated back;
       the latter involves an additional offset from the increase
       in canvas size necessary to accommodate all points).

    Return the rounded numpy array of the resulting polygon.
    """
    # get polygon:
    polygon = np.array(polygon_from_points(segment.get_Coords().points))
    # apply affine transform:
    polygon = transform_coordinates(polygon, parent_coords['transform'])
    return np.round(polygon).astype(np.int32)

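# Usage sketch for coordinates_of_segment (hypothetical names): given a PAGE
# segment ``region`` and a coordinate metadata dict ``page_coords`` carrying a
# 'transform' matrix (as used throughout this module), the relative polygon
# and its bounding box would be obtained as:
#   region_polygon = coordinates_of_segment(region, page_image, page_coords)
#   region_bbox = bbox_from_polygon(region_polygon)
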
def polygon_from_points(points):
    """
    Convert polygon coordinates in page representation to polygon coordinates in numeric list representation.
    """
    polygon = []
    for pair in points.split(" "):
        x_y = pair.split(",")
        polygon.append([float(x_y[0]), float(x_y[1])])
    return polygon

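# Worked example for polygon_from_points (illustrative sketch):
#   polygon_from_points("10,20 40,20 40,60 10,60")
#   # -> [[10.0, 20.0], [40.0, 20.0], [40.0, 60.0], [10.0, 60.0]]
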
def coordinates_for_segment(polygon, parent_image, parent_coords):
    """Convert relative coordinates to absolute.

    Given...

    - ``polygon``, a numpy array of points relative to
    - ``parent_image``, a PIL.Image (not used), along with
    - ``parent_coords``, its corresponding affine transformation,

    ...calculate the absolute coordinates within the page.

    That is, apply the given transform inversely to ``polygon``.
    The transform encodes (recursively):

    1. Whenever ``parent_image`` or any of its parents was cropped,
       all points must be shifted by the offset in opposite direction
       (i.e. coordinate system gets translated by the upper left).
    2. Whenever ``parent_image`` or any of its parents was rotated,
       all points must be rotated around the center of that image in
       opposite direction
       (i.e. coordinate system gets translated by the center in
       opposite direction, rotated purely, and translated back;
       the latter involves an additional offset from the increase
       in canvas size necessary to accommodate all points).

    Return the rounded numpy array of the resulting polygon.
    """
    polygon = np.array(polygon, dtype=np.float32)  # avoid implicit type cast problems
    # apply inverse of affine transform:
    inv_transform = np.linalg.inv(parent_coords['transform'])
    polygon = transform_coordinates(polygon, inv_transform)
    return np.round(polygon).astype(np.int32)

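# Round-trip sketch for coordinates_for_segment (illustrative; ``abs_polygon``
# and ``coords`` are hypothetical): it applies the inverse of the transform
# that coordinates_of_segment applies, so up to rounding:
#   rel = transform_coordinates(abs_polygon, coords['transform'])
#   back = coordinates_for_segment(rel, None, coords)
#   # -> back is approximately abs_polygon again
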
def polygon_mask(image, coordinates):
    """Create a mask image of a polygon.

    Given a PIL.Image ``image`` (merely for dimensions), and
    a numpy array ``coordinates`` of relative coordinates into the image,
    create a new image of the same size with black background, and
    fill everything inside the polygon hull with white.

    Return the new PIL.Image.
    """
    mask = Image.new('L', image.size, 0)
    if isinstance(coordinates, np.ndarray):
        coordinates = list(map(tuple, coordinates))
    ImageDraw.Draw(mask).polygon(coordinates, outline=255, fill=255)
    return mask

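# Usage sketch for polygon_mask (illustrative):
#   img = Image.new('L', (100, 50), 128)
#   mask = polygon_mask(img, np.array([[10, 10], [90, 10], [90, 40], [10, 40]]))
#   # -> mode 'L' image of size (100, 50), white (255) inside the rectangle,
#   #    black (0) outside
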
def rotate_coordinates(transform, angle, orig=np.array([0, 0])):
    """Compose an affine coordinate transformation with a passive rotation.

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, and a rotation angle in
    degrees counter-clockwise ``angle``, as well as a numpy array
    ``orig`` of the center of rotation, calculate the affine
    coordinate transform corresponding to the composition of both
    transformations. (This entails translation to the center, followed
    by pure rotation, and subsequent translation back. However, since
    rotation necessarily increases the bounding box, and thus image size,
    do not translate back the same amount, but to the enlarged offset.)

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.rotate_coordinates')
    rad = np.deg2rad(angle)
    cos = np.cos(rad)
    sin = np.sin(rad)
    # get rotation matrix for passive rotation:
    rot = np.array([[+cos, sin, 0],
                    [-sin, cos, 0],
                    [0, 0, 1]])
    # shift to center of rotation
    transform = shift_coordinates(transform, -orig)
    # apply pure rotation
    LOG.debug('rotating coordinates by %.2f° around %s', angle, str(orig))
    transform = np.dot(rot, transform)
    # shift back (not simply by ``orig``):
    transform = shift_coordinates(
        transform,
        # the image (bounding box) increases with rotation,
        # so we must translate back to the new upper left:
        adjust_canvas_to_rotation(orig, angle))
    return transform

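# Worked example for rotate_coordinates (illustrative sketch): composing the
# identity with a 180-degree rotation about the center of a W x H image maps
# (x, y) to (W - x, H - y):
#   W, H = 100, 50
#   t = rotate_coordinates(np.eye(3), 180, orig=np.array([W / 2, H / 2]))
#   transform_coordinates(np.array([[10, 20]]), t)
#   # -> approximately array([[90., 30.]])
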
def rotate_image(image, angle, fill='background', transparency=False):
    """Rotate an image, enlarging and filling with background.

    Given a PIL.Image ``image`` and a rotation angle in degrees
    counter-clockwise ``angle``, rotate the image, increasing its
    size at the margins accordingly, and filling everything outside
    the original image according to ``fill``:

    - if ``background`` (the default),
      then use the median color of the image;
    - otherwise use the given color, e.g. ``'white'`` or (255,255,255).

    Moreover, if ``transparency`` is true, then add an alpha channel
    which is fully opaque inside the original image (i.e. everything
    outside the original image will be transparent, for those consumers
    that can interpret alpha channels). Images which already have an
    alpha channel are treated this way regardless of the setting.

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.rotate_image')
    LOG.debug('rotating image by %.2f°', angle)
    if transparency and image.mode in ['RGB', 'L']:
        # ensure no information is lost by adding transparency channel
        # initialized to fully opaque (so cropping and rotation will
        # expose areas as transparent):
        image = image.copy()
        image.putalpha(255)
    if fill == 'background':
        background = ImageStat.Stat(image)
        if len(background.bands) > 1:
            background = background.median
            if image.mode in ['RGBA', 'LA']:
                background[-1] = 0  # fully transparent
            background = tuple(background)
        else:
            background = background.median[0]
    else:
        background = fill
    new_image = image.rotate(angle,
                             expand=True,
                             #resample=Image.BILINEAR,
                             fillcolor=background)
    if new_image.mode in ['LA']:
        # workaround for #1600 (bug in LA support which
        # causes areas fully transparent before rotation
        # to be filled with black here):
        image = new_image
        new_image = Image.new(image.mode, image.size, background)
        new_image.paste(image, mask=image.getchannel('A'))
    return new_image

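# Usage sketch for rotate_image (illustrative):
#   img = Image.new('RGB', (100, 50), (200, 200, 200))
#   rotated = rotate_image(img, 15, fill='background', transparency=True)
#   # -> a larger RGBA image; the margins outside the original image
#   #    are filled transparently
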
def shift_coordinates(transform, offset):
    """Compose an affine coordinate transformation with a translation.

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, and a numpy array
    ``offset`` of the translation vector, calculate the affine
    coordinate transform corresponding to the composition of both
    transformations.

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.shift_coordinates')
    LOG.debug('shifting coordinates by %s', str(offset))
    shift = np.eye(3)
    shift[0, 2] = offset[0]
    shift[1, 2] = offset[1]
    return np.dot(shift, transform)

def transform_coordinates(polygon, transform=None):
    """Apply an affine transformation to a set of points.

    Augment the 2d numpy array of points ``polygon`` with an extra
    column of ones (homogeneous coordinates), then multiply with
    the transformation matrix ``transform`` (or the identity matrix),
    and finally remove the extra column from the result.
    """
    if transform is None:
        transform = np.eye(3)
    polygon = np.insert(polygon, 2, 1, axis=1)  # make 3d homogeneous coordinates
    polygon = np.dot(transform, polygon.T).T
    # ones = polygon[:,2]
    # assert np.all(np.array_equal(ones, np.clip(ones, 1 - 1e-2, 1 + 1e-2))), \
    #     'affine transform failed' # should never happen
    polygon = np.delete(polygon, 2, axis=1)  # remove z coordinate again
    return polygon

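# Worked example for shift_coordinates and transform_coordinates (illustrative
# sketch): composing the identity with a translation by (5, 10) and applying
# it to two points:
#   t = shift_coordinates(np.eye(3), np.array([5, 10]))
#   transform_coordinates(np.array([[0, 0], [1, 2]]), t)
#   # -> array([[5., 10.], [6., 12.]])
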
def transpose_coordinates(transform, method, orig=np.array([0, 0])):
    """Compose an affine coordinate transformation with a transposition (i.e. flip or rotate in 90° multiples).

    Given a numpy array ``transform`` of an existing transformation
    matrix in homogeneous (3d) coordinates, a transposition mode ``method``,
    as well as a numpy array ``orig`` of the center of the image,
    calculate the affine coordinate transform corresponding to the composition
    of both transformations, which is respectively:

    - ``PIL.Image.FLIP_LEFT_RIGHT``:
      entails translation to the center, followed by pure reflection
      about the y-axis, and subsequent translation back
    - ``PIL.Image.FLIP_TOP_BOTTOM``:
      entails translation to the center, followed by pure reflection
      about the x-axis, and subsequent translation back
    - ``PIL.Image.ROTATE_180``:
      entails translation to the center, followed by pure reflection
      about the origin, and subsequent translation back
    - ``PIL.Image.ROTATE_90``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise, and subsequent translation back
    - ``PIL.Image.ROTATE_270``:
      entails translation to the center, followed by pure rotation
      by 270° counter-clockwise, and subsequent translation back
    - ``PIL.Image.TRANSPOSE``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise and pure reflection about the x-axis,
      and subsequent translation back
    - ``PIL.Image.TRANSVERSE``:
      entails translation to the center, followed by pure rotation
      by 90° counter-clockwise and pure reflection about the y-axis,
      and subsequent translation back

    Return a numpy array of the resulting affine transformation matrix.
    """
    LOG = getLogger('ocrd_utils.coords.transpose_coordinates')
    LOG.debug('transposing coordinates with %s around %s', membername(Image, method), str(orig))
    # get rotation matrix for passive rotation/reflection:
    rot90 = np.array([[0, 1, 0],
                      [-1, 0, 0],
                      [0, 0, 1]])
    reflx = np.array([[1, 0, 0],
                      [0, -1, 0],
                      [0, 0, 1]])
    refly = np.array([[-1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]])
    transform = shift_coordinates(transform, -orig)
    operations = {
        Image.FLIP_LEFT_RIGHT: [refly],
        Image.FLIP_TOP_BOTTOM: [reflx],
        Image.ROTATE_180: [reflx, refly],
        Image.ROTATE_90: [rot90],
        Image.ROTATE_270: [rot90, reflx, refly],
        Image.TRANSPOSE: [rot90, reflx],
        Image.TRANSVERSE: [rot90, refly]
    }.get(method)  # no default
    for operation in operations:
        transform = np.dot(operation, transform)
    transform = shift_coordinates(
        transform,
        # the image (bounding box) may flip with transposition,
        # so we must translate back to the new upper left:
        adjust_canvas_to_transposition(orig, method))
    return transform

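# Worked example for transpose_coordinates (illustrative sketch): a horizontal
# flip about the center of a W x H image maps (x, y) to (W - x, y):
#   W, H = 100, 50
#   t = transpose_coordinates(np.eye(3), Image.FLIP_LEFT_RIGHT,
#                             orig=np.array([W / 2, H / 2]))
#   transform_coordinates(np.array([[10, 20]]), t)
#   # -> array([[90., 20.]])
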
def transpose_image(image, method):
    """Transpose (i.e. flip or rotate in 90° multiples) an image.

    Given a PIL.Image ``image`` and a transposition mode ``method``,
    apply the respective operation:

    - ``PIL.Image.FLIP_LEFT_RIGHT``:
      all pixels get mirrored at half the width of the image
    - ``PIL.Image.FLIP_TOP_BOTTOM``:
      all pixels get mirrored at half the height of the image
    - ``PIL.Image.ROTATE_180``:
      all pixels get mirrored at both half the width and half the height
      of the image,
      i.e. the image gets rotated by 180° counter-clockwise
    - ``PIL.Image.ROTATE_90``:
      rows become columns (but counted from the right) and
      columns become rows,
      i.e. the image gets rotated by 90° counter-clockwise;
      width becomes height and vice versa
    - ``PIL.Image.ROTATE_270``:
      rows become columns and
      columns become rows (but counted from the bottom),
      i.e. the image gets rotated by 270° counter-clockwise;
      width becomes height and vice versa
    - ``PIL.Image.TRANSPOSE``:
      rows become columns and vice versa,
      i.e. all pixels get mirrored at the main diagonal;
      width becomes height and vice versa
    - ``PIL.Image.TRANSVERSE``:
      rows become columns (but counted from the right) and
      columns become rows (but counted from the bottom),
      i.e. all pixels get mirrored at the opposite diagonal;
      width becomes height and vice versa

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.transpose_image')
    LOG.debug('transposing image with %s', membername(Image, method))
    return image.transpose(method)

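# Usage sketch for transpose_image (illustrative): a 90-degree transposition
# swaps the image dimensions:
#   img = Image.new('L', (100, 50))
#   transpose_image(img, Image.ROTATE_90).size
#   # -> (50, 100)
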
def crop_image(image, box=None):
    """Crop an image to a rectangle, filling with background.

    Given a PIL.Image ``image`` and a list ``box`` of the bounding
    rectangle relative to the image, crop at the box coordinates,
    filling everything outside ``image`` with the background.
    (This covers the case where ``box`` indexes are negative or
    larger than ``image`` width/height. PIL.Image.crop would fill
    with black.) Since ``image`` is not necessarily binarized yet,
    determine the background from the median color (instead of
    white).

    Return a new PIL.Image.
    """
    LOG = getLogger('ocrd_utils.crop_image')
    if not box:
        box = (0, 0, image.width, image.height)
    elif box[0] < 0 or box[1] < 0 or box[2] > image.width or box[3] > image.height:
        # (It should be invalid in PAGE-XML to extend beyond parents.)
        LOG.warning('crop coordinates (%s) exceed image (%dx%d)',
                    str(box), image.width, image.height)
    LOG.debug('cropping image to %s', str(box))
    xywh = xywh_from_bbox(*box)
    background = ImageStat.Stat(image)
    if len(background.bands) > 1:
        background = tuple(background.median)
    else:
        background = background.median[0]
    new_image = Image.new(image.mode, (xywh['w'], xywh['h']),
                          background)  # or 'white'
    new_image.paste(image, (-xywh['x'], -xywh['y']))
    return new_image

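# Usage sketch for crop_image (illustrative): cropping beyond the right edge
# pads the result with the image's median color instead of black:
#   img = Image.new('L', (100, 50), 200)
#   crop_image(img, box=(80, 0, 120, 50)).size
#   # -> (40, 50); the 20 columns beyond the original are filled with 200
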
def image_from_polygon(image, polygon, fill='background', transparency=False):
    """Mask an image with a polygon.

    Given a PIL.Image ``image`` and a numpy array ``polygon``
    of relative coordinates into the image, fill everything
    outside the polygon hull to a color according to ``fill``:

    - if ``background`` (the default),
      then use the median color of the image;
    - otherwise use the given color, e.g. ``'white'`` or (255,255,255).

    Moreover, if ``transparency`` is true, then add an alpha channel
    from the polygon mask (i.e. everything outside the polygon will
    be transparent, for those consumers that can interpret alpha channels).
    Images which already have an alpha channel will have it shrunk
    from the polygon mask (i.e. everything outside the polygon will
    be transparent, in addition to existing transparent pixels).

    Return a new PIL.Image.
    """
    mask = polygon_mask(image, polygon)
    if fill == 'background':
        background = ImageStat.Stat(image)
        if len(background.bands) > 1:
            background = tuple(background.median)
        else:
            background = background.median[0]
    else:
        background = fill
    new_image = Image.new(image.mode, image.size, background)
    new_image.paste(image, mask=mask)
    # ensure no information is lost by adding a transparency channel
    # initialized to fully transparent outside the polygon mask
    # (so consumers do not have to rely on background estimation,
    # which can fail on foreground-dominated segments, or white,
    # which can be inconsistent on unbinarized images):
    if image.mode in ['RGBA', 'LA']:
        # ensure transparency maximizes (i.e. parent mask AND mask):
        mask = ImageChops.darker(mask, image.getchannel('A'))  # min opaque
        new_image.putalpha(mask)
    elif transparency and image.mode in ['RGB', 'L']:
        # introduce transparency:
        new_image.putalpha(mask)
    return new_image

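# Usage sketch for image_from_polygon (illustrative): keep only a triangular
# region and make the rest transparent:
#   img = Image.new('RGB', (100, 50), (10, 10, 10))
#   out = image_from_polygon(img, np.array([[0, 0], [100, 0], [50, 50]]),
#                            transparency=True)
#   # -> RGBA image; pixels outside the triangle are transparent
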
def points_from_bbox(minx, miny, maxx, maxy):
    """Construct polygon coordinates in page representation from a numeric list representing a bounding box."""
    return "%i,%i %i,%i %i,%i %i,%i" % (
        minx, miny, maxx, miny, maxx, maxy, minx, maxy)

def points_from_polygon(polygon):
    """Convert polygon coordinates from a numeric list representation to a page representation."""
    return " ".join("%i,%i" % (x, y) for x, y in polygon)

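# Worked example for points_from_bbox and points_from_polygon (illustrative):
#   points_from_bbox(10, 20, 40, 60)
#   # -> '10,20 40,20 40,60 10,60'
#   points_from_polygon([[10, 20], [40, 20], [40, 60], [10, 60]])
#   # -> '10,20 40,20 40,60 10,60'
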
def points_from_xywh(box):
    """
    Construct polygon coordinates in page representation from numeric dict representing a bounding box.
    """
    x, y, w, h = box['x'], box['y'], box['w'], box['h']
    # tesseract uses a different region representation format
    return "%i,%i %i,%i %i,%i %i,%i" % (
        x, y,
        x + w, y,
        x + w, y + h,
        x, y + h
    )

def points_from_y0x0y1x1(yxyx):
    """
    Construct a polygon representation from a rectangle described as a list [y0, x0, y1, x1]
    """
    y0 = yxyx[0]
    x0 = yxyx[1]
    y1 = yxyx[2]
    x1 = yxyx[3]
    return "%s,%s %s,%s %s,%s %s,%s" % (
        x0, y0,
        x1, y0,
        x1, y1,
        x0, y1
    )

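# Worked example for points_from_xywh and points_from_y0x0y1x1 (illustrative):
#   points_from_xywh({'x': 10, 'y': 20, 'w': 30, 'h': 40})
#   # -> '10,20 40,20 40,60 10,60'
#   points_from_y0x0y1x1([20, 10, 60, 40])
#   # -> '10,20 40,20 40,60 10,60'
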
def points_from_x0y0x1y1(xyxy):
    """
    Construct a polygon representation from a rectangle described as a list [x0, y0, x1, y1]
    """
    x0 = xyxy[0]
    y0 = xyxy[1]
    x1 = xyxy[2]
    y1 = xyxy[3]
    return "%s,%s %s,%s %s,%s %s,%s" % (
        x0, y0,
        x1, y0,
        x1, y1,
        x0, y1
    )

def polygon_from_bbox(minx, miny, maxx, maxy):
    """Construct polygon coordinates in numeric list representation from a numeric list representing a bounding box."""
    return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]]

def polygon_from_x0y0x1y1(x0y0x1y1):
    """Construct polygon coordinates in numeric list representation from a string list representing a bounding box."""
    minx = int(x0y0x1y1[0])
    miny = int(x0y0x1y1[1])
    maxx = int(x0y0x1y1[2])
    maxy = int(x0y0x1y1[3])
    return [[minx, miny], [maxx, miny], [maxx, maxy], [minx, maxy]]

def polygon_from_xywh(xywh):
    """Construct polygon coordinates in numeric list representation from numeric dict representing a bounding box."""
    return polygon_from_bbox(*bbox_from_xywh(xywh))

def xywh_from_bbox(minx, miny, maxx, maxy):
    """Convert a bounding box from a numeric list to a numeric dict representation."""
    return {
        'x': minx,
        'y': miny,
        'w': maxx - minx,
        'h': maxy - miny,
    }

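# Worked example for xywh_from_bbox (illustrative): it is inverse to
# bbox_from_xywh:
#   xywh_from_bbox(10, 20, 40, 60)
#   # -> {'x': 10, 'y': 20, 'w': 30, 'h': 40}
#   bbox_from_xywh({'x': 10, 'y': 20, 'w': 30, 'h': 40})
#   # -> (10, 20, 40, 60)
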
def xywh_from_points(points):
    """
    Construct a numeric dict representing a bounding box from polygon coordinates in page representation.
    """
    return xywh_from_bbox(*bbox_from_points(points))

def xywh_from_polygon(polygon):
    """Construct a numeric dict representing a bounding box from polygon coordinates in numeric list representation."""
    return xywh_from_bbox(*bbox_from_polygon(polygon))

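# Worked example for xywh_from_polygon and xywh_from_points (illustrative):
#   xywh_from_polygon([[10, 20], [40, 20], [40, 60], [10, 60]])
#   # -> {'x': 10, 'y': 20, 'w': 30, 'h': 40}
#   xywh_from_points('10,20 40,20 40,60 10,60')
#   # -> {'x': 10, 'y': 20, 'w': 30, 'h': 40}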