|
1
|
|
|
import io |
|
2
|
|
|
from os import makedirs, unlink, listdir, path |
|
3
|
|
|
from pathlib import Path |
|
4
|
|
|
|
|
5
|
|
|
import cv2 |
|
6
|
|
|
from PIL import Image |
|
7
|
|
|
import numpy as np |
|
8
|
|
|
from deprecated.sphinx import deprecated |
|
9
|
|
|
|
|
10
|
|
|
from ocrd_models import OcrdMets, OcrdFile |
|
11
|
|
|
from ocrd_models.ocrd_page import parse, BorderType |
|
12
|
|
|
from ocrd_modelfactory import exif_from_filename |
|
13
|
|
|
from ocrd_utils import ( |
|
14
|
|
|
atomic_write, |
|
15
|
|
|
getLogger, |
|
16
|
|
|
image_from_polygon, |
|
17
|
|
|
coordinates_of_segment, |
|
18
|
|
|
adjust_canvas_to_rotation, |
|
19
|
|
|
adjust_canvas_to_transposition, |
|
20
|
|
|
shift_coordinates, |
|
21
|
|
|
rotate_coordinates, |
|
22
|
|
|
transform_coordinates, |
|
23
|
|
|
transpose_coordinates, |
|
24
|
|
|
crop_image, |
|
25
|
|
|
rotate_image, |
|
26
|
|
|
transpose_image, |
|
27
|
|
|
bbox_from_polygon, |
|
28
|
|
|
polygon_from_points, |
|
29
|
|
|
xywh_from_bbox, |
|
30
|
|
|
pushd_popd, |
|
31
|
|
|
MIME_TO_EXT, |
|
32
|
|
|
MIME_TO_PIL, |
|
33
|
|
|
MIMETYPE_PAGE, |
|
34
|
|
|
REGEX_PREFIX |
|
35
|
|
|
) |
|
36
|
|
|
|
|
37
|
|
|
from .workspace_backup import WorkspaceBackupManager |
|
38
|
|
|
|
|
39
|
|
|
class Workspace(): |
|
40
|
|
|
""" |
|
41
|
|
|
A workspace is a temporary directory set up for a processor. It's the |
|
42
|
|
|
interface to the METS/PAGE XML and delegates download and upload to the |
|
43
|
|
|
Resolver. |
|
44
|
|
|
|
|
45
|
|
|
Args: |
|
46
|
|
|
|
|
47
|
|
|
directory (string) : Folder to work in |
|
48
|
|
|
mets (:class:`OcrdMets`) : OcrdMets representing this workspace. Loaded from 'mets.xml' if ``None``. |
|
49
|
|
|
mets_basename (string) : Basename of the METS XML file. Default: Last URL segment of the mets_url. |
|
50
|
|
|
overwrite_mode (boolean) : Whether to force add operations on this workspace globally |
|
51
|
|
|
baseurl (string) : Base URL to prefix to relative URL. |
|
52
|
|
|
""" |
|
53
|
|
|
|
|
54
|
|
|
def __init__(self, resolver, directory, mets=None, mets_basename='mets.xml', automatic_backup=False, baseurl=None):
    """
    Set up a workspace rooted at ``directory``.

    Args:
        resolver : Object whose ``download_to_directory`` is used to fetch files
        directory (string) : Folder to work in
        mets : METS object for this workspace. Loaded from the METS file in
            ``directory`` if ``None``.
        mets_basename (string) : Basename of the METS XML file inside ``directory``
        automatic_backup (boolean) : Whether ``save_mets`` adds a backup before writing
        baseurl (string) : Base URL to prefix to relative URL.
    """
    mets_path = Path(directory, mets_basename)
    self.resolver, self.directory = resolver, directory
    self.mets_target = str(mets_path)
    # add/remove operations are only forced globally once a caller flips this flag
    self.overwrite_mode = False
    self.mets = OcrdMets(filename=self.mets_target) if mets is None else mets
    self.automatic_backup = automatic_backup
    self.baseurl = baseurl
|
65
|
|
|
|
|
66
|
|
|
def __str__(self):
    """Summarize the workspace: directory, base URL, file groups and all files."""
    summary = (
        self.directory,
        self.baseurl,
        self.mets.file_groups,
        list(map(str, self.mets.find_all_files())),
    )
    return 'Workspace[directory=%s, baseurl=%s, file_groups=%s, files=%s]' % summary
|
73
|
|
|
|
|
74
|
|
|
def reload_mets(self):
    """
    Replace the in-memory METS with one freshly parsed from ``self.mets_target``.
    """
    fresh_mets = OcrdMets(filename=self.mets_target)
    self.mets = fresh_mets
|
79
|
|
|
|
|
80
|
|
|
|
|
81
|
|
|
@deprecated(version='1.0.0', reason="Use workspace.download_file")
def download_url(self, url, **kwargs):
    """
    Fetch ``url`` into the workspace (deprecated wrapper around ``download_file``).

    Args:
        url (string): URL to download to directory
        **kwargs : Passed on to :py:mod:`ocrd_models.ocrd_file.OcrdFile`

    Returns:
        The local filename of the downloaded file
    """
    downloaded = self.download_file(OcrdFile(None, url=url, **kwargs))
    return downloaded.local_filename
|
96
|
|
|
|
|
97
|
|
|
|
|
98
|
|
|
def download_file(self, f, _recursion_count=0):
    """
    Download a :py:mod:`ocrd.model.ocrd_file.OcrdFile` to the workspace.

    If ``f.url`` already resolves to an existing path below the workspace
    directory, nothing is downloaded. Otherwise the resolver is asked to
    download it (with ``f.fileGrp`` as ``subdir``). If that raises
    ``FileNotFoundError`` and the workspace has a ``baseurl``, the download
    is retried once with ``baseurl`` prepended to ``f.url``.

    Args:
        f (OcrdFile): File to download. Its ``url`` and ``local_filename``
            attributes are updated in place.
        _recursion_count (int): Internal retry counter; callers should not set it.

    Returns:
        The same ``f``, with ``local_filename`` set to the resulting ``url``.

    Raises:
        Exception: If the resolver reports ``FileNotFoundError`` and either no
            ``baseurl`` is configured or prepending it was already tried.
    """
    log = getLogger('ocrd.workspace.download_file')
    log.debug('download_file %s [_recursion_count=%s]', f, _recursion_count)
    with pushd_popd(self.directory):
        # If the f.url is already a file path, and is within self.directory, do nothing.
        # Any failure while inspecting f.url (e.g. relative_to() raising because the
        # path is outside the workspace) simply means "not downloaded yet".
        try:
            url_path = Path(f.url).resolve()
            already_local = bool(
                url_path.exists() and
                url_path.relative_to(str(Path(self.directory).resolve())))
        except Exception:  # deliberately broad, see comment above
            already_local = False

        # Download outside of any exception handler, so that genuine download
        # errors are not reported as happening "during handling of" the probe above.
        if not already_local:
            basename = '%s%s' % (f.ID, MIME_TO_EXT.get(f.mimetype, '')) if f.ID else f.basename
            try:
                f.url = self.resolver.download_to_directory(self.directory, f.url, subdir=f.fileGrp, basename=basename)
            except FileNotFoundError as e:
                if not self.baseurl:
                    raise Exception("No baseurl defined by workspace. Cannot retrieve '%s'" % f.url) from e
                if _recursion_count >= 1:
                    raise Exception("Already tried prepending baseurl '%s'. Cannot retrieve '%s'" % (self.baseurl, f.url)) from e
                log.debug("First run of resolver.download_to_directory(%s) failed, try prepending baseurl '%s': %s", f.url, self.baseurl, e)
                f.url = '%s/%s' % (self.baseurl, f.url)
                f.url = self.download_file(f, _recursion_count + 1).local_filename
        f.local_filename = f.url
        return f
|
124
|
|
|
|
|
125
|
|
|
def remove_file(self, ID, force=False, keep_file=False, page_recursive=False, page_same_group=False):
    """
    Remove a file from the workspace.

    The file is first removed from the METS. Optionally, images referenced
    from a PAGE-XML file are removed too, and (unless ``keep_file``) the
    local files are deleted from disk.

    Arguments:
        ID (string|OcrdFile): ID of the file to delete or the file itself
        force (boolean): Continue removing even if file not found in METS.
            Implied when ``self.overwrite_mode`` is set.
        keep_file (boolean): Whether to keep files on disk
        page_recursive (boolean): Whether to remove all images referenced in the file if the file is a PAGE-XML document.
        page_same_group (boolean): Remove only images in the same file group as the PAGE-XML. Has no effect unless ``page_recursive`` is ``True``.

    Returns:
        Whatever ``self.mets.remove_file(ID)`` returned, or ``None`` if a
        ``FileNotFoundError`` was suppressed because of ``force``.

    Raises:
        Exception: If a removed file has no ``local_filename`` and ``force`` is not set.
        FileNotFoundError: Re-raised from anywhere inside the removal when ``force`` is not set.
    """
    log = getLogger('ocrd.workspace.remove_file')
    log.debug('Deleting mets:file %s', ID)
    # a workspace-wide overwrite mode implies force
    if not force and self.overwrite_mode:
        force = True
    # accept the file object itself in place of its ID
    if isinstance(ID, OcrdFile):
        ID = ID.ID
    try:
        ocrd_file_ = self.mets.remove_file(ID)
        # normalize: a single OcrdFile is wrapped, anything else is iterated as a collection
        ocrd_files = [ocrd_file_] if isinstance(ocrd_file_, OcrdFile) else ocrd_file_
        if page_recursive:
            with pushd_popd(self.directory):
                for ocrd_file in ocrd_files:
                    # only PAGE-XML files can reference further images
                    if ocrd_file.mimetype != MIMETYPE_PAGE:
                        continue
                    ocrd_page = parse(self.download_file(ocrd_file).local_filename, silence=True)
                    for img_url in ocrd_page.get_AllAlternativeImagePaths():
                        img_kwargs = {'url': img_url}
                        if page_same_group:
                            img_kwargs['fileGrp'] = ocrd_file.fileGrp
                        for img_file in self.mets.find_files(**img_kwargs):
                            # note: page_recursive is not passed on for the images themselves
                            self.remove_file(img_file, keep_file=keep_file, force=force)
        if not keep_file:
            with pushd_popd(self.directory):
                for ocrd_file in ocrd_files:
                    if not ocrd_file.local_filename:
                        log.warning("File not locally available %s", ocrd_file)
                        if not force:
                            raise Exception("File not locally available %s" % ocrd_file)
                    else:
                        log.info("rm %s [cwd=%s]", ocrd_file.local_filename, self.directory)
                        unlink(ocrd_file.local_filename)
        return ocrd_file_
    except FileNotFoundError as e:
        # with force, a missing file is silently tolerated (and None is returned)
        if not force:
            raise e
|
171
|
|
|
|
|
172
|
|
|
def remove_file_group(self, USE, recursive=False, force=False, keep_files=False, page_recursive=False, page_same_group=False):
    """
    Remove a fileGrp.

    Arguments:
        USE (string): USE attribute of the fileGrp to delete
        recursive (boolean): Whether to recursively delete all files in the group
        force (boolean): Continue removing even if group or containing files not found in METS.
            Implied when ``self.overwrite_mode`` is set.
        keep_files (boolean): When deleting recursively whether to keep files on disk
        page_recursive (boolean): Whether to remove all images referenced in the file if the file is a PAGE-XML document.
        page_same_group (boolean): Remove only images in the same file group as the PAGE-XML. Has no effect unless ``page_recursive`` is ``True``.

    Raises:
        Exception: If ``USE`` is not a regex (does not start with ``REGEX_PREFIX``),
            is not a known fileGrp, and ``force`` is not set.
    """
    if not force and self.overwrite_mode:
        force = True

    if (not USE.startswith(REGEX_PREFIX)) and (USE not in self.mets.file_groups) and (not force):
        raise Exception("No such fileGrp: %s" % USE)

    # directories that contained removed files (candidates for cleanup below)
    file_dirs = set()
    if recursive:
        for f in self.mets.find_files(fileGrp=USE):
            self.remove_file(f, force=force, keep_file=keep_files, page_recursive=page_recursive, page_same_group=page_same_group)
            if f.local_filename:
                file_dir = path.dirname(f.local_filename)
                # A file directly in the workspace root has dirname ''.
                # listdir('') raises FileNotFoundError, and the workspace
                # directory itself must not be removed anyway, so skip it.
                if file_dir:
                    file_dirs.add(file_dir)

    self.mets.remove_file_group(USE, force=force)

    # PLEASE NOTE: this only removes directories in the workspace if they are empty
    # and named after the fileGrp which is a convention in OCR-D.
    with pushd_popd(self.directory):
        if Path(USE).is_dir() and not listdir(USE):
            Path(USE).rmdir()
        for file_dir in file_dirs:
            if Path(file_dir).is_dir() and not listdir(file_dir):
                Path(file_dir).rmdir()
|
208
|
|
|
|
|
209
|
|
|
|
|
210
|
|
|
def add_file(self, file_grp, content=None, **kwargs):
    """
    Register a new file in ``file_grp`` of the METS and optionally write it to disk.

    ``kwargs`` must contain ``pageId`` (which may be ``None``) and are passed
    on to ``self.mets.add_file``. When ``content`` is given, ``local_filename``
    is required as well and the content is written there (``str`` is encoded
    as UTF-8). Returns what ``self.mets.add_file`` returned.
    """
    log = getLogger('ocrd.workspace.add_file')
    has_content = content is not None
    log.debug(
        'outputfile file_grp=%s local_filename=%s content=%s',
        file_grp, kwargs.get('local_filename'), has_content)
    if 'pageId' not in kwargs:
        raise ValueError("workspace.add_file must be passed a 'pageId' kwarg, even if it is None.")
    if has_content and 'local_filename' not in kwargs:
        raise Exception("'content' was set but no 'local_filename'")
    if self.overwrite_mode:
        kwargs['force'] = True

    with pushd_popd(self.directory):
        if 'local_filename' in kwargs:
            target = kwargs['local_filename']
            # Create the parent folder if the filename has a '/' and it is missing
            parent_dir, separator, _ = target.rpartition('/')
            if separator and not Path(parent_dir).is_dir():
                makedirs(parent_dir)
            # the local filename doubles as URL unless one was given explicitly
            kwargs.setdefault('url', target)

        ret = self.mets.add_file(file_grp, **kwargs)

        if has_content:
            with open(kwargs['local_filename'], 'wb') as out:
                if isinstance(content, str):
                    content = content.encode('utf-8')
                out.write(content)

    return ret
|
247
|
|
|
|
|
248
|
|
|
def save_mets(self):
    """
    Serialize the in-memory METS and write it to ``self.mets_target``.

    If ``self.automatic_backup`` is set, a backup is added via
    ``WorkspaceBackupManager`` before writing.
    """
    log = getLogger('ocrd.workspace.save_mets')
    log.info("Saving mets '%s'", self.mets_target)
    if self.automatic_backup:
        backup_manager = WorkspaceBackupManager(self)
        backup_manager.add()
    with atomic_write(self.mets_target) as mets_out:
        xml_text = self.mets.to_xml(xmllint=True).decode('utf-8')
        mets_out.write(xml_text)
|
258
|
|
|
|
|
259
|
|
|
def resolve_image_exif(self, image_url):
    """
    Look up the EXIF metadata of an image as :class:`OcrdExif`.

    The image is looked up in the METS by URL (falling back to an ad-hoc
    file object for that URL), downloaded, and then inspected.

    Args:
        image_url (string) : URL of image

    Return
        :class:`OcrdExif`
    """
    if not image_url:
        # avoid "finding" just any file
        raise Exception("Cannot resolve empty image path")
    candidates = self.mets.find_files(url=image_url)
    image_file = next(candidates, OcrdFile(None, url=image_url))
    return exif_from_filename(self.download_file(image_file).local_filename)
|
276
|
|
|
|
|
277
|
|
|
@deprecated(version='1.0.0', reason="Use workspace.image_from_page and workspace.image_from_segment")
def resolve_image_as_pil(self, image_url, coords=None):
    """
    Deprecated public alias of ``_resolve_image_as_pil``; arguments are passed through unchanged.
    """
    pil_image = self._resolve_image_as_pil(image_url, coords)
    return pil_image
|
280
|
|
|
|
|
281
|
|
|
def _resolve_image_as_pil(self, image_url, coords=None):
    """
    Resolve an image URL to a PIL image.

    The image is looked up in the METS by URL (falling back to an ad-hoc
    file object for that URL), downloaded and opened. Images whose PIL mode
    starts with ``I`` or ``F`` are re-quantized to 8 bit.

    Args:
        - image_url (string) : URL of the image; must not be empty
        - coords (list) : Points (x, y) whose bounding box is cut from the image;
          if ``None``, the whole image is returned

    Returns:
        Image or region in image as PIL.Image

    Raises:
        Exception: If ``image_url`` is empty.
    """
    if not image_url:
        # avoid "finding" just any file
        raise Exception("Cannot resolve empty image path")
    log = getLogger('ocrd.workspace._resolve_image_as_pil')
    f = next(self.mets.find_files(url=image_url), OcrdFile(None, url=image_url))
    image_filename = self.download_file(f).local_filename

    with pushd_popd(self.directory):
        pil_image = Image.open(image_filename)
        pil_image.load() # alloc and give up the FD

    # Pillow does not properly support higher color depths
    # (e.g. 16-bit or 32-bit or floating point grayscale),
    # clipping its dynamic range to the lower 8-bit in
    # many operations (including paste, putalpha, ImageStat...),
    # even including conversion.
    # Cf. Pillow#3011 Pillow#3159 Pillow#3838 (still open in 8.0)
    # So to be on the safe side, we must re-quantize these
    # to 8-bit via numpy (conversion to/from which fortunately
    # seems to work reliably):
    if (pil_image.mode.startswith('I') or
        pil_image.mode.startswith('F')):
        arr_image = np.array(pil_image)
        if arr_image.dtype.kind == 'i':
            # signed integer is *not* trustworthy in this context
            # (usually a mistake in the array interface)
            log.debug('Casting image "%s" from signed to unsigned', image_url)
            # reinterprets the same bytes as unsigned (no value conversion)
            # NOTE(review): newer NumPy releases may discourage assigning to
            # ``.dtype`` in favour of ``.view()`` - confirm before upgrading
            arr_image.dtype = np.dtype('u' + arr_image.dtype.name)
        if arr_image.dtype.kind == 'u':
            # integer needs to be scaled linearly to 8 bit
            # of course, an image might actually have some lower range
            # (e.g. 10-bit in I;16 or 20-bit in I or 4-bit in L),
            # but that would be guessing anyway, so here don't
            # make assumptions on _scale_, just reduce _precision_
            log.debug('Reducing image "%s" from depth %d bit to 8 bit',
                      image_url, arr_image.dtype.itemsize * 8)
            # keep only the most significant byte of each sample
            arr_image = arr_image >> 8 * (arr_image.dtype.itemsize-1)
            arr_image = arr_image.astype(np.uint8)
        elif arr_image.dtype.kind == 'f':
            # float needs to be scaled from [0,1.0] to [0,255]
            log.debug('Reducing image "%s" from floating point to 8 bit',
                      image_url)
            arr_image *= 255
            arr_image = arr_image.astype(np.uint8)
        pil_image = Image.fromarray(arr_image)

    if coords is None:
        return pil_image

    # FIXME: remove or replace this by (image_from_polygon+) crop_image ...
    log.debug("Converting PIL to OpenCV: %s", image_url)
    # modes '1' and 'L' are treated as grayscale, every other mode as RGB
    color_conversion = cv2.COLOR_GRAY2BGR if pil_image.mode in ('1', 'L') else cv2.COLOR_RGB2BGR
    pil_as_np_array = np.array(pil_image).astype('uint8') if pil_image.mode == '1' else np.array(pil_image)
    cv2_image = cv2.cvtColor(pil_as_np_array, color_conversion)

    poly = np.array(coords, np.int32)
    log.debug("Cutting region %s from %s", coords, image_url)
    # rows are indexed by the second coordinate (y), columns by the first (x);
    # the slice end is exclusive, so the maximum row/column is not included
    region_cut = cv2_image[
        np.min(poly[:, 1]):np.max(poly[:, 1]),
        np.min(poly[:, 0]):np.max(poly[:, 0])
    ]
    # NOTE(review): the array is handed back to PIL in the channel order produced
    # by the *2BGR conversion above, without converting back - confirm this is intended
    return Image.fromarray(region_cut)
|
354
|
|
|
|
|
355
|
|
|
def image_from_page(self, page, page_id,
                    fill='background', transparency=False,
                    feature_selector='', feature_filter=''):
    """Extract an image for a PAGE-XML page from the workspace.

    Given ``page``, a PAGE PageType object, extract its PIL.Image,
    either from its AlternativeImage (if it exists), or from its
    @imageFilename (otherwise). Also crop it, if a Border exists,
    and rotate it, if any @orientation angle is annotated.

    If ``feature_selector`` and/or ``feature_filter`` is given, then
    select/filter among the @imageFilename image and the available
    AlternativeImages the last one which contains all of the selected,
    but none of the filtered features (i.e. @comments classes), or
    raise an error.

    (Required and produced features need not be in the same order, so
    ``feature_selector`` is merely a mask specifying Boolean AND, and
    ``feature_filter`` is merely a mask specifying Boolean OR.)

    If the chosen image does not have the feature "cropped" yet, but
    a Border exists, and unless "cropped" is being filtered, then crop it.
    Likewise, if the chosen image does not have the feature "deskewed" yet,
    but an @orientation angle is annotated, and unless "deskewed" is being
    filtered, then rotate it. (However, if @orientation is above the
    [-45°,45°] interval, then apply as much transposition as possible first,
    unless "rotated-90" / "rotated-180" / "rotated-270" is being filtered.)

    Cropping uses a polygon mask (not just the bounding box rectangle).
    Areas outside the polygon will be filled according to ``fill``:

    - if ``background`` (the default),
      then fill with the median color of the image;
    - otherwise, use the given color, e.g. ``white`` or (255,255,255).

    Moreover, if ``transparency`` is true, and unless the image already
    has an alpha channel, then add an alpha channel which is fully opaque
    before cropping and rotating. (Thus, only the exposed areas will be
    transparent afterwards, for those that can interpret alpha channels).

    Args:
        page : PAGE PageType object to extract the image for
        page_id (string) : Page identifier, used in log and error messages only
        fill : Fill for exposed areas, passed on to cropping and rotation
        transparency (boolean) : Passed on to cropping and rotation
        feature_selector (string) : Comma-separated features the image must have
        feature_filter (string) : Comma-separated features the image must not have

    Return a tuple:

     * the extracted image,
     * a dictionary with information about the extracted image:

       - ``transform``: a Numpy array with an affine transform which
         converts from absolute coordinates to those relative to the image,
         i.e. after cropping to the page's border / bounding box (if any)
         and deskewing with the page's orientation angle (if any)
       - ``angle``: the rotation/reflection angle applied to the image so far,
       - ``features``: the AlternativeImage @comments for the image, i.e.
         names of all operations that lead up to this result,

     * an OcrdExif instance associated with the original image.

    (The first two can be used to annotate a new AlternativeImage,
     or be passed down with ``image_from_segment``.)

    Raises:
        Exception: If the resulting image lacks a selected feature
            or carries a filtered one.

    Example:

     * get a raw (colored) but already deskewed and cropped image:

       ``
       page_image, page_coords, page_image_info = workspace.image_from_page(
             page, page_id,
             feature_selector='deskewed,cropped',
             feature_filter='binarized,grayscale_normalized')
       ``
    """
    log = getLogger('ocrd.workspace.image_from_page')
    # EXIF info and initial image both come from the original @imageFilename
    page_image_info = self.resolve_image_exif(page.imageFilename)
    page_image = self._resolve_image_as_pil(page.imageFilename)
    page_coords = dict()
    # use identity as initial affine coordinate transform:
    page_coords['transform'] = np.eye(3)
    # interim bbox (updated with each change to the transform):
    # NOTE(review): page_bbox is not read again within this function
    page_bbox = [0, 0, page_image.width, page_image.height]
    page_xywh = {'x': 0, 'y': 0,
                 'w': page_image.width, 'h': page_image.height}

    border = page.get_Border()
    # page angle: PAGE @orientation is defined clockwise,
    # whereas PIL/ndimage rotation is in mathematical direction:
    page_coords['angle'] = -(page.get_orientation() or 0)
    # map angle from (-180,180] to [0,360], and partition into multiples of 90;
    # but avoid unnecessary large remainders, i.e. split symmetrically:
    orientation = (page_coords['angle'] + 45) % 360
    orientation = orientation - (orientation % 90)
    skew = (page_coords['angle'] % 360) - orientation
    skew = 180 - (180 - skew) % 360 # map to [-45,45]
    page_coords['angle'] = 0 # nothing applied yet (depends on filters)
    log.debug("page '%s' has %s orientation=%d skew=%.2f",
              page_id, "border," if border else "", orientation, skew)

    # initialize AlternativeImage@comments classes as empty:
    page_coords['features'] = ''
    alternative_image = None
    alternative_images = page.get_AlternativeImage()
    if alternative_images:
        # (e.g. from page-level cropping, binarization, deskewing or despeckling)
        if feature_selector or feature_filter:
            alternative_image = None
            # search from the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent:
            for alternative_image in reversed(alternative_images):
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, page_id)
                    features = ''
                # note: these are substring tests against the @comments string
                if (all(feature in features
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in features
                            for feature in feature_filter.split(',') if feature)):
                    break
                else:
                    # reset, so that nothing stays selected if the loop runs out
                    alternative_image = None
        else:
            # without constraints, simply take the most recent one
            alternative_image = alternative_images[-1]
            features = alternative_image.get_comments()
            if not features:
                log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
                            alternative_images.index(alternative_image) + 1, page_id)
                features = ''
        if alternative_image:
            log.debug("Using AlternativeImage %d (%s) for page '%s'",
                      alternative_images.index(alternative_image) + 1,
                      features, page_id)
            page_image = self._resolve_image_as_pil(alternative_image.get_filename())
            page_coords['features'] = features

    # adjust the coord transformation to the steps applied on the image,
    # and apply steps on the existing image in case it is missing there,
    # but traverse all steps (crop/reflect/rotate) in a particular order:
    # - existing image features take priority (in the order annotated),
    # - next is cropping (if necessary but not already applied),
    # - next is reflection (if necessary but not already applied),
    # - next is rotation (if necessary but not already applied).
    # This helps deal with arbitrary workflows (e.g. crop then deskew,
    # or deskew then crop), regardless of where images are generated.
    alternative_image_features = page_coords['features'].split(',')
    for duplicate_feature in set([feature for feature in alternative_image_features
                                  # features relevant in reconstructing coordinates:
                                  if (feature in ['cropped', 'deskewed', 'rotated-90',
                                                  'rotated-180', 'rotated-270'] and
                                      alternative_image_features.count(feature) > 1)]):
        log.error("Duplicate feature %s in AlternativeImage for page '%s'",
                  duplicate_feature, page_id)
    for i, feature in enumerate(alternative_image_features +
                                (['cropped']
                                 if (border and
                                     not 'cropped' in alternative_image_features and
                                     not 'cropped' in feature_filter.split(','))
                                 else []) +
                                (['rotated-%d' % orientation]
                                 if (orientation and
                                     not 'rotated-%d' % orientation in alternative_image_features and
                                     not 'rotated-%d' % orientation in feature_filter.split(','))
                                 else []) +
                                (['deskewed']
                                 if (skew and
                                     not 'deskewed' in alternative_image_features and
                                     not 'deskewed' in feature_filter.split(','))
                                 else []) +
                                # not a feature to be added, but merely as a fallback position
                                # to always enter loop at i == len(alternative_image_features)
                                ['_check']):
        # image geometry vs feature consistency can only be checked
        # after all features on the existing AlternativeImage have
        # been adjusted for in the transform, and when there is a mismatch,
        # additional steps applied here would only repeat the respective
        # error message; so we only check once at the boundary between
        # existing and new features
        # FIXME we should check/enforce consistency when _adding_ AlternativeImage
        if (i == len(alternative_image_features) and
            not (page_xywh['w'] - 2 < page_image.width < page_xywh['w'] + 2 and
                 page_xywh['h'] - 2 < page_image.height < page_xywh['h'] + 2)):
            # a mismatch is only logged; processing continues regardless
            log.error('page "%s" image (%s; %dx%d) has not been cropped properly (%dx%d)',
                      page_id, page_coords['features'],
                      page_image.width, page_image.height,
                      page_xywh['w'], page_xywh['h'])
        name = "%s for page '%s'" % ("AlternativeImage" if alternative_image
                                     else "original image", page_id)
        # adjust transform to feature, and ensure feature is applied to image
        # (features other than these three, including '' and '_check', are no-ops here)
        if feature == 'cropped':
            page_image, page_coords, page_xywh = _crop(
                log, name, border, page_image, page_coords,
                fill=fill, transparency=transparency)
        elif feature == 'rotated-%d' % orientation:
            page_image, page_coords, page_xywh = _reflect(
                log, name, orientation, page_image, page_coords, page_xywh)
        elif feature == 'deskewed':
            page_image, page_coords, page_xywh = _rotate(
                log, name, skew, border, page_image, page_coords, page_xywh,
                fill=fill, transparency=transparency)

    # verify constraints again:
    if not all(feature in page_coords['features']
               for feature in feature_selector.split(',') if feature):
        raise Exception('Found no AlternativeImage that satisfies all requirements ' +
                        'selector="%s" in page "%s"' % (
                            feature_selector, page_id))
    if any(feature in page_coords['features']
           for feature in feature_filter.split(',') if feature):
        raise Exception('Found no AlternativeImage that satisfies all requirements ' +
                        'filter="%s" in page "%s"' % (
                            feature_filter, page_id))
    page_image.format = 'PNG' # workaround for tesserocr#194
    return page_image, page_coords, page_image_info
|
564
|
|
|
|
|
565
|
|
|
def image_from_segment(self, segment, parent_image, parent_coords,
                       fill='background', transparency=False,
                       feature_selector='', feature_filter=''):
    """Extract an image for a PAGE-XML hierarchy segment from its parent's image.

    Given...

     * ``parent_image``, a PIL.Image of the parent, with
     * ``parent_coords``, a dict with information about ``parent_image``:
       - ``transform``: a Numpy array with an affine transform which
         converts from absolute coordinates to those relative to the image,
         i.e. after applying all operations (starting with the original image)
       - ``angle``: the rotation/reflection angle applied to the image so far,
       - ``features``: the AlternativeImage @comments for the image, i.e.
         names of all operations that lead up to this result, and
     * ``segment``, a PAGE segment object logically contained in it
       (i.e. TextRegionType / TextLineType / WordType / GlyphType),

    ...extract the segment's corresponding PIL.Image, either from
    AlternativeImage (if it exists), or producing a new image via
    cropping from ``parent_image`` (otherwise).

    If ``feature_selector`` and/or ``feature_filter`` is given, then
    select/filter among the cropped ``parent_image`` and the available
    AlternativeImages the last one which contains all of the selected,
    but none of the filtered features (i.e. @comments classes), or
    raise an error.

    (Required and produced features need not be in the same order, so
    ``feature_selector`` is merely a mask specifying Boolean AND, and
    ``feature_filter`` is merely a mask specifying Boolean OR.
    Both are comma-separated lists; each entry is compared against the
    comma-separated @comments entries as a whole name, not as a substring.)

    Cropping uses a polygon mask (not just the bounding box rectangle).
    Areas outside the polygon will be filled according to ``fill``:

    - if ``background`` (the default),
      then fill with the median color of the image;
    - otherwise, use the given color, e.g. ``white`` or (255,255,255).

    Moreover, if ``transparency`` is true, and unless the image already
    has an alpha channel, then add an alpha channel which is fully opaque
    before cropping and rotating. (Thus, only the exposed areas will be
    transparent afterwards, for those that can interpret alpha channels).

    When cropping, compensate any @orientation angle annotated for the
    parent (from parent-level deskewing) by rotating the segment coordinates
    in an inverse transformation (i.e. translation to center, then passive
    rotation, and translation back).

    Regardless, if any @orientation angle is annotated for the segment
    (from segment-level deskewing), and the chosen image does not have
    the feature "deskewed" yet, and unless "deskewed" is being filtered,
    then rotate it - compensating for any previous ``angle``. (However,
    if @orientation is above the [-45°,45°] interval, then apply as much
    transposition as possible first, unless "rotated-90" / "rotated-180" /
    "rotated-270" is being filtered.)

    Return a tuple:

     * the extracted image,
     * a dictionary with information about the extracted image:
       - ``transform``: a Numpy array with an affine transform which
         converts from absolute coordinates to those relative to the image,
         i.e. after applying all parent operations, and then cropping to
         the segment's bounding box, and deskewing with the segment's
         orientation angle (if any)
       - ``angle``: the rotation/reflection angle applied to the image so far,
       - ``features``: the AlternativeImage @comments for the image, i.e.
         names of all operations that lead up to this result.

    (These can be used to create a new AlternativeImage, or passed down
     for calls on lower hierarchy levels.)

    Raises:
        Exception: if the resulting image lacks any feature named in
            ``feature_selector`` or has any feature named in ``feature_filter``.

    Example:

     * get a raw (colored) but already deskewed and cropped image:

       ``image, coords = workspace.image_from_segment(region,
             page_image, page_coords,
             feature_selector='deskewed,cropped',
             feature_filter='binarized,grayscale_normalized')``
    """
    log = getLogger('ocrd.workspace.image_from_segment')
    # requested / forbidden feature names (empty entries are ignored):
    selected_features = [feature for feature in feature_selector.split(',') if feature]
    filtered_features = [feature for feature in feature_filter.split(',') if feature]
    # note: We should mask overlapping neighbouring segments here,
    # but finding the right clipping rules can be difficult if operating
    # on the raw (non-binary) image data alone: for each intersection, it
    # must be decided which one of either segment or neighbour to assign,
    # e.g. an ImageRegion which properly contains our TextRegion should be
    # completely ignored, but an ImageRegion which is properly contained
    # in our TextRegion should be completely masked, while partial overlap
    # may be more difficult to decide. On the other hand, on the binary image,
    # we can use connected component analysis to mask foreground areas which
    # originate in the neighbouring regions. But that would introduce either
    # the assumption that the input has already been binarized, or a dependency
    # on some ad-hoc binarization method. Thus, it is preferable to use
    # a dedicated processor for this (which produces clipped AlternativeImage
    # or reduced polygon coordinates).
    segment_image, segment_coords, segment_xywh = _crop(
        log, "parent image for segment '%s'" % segment.id,
        segment, parent_image, parent_coords,
        fill=fill, transparency=transparency)

    # Semantics of missing @orientation at region level could be either
    # - inherited from page level: same as line or word level (no @orientation),
    # - zero (unrotate page angle): different from line or word level (because
    #   otherwise deskewing would never have an effect on lines and words)
    # The PAGE specification is silent here (but does generally not concern itself
    # much with AlternativeImage coordinate consistency).
    # Since our (generateDS-backed) ocrd_page supports the zero/none distinction,
    # we choose the former (i.e. None is inheritance).
    if 'orientation' in segment.__dict__ and segment.get_orientation() is not None:
        # region angle: PAGE @orientation is defined clockwise,
        # whereas PIL/ndimage rotation is in mathematical direction:
        angle = -segment.get_orientation()
        # @orientation is always absolute; if higher levels
        # have already rotated, then we must compensate:
        angle -= parent_coords['angle']
        # map angle from (-180,180] to [0,360], and partition into multiples of 90;
        # but avoid unnecessary large remainders, i.e. split symmetrically:
        orientation = (angle + 45) % 360
        orientation = orientation - (orientation % 90)
        skew = (angle % 360) - orientation
        skew = 180 - (180 - skew) % 360 # map to [-45,45]
        log.debug("segment '%s' has orientation=%d skew=%.2f",
                  segment.id, orientation, skew)
    else:
        orientation = 0
        skew = 0
    segment_coords['angle'] = parent_coords['angle'] # nothing applied yet (depends on filters)

    # initialize AlternativeImage@comments classes from parent, except
    # for those operations that can apply on multiple hierarchy levels:
    segment_coords['features'] = ','.join(
        [feature for feature in parent_coords['features'].split(',')
         if feature in ['binarized', 'grayscale_normalized',
                        'despeckled', 'dewarped']])

    alternative_image = None
    features = ''
    alternative_images = segment.get_AlternativeImage()

    def _features_of(image):
        # @comments of an AlternativeImage as a string ('' plus a warning if unset)
        comments = image.get_comments()
        if not comments:
            log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
                        alternative_images.index(image) + 1, segment.id)
            comments = ''
        return comments

    if alternative_images:
        # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
        if feature_selector or feature_filter:
            # search from the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent:
            for candidate in reversed(alternative_images):
                candidate_features = _features_of(candidate)
                # compare whole feature names (not substrings of the @comments string):
                featureset = set(candidate_features.split(','))
                if (all(feature in featureset for feature in selected_features) and
                        not any(feature in featureset for feature in filtered_features)):
                    alternative_image = candidate
                    features = candidate_features
                    break
        else:
            alternative_image = alternative_images[-1]
            features = _features_of(alternative_image)
        if alternative_image:
            log.debug("Using AlternativeImage %d (%s) for segment '%s'",
                      alternative_images.index(alternative_image) + 1,
                      features, segment.id)
            segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
            segment_coords['features'] = features

    alternative_image_features = segment_coords['features'].split(',')
    # features relevant in reconstructing coordinates must not occur repeatedly:
    for duplicate_feature in {feature for feature in alternative_image_features
                              if (feature in ['deskewed', 'rotated-90',
                                              'rotated-180', 'rotated-270'] and
                                  alternative_image_features.count(feature) > 1)}:
        log.error("Duplicate feature %s in AlternativeImage for segment '%s'",
                  duplicate_feature, segment.id)

    # sequence of operations to account for: first those already present on
    # the chosen image (transform only), then those still to be applied here
    rotation_feature = 'rotated-%d' % orientation
    steps = list(alternative_image_features)
    if (orientation and
            rotation_feature not in alternative_image_features and
            rotation_feature not in filtered_features):
        steps.append(rotation_feature)
    if (skew and
            'deskewed' not in alternative_image_features and
            'deskewed' not in filtered_features):
        steps.append('deskewed')
    # not a feature to be added, but merely as a fallback position
    # to always enter loop at i == len(alternative_image_features)
    steps.append('_check')

    name = "%s for segment '%s'" % ("AlternativeImage" if alternative_image
                                    else "parent image", segment.id)
    for i, feature in enumerate(steps):
        # image geometry vs feature consistency can only be checked
        # after all features on the existing AlternativeImage have
        # been adjusted for in the transform, and when there is a mismatch,
        # additional steps applied here would only repeat the respective
        # error message; so we only check once at the boundary between
        # existing and new features
        # FIXME we should enforce consistency here (i.e. split into transposition
        #       and minimal rotation, rotation always reshapes, rescaling never happens)
        # FIXME: inconsistency currently unavoidable with line-level dewarping (which increases height)
        if (i == len(alternative_image_features) and
                not (segment_xywh['w'] - 2 < segment_image.width < segment_xywh['w'] + 2 and
                     segment_xywh['h'] - 2 < segment_image.height < segment_xywh['h'] + 2)):
            log.error('segment "%s" image (%s; %dx%d) has not been cropped properly (%dx%d)',
                      segment.id, segment_coords['features'],
                      segment_image.width, segment_image.height,
                      segment_xywh['w'], segment_xywh['h'])
        # adjust transform to feature, and ensure feature is applied to image
        if feature == rotation_feature:
            segment_image, segment_coords, segment_xywh = _reflect(
                log, name, orientation, segment_image, segment_coords, segment_xywh)
        elif feature == 'deskewed':
            segment_image, segment_coords, segment_xywh = _rotate(
                log, name, skew, segment, segment_image, segment_coords, segment_xywh,
                fill=fill, transparency=transparency)

    # verify constraints again:
    final_features = set(segment_coords['features'].split(','))
    if not all(feature in final_features for feature in selected_features):
        raise Exception('Found no AlternativeImage that satisfies all requirements ' +
                        'selector="%s" in segment "%s"' % (
                            feature_selector, segment.id))
    if any(feature in final_features for feature in filtered_features):
        raise Exception('Found no AlternativeImage that satisfies all requirements ' +
                        'filter="%s" in segment "%s"' % (
                            feature_filter, segment.id))
    segment_image.format = 'PNG' # workaround for tesserocr#194
    return segment_image, segment_coords
|
799
|
|
|
|
|
800
|
|
|
# pylint: disable=redefined-builtin |
|
801
|
|
|
def save_image_file(self, image,
                    file_id,
                    file_grp,
                    page_id=None,
                    mimetype='image/png',
                    force=False):
    """Serialize an image and register it as a file of the workspace.

    Encode the PIL.Image ``image`` in the format belonging to ``mimetype``,
    then add it via ``add_file`` under the ID ``file_id`` in the fileGrp
    ``file_grp`` for the physical page ``page_id``. The file name is
    composed of ``file_grp`` (as directory), ``file_id`` (as stem) and
    the extension belonging to ``mimetype``.

    ``force`` is passed on to ``add_file``; it is switched on
    automatically when the workspace is in ``overwrite_mode``.

    Return the path of the file as composed here (i.e.
    ``file_grp/file_id.ext``; NOTE(review): this is not made absolute
    in this method).
    """
    log = getLogger('ocrd.workspace.save_image_file')
    # a workspace-wide overwrite mode implies forcing this operation
    force = True if (not force and self.overwrite_mode) else force
    # encode into memory first, the actual writing is left to add_file
    buffer = io.BytesIO()
    image.save(buffer, format=MIME_TO_PIL[mimetype])
    basename = '%s%s' % (file_id, MIME_TO_EXT[mimetype])
    file_path = str(Path(file_grp, basename))
    added = self.add_file(
        ID=file_id,
        file_grp=file_grp,
        pageId=page_id,
        local_filename=file_path,
        mimetype=mimetype,
        content=buffer.getvalue(),
        force=force)
    log.info('created file ID: %s, file_grp: %s, path: %s',
             file_id, file_grp, added.local_filename)
    return file_path
|
833
|
|
|
|
|
834
|
|
|
def _crop(log, name, segment, parent_image, parent_coords, **kwargs):
    """Crop a parent image to a segment and derive the segment's coordinate info.

    Args:
        log: logger used for the (page-level) cropping message
        name (string): description of the image, used in the log message
        segment: PAGE object whose outline is to be cropped to
        parent_image: PIL.Image to crop from
        parent_coords (dict): ``transform`` / ``angle`` / ``features`` of ``parent_image``
        **kwargs: passed through to ``image_from_polygon``

    The image is left untouched only if ``segment`` is a ``BorderType``
    and ``parent_coords['features']`` already contains ``cropped``;
    in every other case it is masked by the polygon and cropped to its
    bounding box. The feature ``cropped`` is only appended for ``BorderType``.

    Return a tuple of the resulting image, a (shallow) copy of
    ``parent_coords`` with the transform shifted by the bounding box
    offset, and the bounding box as xywh dict.
    """
    # work on a copy, so the parent's dict keeps its entries
    coords = parent_coords.copy()
    # outline of the segment relative to the parent image...
    polygon = coordinates_of_segment(segment, parent_image, parent_coords)
    # ...and its bounding box:
    bbox = bbox_from_polygon(polygon)
    # size of the segment in the parent image after cropping
    # (i.e. possibly different from size before rotation at the parent, but
    #  also possibly different from size after rotation below/AlternativeImage):
    xywh = xywh_from_bbox(*bbox)
    is_border = isinstance(segment, BorderType)
    if is_border and 'cropped' in parent_coords['features']:
        # page level, and already cropped: nothing to do on the image
        cropped = parent_image
    else:
        # below page level we always crop
        if is_border:
            log.info("Cropping %s", name)
            coords['features'] += ',cropped'
        # mask everything outside of the segment polygon...
        masked = image_from_polygon(parent_image, polygon, **kwargs)
        # ...then reduce to the bounding box:
        cropped = crop_image(masked, box=bbox)
    # subtract offset from parent in affine coordinate transform:
    # (consistent with image cropping)
    offset = np.array([-bbox[0],
                       -bbox[1]])
    coords['transform'] = shift_coordinates(parent_coords['transform'], offset)
    return cropped, coords, xywh
|
863
|
|
|
|
|
864
|
|
|
def _reflect(log, name, orientation, segment_image, segment_coords, segment_xywh):
    """Account for a transposition by a multiple of 90° and apply it if needed.

    Args:
        log: logger used for the transposition message
        name (string): description of the image, used in the log message
        orientation: angle of the transposition (90, 180 or 270 map to
            the respective PIL constant; any other value maps to ``None``)
        segment_image: PIL.Image to be transposed
        segment_coords (dict): ``transform`` / ``angle`` / ``features`` of the image
        segment_xywh (dict): current size of the image (keys ``w`` and ``h``)

    ``segment_coords`` and ``segment_xywh`` are updated in place: the
    transform is transposed around the image center, the size is
    adjusted to the transposition, and the angle is increased by
    ``orientation``. The image itself is only transposed (and the feature
    ``rotated-N`` appended) if that feature is not in
    ``segment_coords['features']`` yet.

    Return a tuple of the image, ``segment_coords`` and ``segment_xywh``.
    """
    # Transpose in affine coordinate transform:
    # (consistent with image transposition or AlternativeImage below)
    methods = {
        90: Image.ROTATE_90,
        180: Image.ROTATE_180,
        270: Image.ROTATE_270
    }
    method = methods.get(orientation) # no default
    center = np.array([0.5 * segment_xywh['w'],
                       0.5 * segment_xywh['h']])
    segment_coords['transform'] = transpose_coordinates(
        segment_coords['transform'], method, center)
    width, height = adjust_canvas_to_transposition(
        [segment_xywh['w'], segment_xywh['h']], method)
    segment_xywh['w'] = width
    segment_xywh['h'] = height
    segment_coords['angle'] += orientation
    feature = 'rotated-%d' % orientation
    if feature in segment_coords['features']:
        # image has been transposed already, only the transform needed adjustment
        return segment_image, segment_coords, segment_xywh
    log.info("Transposing %s by %d°", name, orientation)
    segment_image = transpose_image(segment_image, method)
    segment_coords['features'] += ',rotated-%d' % orientation
    return segment_image, segment_coords, segment_xywh
|
885
|
|
|
|
|
886
|
|
|
def _rotate(log, name, skew, segment, segment_image, segment_coords, segment_xywh, **kwargs):
    """Account for a rotation by the skew angle and apply it if needed.

    Args:
        log: logger used for the rotation message
        name (string): description of the image, used in the log message
        skew: rotation angle
        segment: PAGE object the image belongs to (if falsy, no re-cropping)
        segment_image: PIL.Image to be rotated
        segment_coords (dict): ``transform`` / ``angle`` / ``features`` of the image
        segment_xywh (dict): current size of the image (keys ``w`` and ``h``)
        **kwargs: passed through to ``rotate_image`` and ``_crop``

    ``segment_coords`` and ``segment_xywh`` are first updated in place:
    the transform is rotated around the image center, the size is
    adjusted to the rotation, and the angle is increased by ``skew``.
    The image itself is only rotated (and the feature ``deskewed``
    appended) if that feature is not in ``segment_coords['features']`` yet.

    Afterwards, if ``segment`` is given and is either not a ``BorderType``
    or its image has the feature ``cropped``, ``_crop`` is run on the
    result: its coordinates and size are always adopted, its image
    only if the rotation has been applied here.

    Return a tuple of the image, its coordinate dict and its xywh dict.
    """
    # Rotate around center in affine coordinate transform:
    # (consistent with image rotation or AlternativeImage below)
    center = np.array([0.5 * segment_xywh['w'],
                       0.5 * segment_xywh['h']])
    segment_coords['transform'] = rotate_coordinates(
        segment_coords['transform'], skew, center)
    width, height = adjust_canvas_to_rotation(
        [segment_xywh['w'], segment_xywh['h']], skew)
    segment_xywh['w'] = width
    segment_xywh['h'] = height
    segment_coords['angle'] += skew
    # deskew, if (still) necessary:
    rotate_here = 'deskewed' not in segment_coords['features']
    if rotate_here:
        log.info("Rotating %s by %.2f°", name, skew)
        segment_image = rotate_image(segment_image, skew, **kwargs)
        segment_coords['features'] += ',deskewed'
    if (segment and
            (not isinstance(segment, BorderType) or # always crop below page level
             'cropped' in segment_coords['features'])):
        recropped, segment_coords, segment_xywh = _crop(
            log, name, segment, segment_image, segment_coords,
            **kwargs)
        if rotate_here:
            # re-crop to new bbox (which may deviate
            # if segment polygon was not a rectangle)
            segment_image = recropped
        # otherwise: only shift coordinates as if re-cropping
    return segment_image, segment_coords, segment_xywh
|
917
|
|
|
|