|
@@ 899-929 (lines=31) @@
|
| 896 |
|
|
| 897 |
|
best_image = None |
| 898 |
|
alternative_images = segment.get_AlternativeImage() |
| 899 |
|
if alternative_images: |
| 900 |
|
# (e.g. from segment-level cropping, binarization, deskewing or despeckling) |
| 901 |
|
best_features = set() |
| 902 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 903 |
|
# search to the end, because by convention we always append, |
| 904 |
|
# and among multiple satisfactory images we want the most recent, |
| 905 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 906 |
|
# of those features that we cannot reproduce automatically below |
| 907 |
|
for alternative_image in alternative_images: |
| 908 |
|
if filename and filename != alternative_image.filename: |
| 909 |
|
continue |
| 910 |
|
features = alternative_image.get_comments() |
| 911 |
|
if not features: |
| 912 |
|
log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes", |
| 913 |
|
alternative_images.index(alternative_image) + 1, segment.id) |
| 914 |
|
features = '' |
| 915 |
|
featureset = set(features.split(',')) |
| 916 |
|
if (all(feature in featureset |
| 917 |
|
for feature in feature_selector.split(',') if feature) and |
| 918 |
|
not any(feature in featureset |
| 919 |
|
for feature in feature_filter.split(',') if feature) and |
| 920 |
|
len(featureset.difference(auto_features)) >= \ |
| 921 |
|
len(best_features.difference(auto_features))): |
| 922 |
|
best_features = featureset |
| 923 |
|
best_image = alternative_image |
| 924 |
|
if best_image: |
| 925 |
|
log.debug("Using AlternativeImage %d %s for segment '%s'", |
| 926 |
|
alternative_images.index(best_image) + 1, |
| 927 |
|
best_features, segment.id) |
| 928 |
|
# load the image that was selected (and logged) above, not the loop's last candidate
segment_image = self._resolve_image_as_pil(best_image.get_filename())
| 929 |
|
segment_coords['features'] = best_image.get_comments() # including duplicates |
| 930 |
|
|
| 931 |
|
alternative_image_features = segment_coords['features'].split(',') |
| 932 |
|
for duplicate_feature in set([feature for feature in alternative_image_features |
|
@@ 629-659 (lines=31) @@
|
| 626 |
|
page_coords['features'] = '' |
| 627 |
|
best_image = None |
| 628 |
|
alternative_images = page.get_AlternativeImage() |
| 629 |
|
if alternative_images: |
| 630 |
|
# (e.g. from page-level cropping, binarization, deskewing or despeckling) |
| 631 |
|
best_features = set() |
| 632 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 633 |
|
# search to the end, because by convention we always append, |
| 634 |
|
# and among multiple satisfactory images we want the most recent, |
| 635 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 636 |
|
# of those features that we cannot reproduce automatically below |
| 637 |
|
for alternative_image in alternative_images: |
| 638 |
|
if filename and filename != alternative_image.filename: |
| 639 |
|
continue |
| 640 |
|
features = alternative_image.get_comments() |
| 641 |
|
if not features: |
| 642 |
|
log.warning("AlternativeImage %d for page '%s' does not have any feature attributes", |
| 643 |
|
alternative_images.index(alternative_image) + 1, page_id) |
| 644 |
|
features = '' |
| 645 |
|
featureset = set(features.split(',')) |
| 646 |
|
if (all(feature in featureset |
| 647 |
|
for feature in feature_selector.split(',') if feature) and |
| 648 |
|
not any(feature in featureset |
| 649 |
|
for feature in feature_filter.split(',') if feature) and |
| 650 |
|
len(featureset.difference(auto_features)) >= \ |
| 651 |
|
len(best_features.difference(auto_features))): |
| 652 |
|
best_features = featureset |
| 653 |
|
best_image = alternative_image |
| 654 |
|
if best_image: |
| 655 |
|
log.debug("Using AlternativeImage %d %s for page '%s'", |
| 656 |
|
alternative_images.index(best_image) + 1, |
| 657 |
|
best_features, page_id) |
| 658 |
|
page_image = self._resolve_image_as_pil(best_image.get_filename()) |
| 659 |
|
page_coords['features'] = best_image.get_comments() # including duplicates |
| 660 |
|
|
| 661 |
|
# adjust the coord transformation to the steps applied on the image, |
| 662 |
|
# and apply steps on the existing image in case it is missing there, |