|
@@ 921-951 (lines=31) @@
|
| 918 |
|
|
| 919 |
|
best_image = None |
| 920 |
|
alternative_images = segment.get_AlternativeImage() |
| 921 |
|
if alternative_images: |
| 922 |
|
# (e.g. from segment-level cropping, binarization, deskewing or despeckling) |
| 923 |
|
best_features = set() |
| 924 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 925 |
|
# search to the end, because by convention we always append, |
| 926 |
|
# and among multiple satisfactory images we want the most recent, |
| 927 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 928 |
|
# of those features that we cannot reproduce automatically below |
| 929 |
|
for alternative_image in alternative_images: |
| 930 |
|
if filename and filename != alternative_image.filename: |
| 931 |
|
continue |
| 932 |
|
features = alternative_image.get_comments() |
| 933 |
|
if not features: |
| 934 |
|
log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes", |
| 935 |
|
alternative_images.index(alternative_image) + 1, segment.id) |
| 936 |
|
features = '' |
| 937 |
|
featureset = set(features.split(',')) |
| 938 |
|
if (all(feature in featureset |
| 939 |
|
for feature in feature_selector.split(',') if feature) and |
| 940 |
|
not any(feature in featureset |
| 941 |
|
for feature in feature_filter.split(',') if feature) and |
| 942 |
|
len(featureset.difference(auto_features)) >= \ |
| 943 |
|
len(best_features.difference(auto_features))): |
| 944 |
|
best_features = featureset |
| 945 |
|
best_image = alternative_image |
| 946 |
|
if best_image:
    log.debug("Using AlternativeImage %d %s for segment '%s'",
              alternative_images.index(best_image) + 1,
              best_features, segment.id)
    # Load the image that was actually selected. After the search loop,
    # ``alternative_image`` is merely the last candidate iterated over
    # (possibly one that was skipped or rejected), so resolving its file
    # would pair the wrong pixels with the features recorded below.
    segment_image = self._resolve_image_as_pil(best_image.get_filename())
    # Keep the raw comma-separated feature string of the selected image.
    segment_coords['features'] = best_image.get_comments() # including duplicates
| 952 |
|
|
| 953 |
|
alternative_image_features = segment_coords['features'].split(',') |
| 954 |
|
for duplicate_feature in set([feature for feature in alternative_image_features |
|
@@ 651-681 (lines=31) @@
|
| 648 |
|
page_coords['features'] = '' |
| 649 |
|
best_image = None |
| 650 |
|
alternative_images = page.get_AlternativeImage() |
| 651 |
|
if alternative_images: |
| 652 |
|
# (e.g. from page-level cropping, binarization, deskewing or despeckling) |
| 653 |
|
best_features = set() |
| 654 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 655 |
|
# search to the end, because by convention we always append, |
| 656 |
|
# and among multiple satisfactory images we want the most recent, |
| 657 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 658 |
|
# of those features that we cannot reproduce automatically below |
| 659 |
|
for alternative_image in alternative_images: |
| 660 |
|
if filename and filename != alternative_image.filename: |
| 661 |
|
continue |
| 662 |
|
features = alternative_image.get_comments() |
| 663 |
|
if not features: |
| 664 |
|
log.warning("AlternativeImage %d for page '%s' does not have any feature attributes", |
| 665 |
|
alternative_images.index(alternative_image) + 1, page_id) |
| 666 |
|
features = '' |
| 667 |
|
featureset = set(features.split(',')) |
| 668 |
|
if (all(feature in featureset |
| 669 |
|
for feature in feature_selector.split(',') if feature) and |
| 670 |
|
not any(feature in featureset |
| 671 |
|
for feature in feature_filter.split(',') if feature) and |
| 672 |
|
len(featureset.difference(auto_features)) >= \ |
| 673 |
|
len(best_features.difference(auto_features))): |
| 674 |
|
best_features = featureset |
| 675 |
|
best_image = alternative_image |
| 676 |
|
if best_image: |
| 677 |
|
log.debug("Using AlternativeImage %d %s for page '%s'", |
| 678 |
|
alternative_images.index(best_image) + 1, |
| 679 |
|
best_features, page_id) |
| 680 |
|
page_image = self._resolve_image_as_pil(best_image.get_filename()) |
| 681 |
|
page_coords['features'] = best_image.get_comments() # including duplicates |
| 682 |
|
|
| 683 |
|
# adjust the coord transformation to the steps applied on the image, |
| 684 |
|
# and apply steps on the existing image in case it is missing there, |