|
@@ 935-965 (lines=31) @@
|
| 932 |
|
|
| 933 |
|
best_image = None |
| 934 |
|
alternative_images = segment.get_AlternativeImage() |
| 935 |
|
if alternative_images: |
| 936 |
|
# (e.g. from segment-level cropping, binarization, deskewing or despeckling) |
| 937 |
|
best_features = set() |
| 938 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 939 |
|
# search to the end, because by convention we always append, |
| 940 |
|
# and among multiple satisfactory images we want the most recent, |
| 941 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 942 |
|
# of those features that we cannot reproduce automatically below |
| 943 |
|
for alternative_image in alternative_images: |
| 944 |
|
if filename and filename != alternative_image.filename: |
| 945 |
|
continue |
| 946 |
|
features = alternative_image.get_comments() |
| 947 |
|
if not features: |
| 948 |
|
log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes", |
| 949 |
|
alternative_images.index(alternative_image) + 1, segment.id) |
| 950 |
|
features = '' |
| 951 |
|
featureset = set(features.split(',')) |
| 952 |
|
if (all(feature in featureset |
| 953 |
|
for feature in feature_selector.split(',') if feature) and |
| 954 |
|
not any(feature in featureset |
| 955 |
|
for feature in feature_filter.split(',') if feature) and |
| 956 |
|
len(featureset.difference(auto_features)) >= \ |
| 957 |
|
len(best_features.difference(auto_features))): |
| 958 |
|
best_features = featureset |
| 959 |
|
best_image = alternative_image |
| 960 |
|
if best_image: |
| 961 |
|
log.debug("Using AlternativeImage %d %s for segment '%s'", |
| 962 |
|
alternative_images.index(best_image) + 1, |
| 963 |
|
best_features, segment.id) |
| 964 |
|
segment_image = self._resolve_image_as_pil(best_image.get_filename()) |
| 965 |
|
segment_coords['features'] = best_image.get_comments() # including duplicates |
| 966 |
|
|
| 967 |
|
alternative_image_features = segment_coords['features'].split(',') |
| 968 |
|
for duplicate_feature in set([feature for feature in alternative_image_features |
|
@@ 665-695 (lines=31) @@
|
| 662 |
|
page_coords['features'] = '' |
| 663 |
|
best_image = None |
| 664 |
|
alternative_images = page.get_AlternativeImage() |
| 665 |
|
if alternative_images: |
| 666 |
|
# (e.g. from page-level cropping, binarization, deskewing or despeckling) |
| 667 |
|
best_features = set() |
| 668 |
|
auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'} |
| 669 |
|
# search to the end, because by convention we always append, |
| 670 |
|
# and among multiple satisfactory images we want the most recent, |
| 671 |
|
# but also ensure that we get the richest feature set, i.e. most |
| 672 |
|
# of those features that we cannot reproduce automatically below |
| 673 |
|
for alternative_image in alternative_images: |
| 674 |
|
if filename and filename != alternative_image.filename: |
| 675 |
|
continue |
| 676 |
|
features = alternative_image.get_comments() |
| 677 |
|
if not features: |
| 678 |
|
log.warning("AlternativeImage %d for page '%s' does not have any feature attributes", |
| 679 |
|
alternative_images.index(alternative_image) + 1, page_id) |
| 680 |
|
features = '' |
| 681 |
|
featureset = set(features.split(',')) |
| 682 |
|
if (all(feature in featureset |
| 683 |
|
for feature in feature_selector.split(',') if feature) and |
| 684 |
|
not any(feature in featureset |
| 685 |
|
for feature in feature_filter.split(',') if feature) and |
| 686 |
|
len(featureset.difference(auto_features)) >= \ |
| 687 |
|
len(best_features.difference(auto_features))): |
| 688 |
|
best_features = featureset |
| 689 |
|
best_image = alternative_image |
| 690 |
|
if best_image: |
| 691 |
|
log.debug("Using AlternativeImage %d %s for page '%s'", |
| 692 |
|
alternative_images.index(best_image) + 1, |
| 693 |
|
best_features, page_id) |
| 694 |
|
page_image = self._resolve_image_as_pil(best_image.get_filename()) |
| 695 |
|
page_coords['features'] = best_image.get_comments() # including duplicates |
| 696 |
|
|
| 697 |
|
# adjust the coord transformation to the steps applied on the image, |
| 698 |
|
# and apply steps on the existing image in case it is missing there, |