Code Duplication    Length = 31 lines in 2 locations

ocrd/ocrd/workspace.py 2 locations

@@ 899-929 (lines=31) @@
896
897
        best_image = None
898
        alternative_images = segment.get_AlternativeImage()
899
        if alternative_images:
900
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
901
            best_features = set()
902
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
903
            # search to the end, because by convention we always append,
904
            # and among multiple satisfactory images we want the most recent,
905
            # but also ensure that we get the richest feature set, i.e. most
906
            # of those features that we cannot reproduce automatically below
907
            for alternative_image in alternative_images:
908
                if filename and filename != alternative_image.filename:
909
                    continue
910
                features = alternative_image.get_comments()
911
                if not features:
912
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
913
                                alternative_images.index(alternative_image) + 1, segment.id)
914
                    features = ''
915
                featureset = set(features.split(','))
916
                if (all(feature in featureset
917
                        for feature in feature_selector.split(',') if feature) and
918
                    not any(feature in featureset
919
                            for feature in feature_filter.split(',') if feature) and
920
                    len(featureset.difference(auto_features)) >= \
921
                    len(best_features.difference(auto_features))):
922
                    best_features = featureset
923
                    best_image = alternative_image
924
            if best_image:
925
                log.debug("Using AlternativeImage %d %s for segment '%s'",
926
                          alternative_images.index(best_image) + 1,
927
                          best_features, segment.id)
928
                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
929
                segment_coords['features'] = best_image.get_comments() # including duplicates
930
931
        alternative_image_features = segment_coords['features'].split(',')
932
        for duplicate_feature in set([feature for feature in alternative_image_features
@@ 629-659 (lines=31) @@
626
        page_coords['features'] = ''
627
        best_image = None
628
        alternative_images = page.get_AlternativeImage()
629
        if alternative_images:
630
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
631
            best_features = set()
632
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
633
            # search to the end, because by convention we always append,
634
            # and among multiple satisfactory images we want the most recent,
635
            # but also ensure that we get the richest feature set, i.e. most
636
            # of those features that we cannot reproduce automatically below
637
            for alternative_image in alternative_images:
638
                if filename and filename != alternative_image.filename:
639
                    continue
640
                features = alternative_image.get_comments()
641
                if not features:
642
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
643
                                alternative_images.index(alternative_image) + 1, page_id)
644
                    features = ''
645
                featureset = set(features.split(','))
646
                if (all(feature in featureset
647
                        for feature in feature_selector.split(',') if feature) and
648
                    not any(feature in featureset
649
                            for feature in feature_filter.split(',') if feature) and
650
                    len(featureset.difference(auto_features)) >= \
651
                    len(best_features.difference(auto_features))):
652
                    best_features = featureset
653
                    best_image = alternative_image
654
            if best_image:
655
                log.debug("Using AlternativeImage %d %s for page '%s'",
656
                          alternative_images.index(best_image) + 1,
657
                          best_features, page_id)
658
                page_image = self._resolve_image_as_pil(best_image.get_filename())
659
                page_coords['features'] = best_image.get_comments() # including duplicates
660
661
        # adjust the coord transformation to the steps applied on the image,
662
        # and apply steps on the existing image in case it is missing there,