Code Duplication    Length = 31-31 lines in 2 locations

ocrd/ocrd/workspace.py 2 locations

@@ 921-951 (lines=31) @@
918
919
        best_image = None
920
        alternative_images = segment.get_AlternativeImage()
921
        if alternative_images:
922
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
923
            best_features = set()
924
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
925
            # search to the end, because by convention we always append,
926
            # and among multiple satisfactory images we want the most recent,
927
            # but also ensure that we get the richest feature set, i.e. most
928
            # of those features that we cannot reproduce automatically below
929
            for alternative_image in alternative_images:
930
                if filename and filename != alternative_image.filename:
931
                    continue
932
                features = alternative_image.get_comments()
933
                if not features:
934
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
935
                                alternative_images.index(alternative_image) + 1, segment.id)
936
                    features = ''
937
                featureset = set(features.split(','))
938
                if (all(feature in featureset
939
                        for feature in feature_selector.split(',') if feature) and
940
                    not any(feature in featureset
941
                            for feature in feature_filter.split(',') if feature) and
942
                    len(featureset.difference(auto_features)) >= \
943
                    len(best_features.difference(auto_features))):
944
                    best_features = featureset
945
                    best_image = alternative_image
946
            if best_image:
947
                log.debug("Using AlternativeImage %d %s for segment '%s'",
948
                          alternative_images.index(best_image) + 1,
949
                          best_features, segment.id)
950
                # load the image that was actually selected above (best_image),
                # not whichever AlternativeImage the loop happened to visit last
                segment_image = self._resolve_image_as_pil(best_image.get_filename())
951
                segment_coords['features'] = best_image.get_comments() # including duplicates
952
953
        alternative_image_features = segment_coords['features'].split(',')
954
        for duplicate_feature in set([feature for feature in alternative_image_features
@@ 651-681 (lines=31) @@
648
        page_coords['features'] = ''
649
        best_image = None
650
        alternative_images = page.get_AlternativeImage()
651
        if alternative_images:
652
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
653
            best_features = set()
654
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
655
            # search to the end, because by convention we always append,
656
            # and among multiple satisfactory images we want the most recent,
657
            # but also ensure that we get the richest feature set, i.e. most
658
            # of those features that we cannot reproduce automatically below
659
            for alternative_image in alternative_images:
660
                if filename and filename != alternative_image.filename:
661
                    continue
662
                features = alternative_image.get_comments()
663
                if not features:
664
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
665
                                alternative_images.index(alternative_image) + 1, page_id)
666
                    features = ''
667
                featureset = set(features.split(','))
668
                if (all(feature in featureset
669
                        for feature in feature_selector.split(',') if feature) and
670
                    not any(feature in featureset
671
                            for feature in feature_filter.split(',') if feature) and
672
                    len(featureset.difference(auto_features)) >= \
673
                    len(best_features.difference(auto_features))):
674
                    best_features = featureset
675
                    best_image = alternative_image
676
            if best_image:
677
                log.debug("Using AlternativeImage %d %s for page '%s'",
678
                          alternative_images.index(best_image) + 1,
679
                          best_features, page_id)
680
                page_image = self._resolve_image_as_pil(best_image.get_filename())
681
                page_coords['features'] = best_image.get_comments() # including duplicates
682
683
        # adjust the coord transformation to the steps applied on the image,
684
        # and apply steps on the existing image in case it is missing there,