Code Duplication    Length = 31 lines in 2 locations

ocrd/ocrd/workspace.py 2 locations

@@ 935-965 (lines=31) @@
932
933
        best_image = None
934
        alternative_images = segment.get_AlternativeImage()
935
        if alternative_images:
936
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
937
            best_features = set()
938
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
939
            # search to the end, because by convention we always append,
940
            # and among multiple satisfactory images we want the most recent,
941
            # but also ensure that we get the richest feature set, i.e. most
942
            # of those features that we cannot reproduce automatically below
943
            for alternative_image in alternative_images:
944
                if filename and filename != alternative_image.filename:
945
                    continue
946
                features = alternative_image.get_comments()
947
                if not features:
948
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
949
                                alternative_images.index(alternative_image) + 1, segment.id)
950
                    features = ''
951
                featureset = set(features.split(','))
952
                if (all(feature in featureset
953
                        for feature in feature_selector.split(',') if feature) and
954
                    not any(feature in featureset
955
                            for feature in feature_filter.split(',') if feature) and
956
                    len(featureset.difference(auto_features)) >= \
957
                    len(best_features.difference(auto_features))):
958
                    best_features = featureset
959
                    best_image = alternative_image
960
            if best_image:
961
                log.debug("Using AlternativeImage %d %s for segment '%s'",
962
                          alternative_images.index(best_image) + 1,
963
                          best_features, segment.id)
964
                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
965
                segment_coords['features'] = best_image.get_comments() # including duplicates
966
967
        alternative_image_features = segment_coords['features'].split(',')
968
        for duplicate_feature in set([feature for feature in alternative_image_features
@@ 665-695 (lines=31) @@
662
        page_coords['features'] = ''
663
        best_image = None
664
        alternative_images = page.get_AlternativeImage()
665
        if alternative_images:
666
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
667
            best_features = set()
668
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
669
            # search to the end, because by convention we always append,
670
            # and among multiple satisfactory images we want the most recent,
671
            # but also ensure that we get the richest feature set, i.e. most
672
            # of those features that we cannot reproduce automatically below
673
            for alternative_image in alternative_images:
674
                if filename and filename != alternative_image.filename:
675
                    continue
676
                features = alternative_image.get_comments()
677
                if not features:
678
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
679
                                alternative_images.index(alternative_image) + 1, page_id)
680
                    features = ''
681
                featureset = set(features.split(','))
682
                if (all(feature in featureset
683
                        for feature in feature_selector.split(',') if feature) and
684
                    not any(feature in featureset
685
                            for feature in feature_filter.split(',') if feature) and
686
                    len(featureset.difference(auto_features)) >= \
687
                    len(best_features.difference(auto_features))):
688
                    best_features = featureset
689
                    best_image = alternative_image
690
            if best_image:
691
                log.debug("Using AlternativeImage %d %s for page '%s'",
692
                          alternative_images.index(best_image) + 1,
693
                          best_features, page_id)
694
                page_image = self._resolve_image_as_pil(best_image.get_filename())
695
                page_coords['features'] = best_image.get_comments() # including duplicates
696
697
        # adjust the coord transformation to the steps applied on the image,
698
        # and apply steps on the existing image in case it is missing there,