Code Duplication - OCR-D/core - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication: 31 lines duplicated in 2 locations

ocrd/ocrd/workspace.py 2 locations



        best_image = None
        alternative_images = segment.get_AlternativeImage()
        if alternative_images:
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, segment.id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for segment '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, segment.id)
                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
                segment_coords['features'] = best_image.get_comments() # including duplicates

        alternative_image_features = segment_coords['features'].split(',')
        for duplicate_feature in set([feature for feature in alternative_image_features

        page_coords['features'] = ''
        best_image = None
        alternative_images = page.get_AlternativeImage()
        if alternative_images:
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, page_id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for page '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, page_id)
                page_image = self._resolve_image_as_pil(best_image.get_filename())
                page_coords['features'] = best_image.get_comments() # including duplicates

        # adjust the coord transformation to the steps applied on the image,
        # and apply steps on the existing image in case it is missing there,

		@@ 921-951 (lines=31) @@
918
919		best_image = None
920		alternative_images = segment.get_AlternativeImage()
921		if alternative_images:
922		# (e.g. from segment-level cropping, binarization, deskewing or despeckling)
923		best_features = set()
924		auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
925		# search to the end, because by convention we always append,
926		# and among multiple satisfactory images we want the most recent,
927		# but also ensure that we get the richest feature set, i.e. most
928		# of those features that we cannot reproduce automatically below
929		for alternative_image in alternative_images:
930		if filename and filename != alternative_image.filename:
931		continue
932		features = alternative_image.get_comments()
933		if not features:
934		log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
935		alternative_images.index(alternative_image) + 1, segment.id)
936		features = ''
937		featureset = set(features.split(','))
938		if (all(feature in featureset
939		for feature in feature_selector.split(',') if feature) and
940		not any(feature in featureset
941		for feature in feature_filter.split(',') if feature) and
942		len(featureset.difference(auto_features)) >= \
943		len(best_features.difference(auto_features))):
944		best_features = featureset
945		best_image = alternative_image
946		if best_image:
947		log.debug("Using AlternativeImage %d %s for segment '%s'",
948		alternative_images.index(best_image) + 1,
949		best_features, segment.id)
950		segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
951		segment_coords['features'] = best_image.get_comments() # including duplicates
952
953		alternative_image_features = segment_coords['features'].split(',')
954		for duplicate_feature in set([feature for feature in alternative_image_features
		@@ 651-681 (lines=31) @@
648		page_coords['features'] = ''
649		best_image = None
650		alternative_images = page.get_AlternativeImage()
651		if alternative_images:
652		# (e.g. from page-level cropping, binarization, deskewing or despeckling)
653		best_features = set()
654		auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
655		# search to the end, because by convention we always append,
656		# and among multiple satisfactory images we want the most recent,
657		# but also ensure that we get the richest feature set, i.e. most
658		# of those features that we cannot reproduce automatically below
659		for alternative_image in alternative_images:
660		if filename and filename != alternative_image.filename:
661		continue
662		features = alternative_image.get_comments()
663		if not features:
664		log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
665		alternative_images.index(alternative_image) + 1, page_id)
666		features = ''
667		featureset = set(features.split(','))
668		if (all(feature in featureset
669		for feature in feature_selector.split(',') if feature) and
670		not any(feature in featureset
671		for feature in feature_filter.split(',') if feature) and
672		len(featureset.difference(auto_features)) >= \
673		len(best_features.difference(auto_features))):
674		best_features = featureset
675		best_image = alternative_image
676		if best_image:
677		log.debug("Using AlternativeImage %d %s for page '%s'",
678		alternative_images.index(best_image) + 1,
679		best_features, page_id)
680		page_image = self._resolve_image_as_pil(best_image.get_filename())
681		page_coords['features'] = best_image.get_comments() # including duplicates
682
683		# adjust the coord transformation to the steps applied on the image,
684		# and apply steps on the existing image in case it is missing there,

OCR-D / core

Code Duplication: 31 lines duplicated in 2 locations

ocrd/ocrd/workspace.py 2 locations