Code Duplication - OCR-D/core - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 31 lines in 2 locations

ocrd/ocrd/workspace.py 2 locations



        best_image = None
        alternative_images = segment.get_AlternativeImage()
        if alternative_images:
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, segment.id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for segment '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, segment.id)
                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
                segment_coords['features'] = best_image.get_comments() # including duplicates

        alternative_image_features = segment_coords['features'].split(',')
        for duplicate_feature in set([feature for feature in alternative_image_features

        page_coords['features'] = ''
        best_image = None
        alternative_images = page.get_AlternativeImage()
        if alternative_images:
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, page_id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for page '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, page_id)
                page_image = self._resolve_image_as_pil(best_image.get_filename())
                page_coords['features'] = best_image.get_comments() # including duplicates

        # adjust the coord transformation to the steps applied on the image,
        # and apply steps on the existing image in case it is missing there,

		@@ 899-929 (lines=31) @@
896
897		best_image = None
898		alternative_images = segment.get_AlternativeImage()
899		if alternative_images:
900		# (e.g. from segment-level cropping, binarization, deskewing or despeckling)
901		best_features = set()
902		auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
903		# search to the end, because by convention we always append,
904		# and among multiple satisfactory images we want the most recent,
905		# but also ensure that we get the richest feature set, i.e. most
906		# of those features that we cannot reproduce automatically below
907		for alternative_image in alternative_images:
908		if filename and filename != alternative_image.filename:
909		continue
910		features = alternative_image.get_comments()
911		if not features:
912		log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
913		alternative_images.index(alternative_image) + 1, segment.id)
914		features = ''
915		featureset = set(features.split(','))
916		if (all(feature in featureset
917		for feature in feature_selector.split(',') if feature) and
918		not any(feature in featureset
919		for feature in feature_filter.split(',') if feature) and
920		len(featureset.difference(auto_features)) >= \
921		len(best_features.difference(auto_features))):
922		best_features = featureset
923		best_image = alternative_image
924		if best_image:
925		log.debug("Using AlternativeImage %d %s for segment '%s'",
926		alternative_images.index(best_image) + 1,
927		best_features, segment.id)
928		segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
929		segment_coords['features'] = best_image.get_comments() # including duplicates
930
931		alternative_image_features = segment_coords['features'].split(',')
932		for duplicate_feature in set([feature for feature in alternative_image_features
		@@ 629-659 (lines=31) @@
626		page_coords['features'] = ''
627		best_image = None
628		alternative_images = page.get_AlternativeImage()
629		if alternative_images:
630		# (e.g. from page-level cropping, binarization, deskewing or despeckling)
631		best_features = set()
632		auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
633		# search to the end, because by convention we always append,
634		# and among multiple satisfactory images we want the most recent,
635		# but also ensure that we get the richest feature set, i.e. most
636		# of those features that we cannot reproduce automatically below
637		for alternative_image in alternative_images:
638		if filename and filename != alternative_image.filename:
639		continue
640		features = alternative_image.get_comments()
641		if not features:
642		log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
643		alternative_images.index(alternative_image) + 1, page_id)
644		features = ''
645		featureset = set(features.split(','))
646		if (all(feature in featureset
647		for feature in feature_selector.split(',') if feature) and
648		not any(feature in featureset
649		for feature in feature_filter.split(',') if feature) and
650		len(featureset.difference(auto_features)) >= \
651		len(best_features.difference(auto_features))):
652		best_features = featureset
653		best_image = alternative_image
654		if best_image:
655		log.debug("Using AlternativeImage %d %s for page '%s'",
656		alternative_images.index(best_image) + 1,
657		best_features, page_id)
658		page_image = self._resolve_image_as_pil(best_image.get_filename())
659		page_coords['features'] = best_image.get_comments() # including duplicates
660
661		# adjust the coord transformation to the steps applied on the image,
662		# and apply steps on the existing image in case it is missing there,

OCR-D / core

Code Duplication Length = 31 lines in 2 locations

ocrd/ocrd/workspace.py 2 locations