Code Duplication - OCR-D/core - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication: 31 duplicated lines in 2 locations

ocrd/ocrd/workspace.py 2 locations



        best_image = None
        alternative_images = segment.get_AlternativeImage()
        if alternative_images:
            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, segment.id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for segment '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, segment.id)
                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
                segment_coords['features'] = best_image.get_comments() # including duplicates

        alternative_image_features = segment_coords['features'].split(',')
        for duplicate_feature in set([feature for feature in alternative_image_features

        page_coords['features'] = ''
        best_image = None
        alternative_images = page.get_AlternativeImage()
        if alternative_images:
            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
            best_features = set()
            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
            # search to the end, because by convention we always append,
            # and among multiple satisfactory images we want the most recent,
            # but also ensure that we get the richest feature set, i.e. most
            # of those features that we cannot reproduce automatically below
            for alternative_image in alternative_images:
                if filename and filename != alternative_image.filename:
                    continue
                features = alternative_image.get_comments()
                if not features:
                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
                                alternative_images.index(alternative_image) + 1, page_id)
                    features = ''
                featureset = set(features.split(','))
                if (all(feature in featureset
                        for feature in feature_selector.split(',') if feature) and
                    not any(feature in featureset
                            for feature in feature_filter.split(',') if feature) and
                    len(featureset.difference(auto_features)) >= \
                    len(best_features.difference(auto_features))):
                    best_features = featureset
                    best_image = alternative_image
            if best_image:
                log.debug("Using AlternativeImage %d %s for page '%s'",
                          alternative_images.index(best_image) + 1,
                          best_features, page_id)
                page_image = self._resolve_image_as_pil(best_image.get_filename())
                page_coords['features'] = best_image.get_comments() # including duplicates

        # adjust the coord transformation to the steps applied on the image,
        # and apply steps on the existing image in case it is missing there,

		@@ 935-965 (lines=31) @@
932
933		        best_image = None
934		        alternative_images = segment.get_AlternativeImage()
935		        if alternative_images:
936		            # (e.g. from segment-level cropping, binarization, deskewing or despeckling)
937		            best_features = set()
938		            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
939		            # search to the end, because by convention we always append,
940		            # and among multiple satisfactory images we want the most recent,
941		            # but also ensure that we get the richest feature set, i.e. most
942		            # of those features that we cannot reproduce automatically below
943		            for alternative_image in alternative_images:
944		                if filename and filename != alternative_image.filename:
945		                    continue
946		                features = alternative_image.get_comments()
947		                if not features:
948		                    log.warning("AlternativeImage %d for segment '%s' does not have any feature attributes",
949		                                alternative_images.index(alternative_image) + 1, segment.id)
950		                    features = ''
951		                featureset = set(features.split(','))
952		                if (all(feature in featureset
953		                        for feature in feature_selector.split(',') if feature) and
954		                    not any(feature in featureset
955		                            for feature in feature_filter.split(',') if feature) and
956		                    len(featureset.difference(auto_features)) >= \
957		                    len(best_features.difference(auto_features))):
958		                    best_features = featureset
959		                    best_image = alternative_image
960		            if best_image:
961		                log.debug("Using AlternativeImage %d %s for segment '%s'",
962		                          alternative_images.index(best_image) + 1,
963		                          best_features, segment.id)
964		                segment_image = self._resolve_image_as_pil(alternative_image.get_filename())
965		                segment_coords['features'] = best_image.get_comments() # including duplicates
966
967		        alternative_image_features = segment_coords['features'].split(',')
968		        for duplicate_feature in set([feature for feature in alternative_image_features
		@@ 665-695 (lines=31) @@
662		        page_coords['features'] = ''
663		        best_image = None
664		        alternative_images = page.get_AlternativeImage()
665		        if alternative_images:
666		            # (e.g. from page-level cropping, binarization, deskewing or despeckling)
667		            best_features = set()
668		            auto_features = {'cropped', 'deskewed', 'rotated-90', 'rotated-180', 'rotated-270'}
669		            # search to the end, because by convention we always append,
670		            # and among multiple satisfactory images we want the most recent,
671		            # but also ensure that we get the richest feature set, i.e. most
672		            # of those features that we cannot reproduce automatically below
673		            for alternative_image in alternative_images:
674		                if filename and filename != alternative_image.filename:
675		                    continue
676		                features = alternative_image.get_comments()
677		                if not features:
678		                    log.warning("AlternativeImage %d for page '%s' does not have any feature attributes",
679		                                alternative_images.index(alternative_image) + 1, page_id)
680		                    features = ''
681		                featureset = set(features.split(','))
682		                if (all(feature in featureset
683		                        for feature in feature_selector.split(',') if feature) and
684		                    not any(feature in featureset
685		                            for feature in feature_filter.split(',') if feature) and
686		                    len(featureset.difference(auto_features)) >= \
687		                    len(best_features.difference(auto_features))):
688		                    best_features = featureset
689		                    best_image = alternative_image
690		            if best_image:
691		                log.debug("Using AlternativeImage %d %s for page '%s'",
692		                          alternative_images.index(best_image) + 1,
693		                          best_features, page_id)
694		                page_image = self._resolve_image_as_pil(best_image.get_filename())
695		                page_coords['features'] = best_image.get_comments() # including duplicates
696
697		        # adjust the coord transformation to the steps applied on the image,
698		        # and apply steps on the existing image in case it is missing there,

OCR-D / core

Code Duplication: 31 duplicated lines in 2 locations

ocrd/ocrd/workspace.py 2 locations