Code Duplication    Length = 56 lines in 2 locations

src/ocrd_page_user_methods/get_AllRegions.py 1 location

@@ 43-98 (lines=56) @@
40
            regionrefs.extend(self._get_recursive_reading_order(elem))
41
    return regionrefs
42
43
def get_AllRegions(self, classes=None, order='document', depth=0):
    """
    Get all the ``*Region`` elements, or only those provided by `classes`.
    Return in document order, unless `order` is ``reading-order`` or
    ``reading-order-only``.

    Arguments:
        classes (list): Classes of regions that shall be returned, \
            e.g. ``['Text', 'Image']``
        order ("document"|"reading-order"|"reading-order-only"): Whether to \
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        depth (int): Recursive depth to look for regions at, set to `0` for \
            all regions at any depth. Default: 0

    Returns:
        a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
            :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
            :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
            :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
            :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
            :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
            :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
            and/or :py:class:`CustomRegionType`

        If a reading order is requested but none can be determined (no \
            reading order element, no group in it, or no region references), \
            the regions are returned in document order.

    Raises:
        ValueError: if `order` is not one of the supported values, or if \
            `depth` is negative

    For example, to get all text anywhere on the page in reading order, use:
    ::
        '\\n'.join(line.get_TextEquiv()[0].Unicode
                  for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                  for line in region.get_TextLine())
    """
    if order not in ('document', 'reading-order', 'reading-order-only'):
        raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
    if depth < 0:
        raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))

    # A depth of 0 is handed on unchanged; any other depth is incremented by one
    # before being passed to the recursive helper.
    # NOTE(review): presumably the extra level accounts for `self` being the
    # starting element -- confirm against _get_recursive_regions.
    ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
    if order == 'document':
        return ret

    # Resolve the reading order step by step; each step may come back empty.
    reading_order = self.get_ReadingOrder()
    if reading_order:
        reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
    if reading_order:
        reading_order = self._get_recursive_reading_order(reading_order)
    if not reading_order:
        # Nothing to sort by: fall back to document order.
        return ret

    # Region references that do not match any collected region are ignored.
    id2region = {region.id: region for region in ret}
    in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
    if order == 'reading-order-only':
        return in_reading_order
    # Append the regions missing from the reading order, keeping document order.
    # Membership deliberately stays equality-based, as before.
    return in_reading_order + [r for r in ret if r not in in_reading_order]
99

src/ocrd_models/ocrd_page_generateds.py 1 location

@@ 3766-3821 (lines=56) @@
3763
                regionrefs.extend(self._get_recursive_reading_order(elem))
3764
        return regionrefs
3765
    
3766
    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless `order` is ``reading-order`` or
        ``reading-order-only``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``)
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

            If a reading order is requested but none can be determined (no \
                reading order element, no group in it, or no region references), \
                the regions are returned in document order.

        Raises:
            ValueError: if `order` is not one of the supported values, or if \
                `depth` is negative

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ('document', 'reading-order', 'reading-order-only'):
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))

        # A depth of 0 is handed on unchanged; any other depth is incremented by one
        # before being passed to the recursive helper.
        # NOTE(review): presumably the extra level accounts for `self` being the
        # starting element -- confirm against _get_recursive_regions.
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if order == 'document':
            return ret

        # Resolve the reading order step by step; each step may come back empty.
        reading_order = self.get_ReadingOrder()
        if reading_order:
            reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
        if reading_order:
            reading_order = self._get_recursive_reading_order(reading_order)
        if not reading_order:
            # Nothing to sort by: fall back to document order.
            return ret

        # Region references that do not match any collected region are ignored.
        id2region = {region.id: region for region in ret}
        in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
        if order == 'reading-order-only':
            return in_reading_order
        # Append the regions missing from the reading order, keeping document order.
        # Membership deliberately stays equality-based, as before.
        return in_reading_order + [r for r in ret if r not in in_reading_order]
3822
    def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, glyph=True):
3823
        """
3824
        Get all the ``pc:AlternativeImage`` in a document