Code Duplication    Length = 56-56 lines in 2 locations

src/ocrd_models/ocrd_page_generateds.py 1 location

@@ 3162-3217 (lines=56) @@
3159
                regionrefs.extend(self._get_recursive_reading_order(elem))
3160
        return regionrefs
3161
    
3162
    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.

        Regions are returned in document order, unless `order` is
        ``reading-order`` or ``reading-order-only``.

        Arguments:
            classes (list): Classes of regions that shall be returned,
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``)
            depth (int): Recursive depth to look for regions at, set to `0` for
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the three supported values,
                or if `depth` is negative.

        Note:
            When no reading order can be resolved (no ``ReadingOrder``, no
            ordered/unordered group in it, or an empty list of region
            references), the regions are returned in document order, even
            for ``reading-order-only``.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ('document', 'reading-order', 'reading-order-only'):
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # A depth of 0 ("unlimited") is passed through unchanged; any positive
        # depth is passed on incremented by one.
        # NOTE(review): presumably because the starting element itself counts
        # as one level -- confirm against _get_recursive_regions.
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if order == 'document':
            return ret
        # Resolve the reading order step by step; every step may come back
        # empty, in which case the document order is returned as-is.
        reading_order = self.get_ReadingOrder()
        if reading_order:
            reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
        if reading_order:
            reading_order = self._get_recursive_reading_order(reading_order)
        if not reading_order:
            return ret
        id2region = {region.id: region for region in ret}
        # References to regions that were not selected (wrong class, too deep,
        # or unknown id) are skipped silently.
        in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
        if order == 'reading-order-only':
            return in_reading_order
        # Append the remaining regions in document order. Membership is tested
        # on a set of ids so the cost stays linear in the number of regions.
        ordered_ids = {region.id for region in in_reading_order}
        return in_reading_order + [region for region in ret if region.id not in ordered_ids]
3218
    def get_AllAlternativeImages(self, page=True, region=True, line=True, word=True, glyph=True):
3219
        """
3220
        Get all the ``pc:AlternativeImage`` in a document

src/ocrd_page_user_methods/get_AllRegions.py 1 location

@@ 43-98 (lines=56) @@
40
            regionrefs.extend(self._get_recursive_reading_order(elem))
41
    return regionrefs
42
43
def get_AllRegions(self, classes=None, order='document', depth=0):
    """
    Get all the ``*Region`` elements, or only those provided by `classes`.

    Regions are returned in document order, unless `order` is
    ``reading-order`` or ``reading-order-only``.

    Arguments:
        classes (list): Classes of regions that shall be returned,
            e.g. ``['Text', 'Image']``
        order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        depth (int): Recursive depth to look for regions at, set to `0` for
            all regions at any depth. Default: 0

    Returns:
        a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
            :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
            :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
            :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
            :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
            :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
            :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
            and/or :py:class:`CustomRegionType`

    Raises:
        ValueError: if `order` is not one of the three supported values,
            or if `depth` is negative.

    Note:
        When no reading order can be resolved (no ``ReadingOrder``, no
        ordered/unordered group in it, or an empty list of region
        references), the regions are returned in document order, even
        for ``reading-order-only``.

    For example, to get all text anywhere on the page in reading order, use:
    ::
        '\\n'.join(line.get_TextEquiv()[0].Unicode
                  for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                  for line in region.get_TextLine())
    """
    if order not in ('document', 'reading-order', 'reading-order-only'):
        raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
    if depth < 0:
        raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
    # A depth of 0 ("unlimited") is passed through unchanged; any positive
    # depth is passed on incremented by one.
    # NOTE(review): presumably because the starting element itself counts
    # as one level -- confirm against _get_recursive_regions.
    ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
    if order == 'document':
        return ret
    # Resolve the reading order step by step; every step may come back
    # empty, in which case the document order is returned as-is.
    reading_order = self.get_ReadingOrder()
    if reading_order:
        reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
    if reading_order:
        reading_order = self._get_recursive_reading_order(reading_order)
    if not reading_order:
        return ret
    id2region = {region.id: region for region in ret}
    # References to regions that were not selected (wrong class, too deep,
    # or unknown id) are skipped silently.
    in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
    if order == 'reading-order-only':
        return in_reading_order
    # Append the remaining regions in document order. Membership is tested
    # on a set of ids so the cost stays linear in the number of regions.
    ordered_ids = {region.id for region in in_reading_order}
    return in_reading_order + [region for region in ret if region.id not in ordered_ids]
99