Code Duplication    Length = 25-25 lines in 2 locations

src/ocrd_page_user_methods/get_AllTextLines.py 1 location

@@ 1-25 (lines=25) @@
1
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Return all the TextLine in the document

    Text regions are fetched with ``self.get_AllRegions(['Text'], order=region_order)``
    and the lines of each region are appended to the result region by region.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to \
            return regions sorted by document order (``document``, default) or by \
            reading order with regions not in the reading order at the end of the \
            returned list (``reading-order``) or regions not in the reading order \
            omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute. \
            If true, the effective order is taken from the region, falling back to \
            ``self.get_textLineOrder()`` and finally to ``top-to-bottom``; the lines of \
            a region are reversed unless that order is ``top-to-bottom`` or ``left-to-right``.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    # Orders for which the stored sequence of lines is returned unchanged;
    # any other effective order reverses the region's lines.
    forward_orders = ('top-to-bottom', 'left-to-right')
    ret = []
    for reg in self.get_AllRegions(['Text'], order=region_order):
        lines = reg.get_TextLine()
        if respect_textline_order:
            # Region-level setting wins, then the setting on self, then the default.
            lo = reg.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
            if lo not in forward_orders:
                lines = reversed(lines)
        ret.extend(lines)
    return ret
26
27

src/ocrd_models/ocrd_page_generateds.py 1 location

@@ 3893-3917 (lines=25) @@
3890
        """
3891
        self.invalidate_AlternativeImage(feature_selector='cropped')
3892
        self.Border = Border
3893
    def get_AllTextLines(self, region_order='document', respect_textline_order=True):
        """
        Return all the TextLine in the document

        Text regions are fetched with ``self.get_AllRegions(['Text'], order=region_order)``
        and the lines of each region are appended to the result region by region.

        Arguments:
            region_order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by \
                reading order with regions not in the reading order at the end of the \
                returned list (``reading-order``) or regions not in the reading order \
                omitted (``reading-order-only``)
            respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute. \
                If true, the effective order is taken from the region, falling back to \
                ``self.get_textLineOrder()`` and finally to ``top-to-bottom``; the lines of \
                a region are reversed unless that order is ``top-to-bottom`` or ``left-to-right``.

        Returns:
            a list of :py:class:`TextLineType`
        """
        # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
        # Orders for which the stored sequence of lines is returned unchanged;
        # any other effective order reverses the region's lines.
        forward_orders = ('top-to-bottom', 'left-to-right')
        ret = []
        for reg in self.get_AllRegions(['Text'], order=region_order):
            lines = reg.get_TextLine()
            if respect_textline_order:
                # Region-level setting wins, then the setting on self, then the default.
                lo = reg.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
                if lo not in forward_orders:
                    lines = reversed(lines)
            ret.extend(lines)
        return ret
3918
    
3919
    def get_ReadingOrderGroups(self) -> dict:
3920
        """