Code Duplication    Length = 25-25 lines in 2 locations

src/ocrd_page_user_methods/get_AllTextLines.py 1 location

@@ 1-25 (lines=25) @@
1
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of every text region into one flat list.

    Regions are obtained from ``self.get_AllRegions(['Text'], order=region_order)``
    and their lines are concatenated in that sequence.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Passed on
            as ``order`` to ``get_AllRegions``. ``document`` (default) sorts the
            regions by document order, ``reading-order`` sorts them by reading
            order and appends the regions that are not part of the reading
            order, ``reading-order-only`` leaves those regions out.
        respect_textline_order (boolean): If true, the lines of a region are
            reversed whenever the effective `@textLineOrder` is neither
            ``top-to-bottom`` nor ``left-to-right``. The region's own value
            wins over the value on ``self``; ``top-to-bottom`` is assumed
            when neither is set. If false, lines are kept as stored.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    forward_orders = ('top-to-bottom', 'left-to-right')
    collected = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Region-level setting first, then the one on self, then the default.
            effective_order = (
                region.get_textLineOrder()
                or self.get_textLineOrder()
                or 'top-to-bottom'
            )
            if effective_order not in forward_orders:
                region_lines = list(reversed(region_lines))
        collected.extend(region_lines)
    return collected
26
27

src/ocrd_models/ocrd_page_generateds.py 1 location

@@ 3895-3919 (lines=25) @@
3892
        """
3893
        self.invalidate_AlternativeImage(feature_selector='cropped')
3894
        self.Border = Border
3895
    def get_AllTextLines(self, region_order='document', respect_textline_order=True):
        """
        Collect the TextLine elements of every text region into one flat list.

        Regions are obtained from ``self.get_AllRegions(['Text'], order=region_order)``
        and their lines are concatenated in that sequence.

        Arguments:
            region_order ("document"|"reading-order"|"reading-order-only"): Passed on
                as ``order`` to ``get_AllRegions``. ``document`` (default) sorts the
                regions by document order, ``reading-order`` sorts them by reading
                order and appends the regions that are not part of the reading
                order, ``reading-order-only`` leaves those regions out.
            respect_textline_order (boolean): If true, the lines of a region are
                reversed whenever the effective `@textLineOrder` is neither
                ``top-to-bottom`` nor ``left-to-right``. The region's own value
                wins over the value on ``self``; ``top-to-bottom`` is assumed
                when neither is set. If false, lines are kept as stored.

        Returns:
            a list of :py:class:`TextLineType`
        """
        # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
        forward_orders = ('top-to-bottom', 'left-to-right')
        collected = []
        for region in self.get_AllRegions(['Text'], order=region_order):
            region_lines = region.get_TextLine()
            if respect_textline_order:
                # Region-level setting first, then the one on self, then the default.
                effective_order = (
                    region.get_textLineOrder()
                    or self.get_textLineOrder()
                    or 'top-to-bottom'
                )
                if effective_order not in forward_orders:
                    region_lines = list(reversed(region_lines))
            collected.extend(region_lines)
        return collected
3920
    
3921
    def get_ReadingOrderGroups(self) -> dict:
3922
        """