@@ 1-25 (lines=25) @@ | ||
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of all text regions into one flat list.

    Regions are obtained from ``self.get_AllRegions(['Text'], order=region_order)``
    and their lines are concatenated region by region.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute.
            When true, the effective order is the region's own value, falling back
            to the value of ``self.get_textLineOrder()`` and finally to
            ``top-to-bottom``; any effective order other than ``top-to-bottom``
            or ``left-to-right`` reverses that region's lines.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    # Orders for which the stored sequence of lines is used unchanged.
    forward_orders = ['top-to-bottom', 'left-to-right']
    all_lines = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Region-level setting wins over the page-level one.
            line_order = region.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
            if line_order not in forward_orders:
                # Reverse a copy so the region's own list is left untouched.
                region_lines = list(reversed(region_lines))
        all_lines += region_lines
    return all_lines
|
26 | ||
27 |
@@ 3893-3917 (lines=25) @@ | ||
3890 | """ |
|
3891 | self.invalidate_AlternativeImage(feature_selector='cropped') |
|
3892 | self.Border = Border |
|
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of all text regions into one flat list.

    Regions are obtained from ``self.get_AllRegions(['Text'], order=region_order)``
    and their lines are concatenated region by region.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect `@textLineOrder` attribute.
            When true, the effective order is the region's own value, falling back
            to the value of ``self.get_textLineOrder()`` and finally to
            ``top-to-bottom``; any effective order other than ``top-to-bottom``
            or ``left-to-right`` reverses that region's lines.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    # Orders for which the stored sequence of lines is used unchanged.
    forward_orders = ['top-to-bottom', 'left-to-right']
    all_lines = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Region-level setting wins over the page-level one.
            line_order = region.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
            if line_order not in forward_orders:
                # Reverse a copy so the region's own list is left untouched.
                region_lines = list(reversed(region_lines))
        all_lines += region_lines
    return all_lines
|
3918 | ||
3919 | def get_ReadingOrderGroups(self) -> dict: |
|
3920 | """ |