| @@ 1-25 (lines=25) @@ | ||
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of all text regions into one flat list.

    Text regions are obtained via ``get_AllRegions(['Text'], order=region_order)``
    and the lines of each region are appended in that region sequence.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to
            visit regions in document order (``document``, default), in reading
            order with regions not in the reading order at the end of the
            returned list (``reading-order``), or in reading order with regions
            not in the reading order omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect the `@textLineOrder`
            attribute. If true, the effective order is the region's own value,
            falling back to this object's value and finally to
            ``top-to-bottom``; the lines of a region are reversed unless the
            effective order is ``top-to-bottom`` or ``left-to-right``. If
            false, lines are always kept in the order the region returns them.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    forward_orders = ['top-to-bottom', 'left-to-right']
    all_lines = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Most specific setting wins: region, then this object, then the default.
            line_order = (
                region.get_textLineOrder()
                or self.get_textLineOrder()
                or 'top-to-bottom'
            )
            if line_order not in forward_orders:
                region_lines = list(reversed(region_lines))
        all_lines.extend(region_lines)
    return all_lines
|
| 26 | ||
| 27 | ||
| @@ 3893-3917 (lines=25) @@ | ||
| 3890 | """ |
|
| 3891 | self.invalidate_AlternativeImage(feature_selector='cropped') |
|
| 3892 | self.Border = Border |
|
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of all text regions into one flat list.

    Text regions are obtained via ``get_AllRegions(['Text'], order=region_order)``
    and the lines of each region are appended in that region sequence.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only"): Whether to
            visit regions in document order (``document``, default), in reading
            order with regions not in the reading order at the end of the
            returned list (``reading-order``), or in reading order with regions
            not in the reading order omitted (``reading-order-only``)
        respect_textline_order (boolean): Whether to respect the `@textLineOrder`
            attribute. If true, the effective order is the region's own value,
            falling back to this object's value and finally to
            ``top-to-bottom``; the lines of a region are reversed unless the
            effective order is ``top-to-bottom`` or ``left-to-right``. If
            false, lines are always kept in the order the region returns them.

    Returns:
        a list of :py:class:`TextLineType`
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    forward_orders = ['top-to-bottom', 'left-to-right']
    all_lines = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Most specific setting wins: region, then this object, then the default.
            line_order = (
                region.get_textLineOrder()
                or self.get_textLineOrder()
                or 'top-to-bottom'
            )
            if line_order not in forward_orders:
                region_lines = list(reversed(region_lines))
        all_lines.extend(region_lines)
    return all_lines
|
| 3918 | ||
| 3919 | def get_ReadingOrderGroups(self) -> dict: |
|
| 3920 | """ |
|