Total Complexity | 4 |
Total Lines | 23 |
Duplicated Lines | 0 % |
Changes | 0 |
def get_AllTextLines(self, region_order='document', respect_textline_order=True):
    """
    Collect the TextLine elements of all text regions into a single flat list.

    Regions are obtained from ``self.get_AllRegions(['Text'], order=region_order)``
    and their lines are concatenated in the order the regions are yielded.

    Arguments:
        region_order ("document"|"reading-order"|"reading-order-only") Whether to
            visit regions in document order (``document``, default), in reading
            order with regions missing from the reading order appended at the
            end (``reading-order``), or in reading order with such regions
            left out (``reading-order-only``)
        respect_textline_order (boolean) Whether to honour the textLineOrder
            attribute. When true, the order is taken from the region, then
            from this object, then defaults to ``top-to-bottom``; any value
            other than ``top-to-bottom`` or ``left-to-right`` causes that
            region's lines to be added in reverse.

    Returns:
        A new list containing the lines of every visited region.
    """
    # TODO handle textLineOrder according to https://github.com/PRImA-Research-Lab/PAGE-XML/issues/26
    forward_orders = ('top-to-bottom', 'left-to-right')
    all_lines = []
    for region in self.get_AllRegions(['Text'], order=region_order):
        region_lines = region.get_TextLine()
        if respect_textline_order:
            # Region-level setting wins over the one on this object; fall back to top-to-bottom.
            line_order = region.get_textLineOrder() or self.get_textLineOrder() or 'top-to-bottom'
            if line_order not in forward_orders:
                # Reverse into a copy so the region's own list is left untouched.
                region_lines = list(reversed(region_lines))
        all_lines += region_lines
    return all_lines
||
23 | |||
24 |