| Total Complexity | 14 |
| Total Lines | 51 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | from ocrd_utils import xywh_from_points |
||
| 2 | |||
| 3 | pc_functions = [] |
||
| 4 | |||
| 5 | def _export(func): |
||
| 6 | pc_functions.append(func) |
||
| 7 | return func |
||
| 8 | |||
@_export
def pc_pixelarea(nodes):
    """
    Sum the bounding-box areas of all nodes.

    For every node, look up its Coords child, convert the @points
    attribute into a bounding box and add width times height to the
    running total. Nodes without a Coords child contribute nothing.
    Returns 0 when there are no nodes.
    """
    total = 0
    # FIXME: find out why we need to go to the parent here
    for element in (node.parent.value for node in nodes):
        outline = element.find(f'{element.prefix}:Coords', element.nsmap)
        if outline is not None:
            bbox = xywh_from_points(outline.attrib['points'])
            total += bbox['w'] * bbox['h']
    return total
||
| 26 | |||
@_export
def pc_textequiv(nodes):
    """
    Extract TextEquiv/Unicode from all nodes, then concatenate
    (interspersed with spaces or newlines).

    Once some text has been collected, a separator is emitted before
    each following node depending on its tag: a newline for tags
    ending in ``Region`` or ``Line``, a space for tags ending in
    ``Word``. The separator is emitted even if the node itself turns
    out to have no TextEquiv/Unicode child.

    An empty Unicode element (its ``text`` is ``None`` or ``''``)
    contributes nothing; it is not rendered as the literal string
    ``'None'``.

    Returns the concatenated string, ``''`` if nothing was found.
    """
    # Tag suffix -> separator placed in front of that node's text.
    # The suffixes are mutually exclusive, so the first match wins.
    separators = (('Region', '\n'), ('Line', '\n'), ('Word', ' '))
    # Only non-empty text and separators following it are collected,
    # so ``parts`` is truthy exactly when the result is non-empty.
    parts = []
    for node in nodes:
        # FIXME: find out why we need to go to the parent here
        node = node.parent.value
        if parts:
            for suffix, separator in separators:
                if node.tag.endswith(suffix):
                    parts.append(separator)
                    break
        equiv = node.find(f'{node.prefix}:TextEquiv', node.nsmap)
        if equiv is None:
            continue
        string = equiv.find(f'{node.prefix}:Unicode', node.nsmap)
        if string is None or not string.text:
            # Missing or empty Unicode: nothing to add.
            continue
        parts.append(string.text)
    return ''.join(parts)
||
| 51 | |||
| 52 |