| Total Complexity | 14 | 
| Total Lines | 51 | 
| Duplicated Lines | 0 % | 
| Changes | 0 | ||
| 1 | from ocrd_utils import xywh_from_points | ||
| 2 | |||
| 3 | pc_functions = [] | ||
| 4 | |||
| 5 | def _export(func): | ||
| 6 | pc_functions.append(func) | ||
| 7 | return func | ||
| 8 | |||
@_export
def pc_pixelarea(nodes):
    """
    Extract Coords/@points from all nodes, calculate the bounding
    box, and accumulate areas.

    For every item in ``nodes`` the element at ``node.parent.value`` is
    inspected. Its ``Coords`` child (looked up in the element's own
    namespace) supplies the ``points`` attribute, which is passed to
    ``xywh_from_points``; the product of the resulting ``'w'`` and ``'h'``
    entries is added to the total. Elements without a ``Coords`` child
    are skipped.

    Args:
        nodes: iterable of node wrappers exposing ``parent.value``.

    Returns:
        The accumulated area; ``0`` if no node contributed.

    Raises:
        KeyError: presumably, if a ``Coords`` child has no ``points``
            attribute (mapping-style access on ``attrib``).
    """
    area = 0
    for node in nodes:
        # FIXME: find out why we need to go to the parent here
        element = node.parent.value
        # Address the child in Clark notation ('{uri}Coords') instead of
        # 'prefix:Coords': when the namespace is declared as the default
        # one, ``element.prefix`` is None and the prefixed path would read
        # 'None:Coords', which cannot be resolved.  For an un-namespaced
        # tag, rfind() yields -1 and the slice is simply empty.
        namespace = element.tag[:element.tag.rfind('}') + 1]
        coords = element.find(f'{namespace}Coords')
        if coords is None:
            continue
        xywh = xywh_from_points(coords.attrib['points'])
        area += xywh['w'] * xywh['h']
    return area
| 26 | |||
@_export
def pc_textequiv(nodes):
    """
    Extract TextEquiv/Unicode from all nodes, then concatenate
    (interspersed with spaces or newlines).

    For every item in ``nodes`` the element at ``node.parent.value`` is
    inspected. Once some text has been collected, a separator is emitted
    before each further element according to the end of its tag name:
    a newline for ``...Region`` and ``...Line``, a space for ``...Word``,
    nothing otherwise. The separator is emitted even if the element then
    turns out to have no ``TextEquiv``/``Unicode`` child. Empty ``Unicode``
    elements contribute no text (rather than the string ``'None'``).

    Args:
        nodes: iterable of node wrappers exposing ``parent.value``.

    Returns:
        The concatenated text; ``''`` if no text was found.
    """
    # Tag-name suffix -> separator placed in front of that element's text.
    # The suffixes are mutually exclusive, so at most one can match.
    separators = (('Region', '\n'), ('Line', '\n'), ('Word', ' '))
    # Only non-empty strings are ever appended, so ``parts`` being
    # non-empty is equivalent to "some text has been collected already".
    parts = []
    for node in nodes:
        # FIXME: find out why we need to go to the parent here
        element = node.parent.value
        tag = element.tag
        if parts:
            for suffix, separator in separators:
                if tag.endswith(suffix):
                    parts.append(separator)
                    break
        # Address children in Clark notation ('{uri}Name') instead of
        # 'prefix:Name': when the namespace is declared as the default
        # one, ``element.prefix`` is None and the prefixed path would read
        # 'None:Name', which cannot be resolved.  For an un-namespaced
        # tag, rfind() yields -1 and the slice is simply empty.
        namespace = tag[:tag.rfind('}') + 1]
        equiv = element.find(f'{namespace}TextEquiv')
        if equiv is None:
            continue
        string = equiv.find(f'{namespace}Unicode')
        # ``text`` is None for an empty element; skip it instead of
        # appending the literal 'None'.
        if string is None or not string.text:
            continue
        parts.append(string.text)
    return ''.join(parts)
| 51 | |||
| 52 |