|
@@ 15120-15424 (lines=305) @@
|
| 15117 |
|
# end class GraphicRegionType |
| 15118 |
|
|
| 15119 |
|
|
| 15120 |
|
class LineDrawingRegionType(RegionType):
    """LineDrawingRegionType --
    A line drawing is a single colour illustration without
    solid areas.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * penColour --
      The pen (foreground) colour of the region

    * bgColour --
      The background colour of the region

    * embText --
      Specifies whether the region also contains
      text

    """
    # Descriptions of the XML attributes this type adds on top of RegionType.
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('penColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'penColour'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
        MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}),
    ]
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, penColour=None, bgColour=None, embText=None, gds_collector_=None, **kwargs_):
        """Initialise the region.

        All parameters from ``id`` through ``CustomRegion`` are forwarded
        positionally to the superclass initialiser; ``orientation``,
        ``penColour``, ``bgColour`` and ``embText`` are stored on this
        instance after passing through ``_cast``.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time rather than bound lexically.
        super(globals().get("LineDrawingRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion, **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.penColour = _cast(None, penColour)
        self.penColour_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"
        self.embText = _cast(bool, embText)
        self.embText_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Create an instance, preferring a registered subclass when one exists.

        A class found via ``getSubclassFromModule_`` wins, then the
        ``subclass`` class attribute, then ``LineDrawingRegionType`` itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, LineDrawingRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if LineDrawingRegionType.subclass:
            return LineDrawingRegionType.subclass(*args_, **kwargs_)
        return LineDrawingRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix stored on this instance."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Store ``ns_prefix`` as this instance's namespace prefix."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the ``orientation`` attribute."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_penColour(self):
        """Return the ``penColour`` attribute."""
        return self.penColour

    def set_penColour(self, penColour):
        """Set the ``penColour`` attribute (not validated here)."""
        self.penColour = penColour

    def get_bgColour(self):
        """Return the ``bgColour`` attribute."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Set the ``bgColour`` attribute (not validated here)."""
        self.bgColour = bgColour

    def get_embText(self):
        """Return the ``embText`` attribute."""
        return self.embText

    def set_embText(self, embText):
        """Set the ``embText`` attribute."""
        self.embText = embText

    def validate_ColourSimpleType(self, value):
        """Validate ``value`` against pc:ColourSimpleType, a restriction on string.

        Problems are reported through ``self.gds_collector_``; nothing is
        raised.

        Returns:
            bool: ``False`` if a message was recorded (wrong base type or a
            value outside the enumeration), ``True`` otherwise, including
            when validation is skipped because ``value`` is ``None``,
            ``Validate_simpletypes_`` is false or no collector is attached.
        """
        if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
            return True
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            return False
        return True

    def has__content(self):
        """Return ``True`` if the superclass reports content, else ``False``."""
        return bool(super(LineDrawingRegionType, self).has__content())

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', pretty_print=True):
        """Write this element as XML text to ``outfile``.

        Arguments:
            outfile: object with a ``write`` method.
            level (int): indentation level handed to ``showIndent``.
            namespaceprefix_ (str): prefix written before the tag name;
                replaced by ``self.ns_prefix_ + ':'`` when ``UseCapturedNS_``
                is set and a prefix is stored.
            namespacedef_ (str): namespace declaration for the opening tag;
                replaced by an entry from ``GenerateDSNamespaceDefs_`` if
                one exists for this type.
            name_ (str): tag name; the default is replaced by
                ``self.original_tagname_`` when that is set.
            pretty_print (bool): emit newlines and indentation.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('LineDrawingRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'LineDrawingRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, (' ' + namespacedef_) if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LineDrawingRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # No content: emit a self-closing tag.
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LineDrawingRegionType'):
        """Write the inherited attributes, then this type's own, to ``outfile``.

        ``already_processed`` is a set of attribute names; each name written
        here is added to it, and names already present are skipped.
        """
        super(LineDrawingRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.penColour is not None and 'penColour' not in already_processed:
            already_processed.add('penColour')
            outfile.write(' penColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.penColour), input_name='penColour')), ))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))
        if self.embText is not None and 'embText' not in already_processed:
            already_processed.add('embText')
            outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText'))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this type adds no children)."""
        super(LineDrawingRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='LineDrawingRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build the element via the superclass and set this type's attributes on it.

        If given, ``mapping_`` receives ``id(self) -> element`` and
        ``reverse_mapping_`` receives ``element -> self``.

        Returns:
            the element returned by the superclass ``to_etree``.
        """
        element = super(LineDrawingRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.penColour is not None:
            element.set('penColour', self.gds_format_string(self.penColour))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if self.embText is not None:
            element.set('embText', self.gds_format_boolean(self.embText))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this instance from the parsed XML ``node`` and return ``self``."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            # Last group of the tag pattern is used as the child's node name.
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read this type's attributes from ``node``, then the inherited ones.

        Colour attributes are stored as found and then passed to
        ``validate_ColourSimpleType``. ``embText`` accepts ``true``/``1`` and
        ``false``/``0``; anything else is handed to ``raise_parse_error``.
        """
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            value = self.gds_parse_float(value, node, 'orientation')
            self.orientation = value
        value = find_attr_value_('penColour', node)
        if value is not None and 'penColour' not in already_processed:
            already_processed.add('penColour')
            self.penColour = value
            self.validate_ColourSimpleType(self.penColour)    # validate type ColourSimpleType
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        value = find_attr_value_('embText', node)
        if value is not None and 'embText' not in already_processed:
            already_processed.add('embText')
            if value in ('true', '1'):
                self.embText = True
            elif value in ('false', '0'):
                self.embText = False
            else:
                raise_parse_error(node, 'Bad boolean attribute')
        super(LineDrawingRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this type adds no children)."""
        # NOTE(review): gds_collector_ is not forwarded to the superclass call;
        # kept as generated -- confirm whether that is intended.
        super(LineDrawingRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash the instance by its ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the class name of ``x`` with ``'RegionType'`` removed."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """Flatten ``regions`` together with their nested regions.

        Arguments:
            regions (list): objects to start from.
            level (int): ``1`` stops descending and applies the ``classes``
                filter; any other value descends, passing ``level - 1`` on
                (or ``0`` when ``level`` is ``0``).
            classes (list): if non-empty, keep only elements whose
                ``_region_class`` is in this list.

        Returns:
            list: each parent followed by its descendants; a leading
            ``PageType`` element is dropped when no ``classes`` filter is set.
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """Collect the ``regionRef`` values of ``rogroup`` and its nested groups.

        Returns:
            list: one ``get_regionRef()`` value per element, each followed by
            the values of its nested elements unless it is a plain region
            reference. Empty if ``rogroup`` is neither an ordered nor an
            unordered group.
        """
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        elif isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        else:
            # Unknown group type: nothing to traverse (previously an UnboundLocalError).
            elements = []
        regionrefs = []
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

        Raises:
            Exception: if `order` is not one of the three accepted values or
                `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                       for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                       for line in region.get_TextLine())
        """
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise Exception("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise Exception("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # The recursion helper treats level 1 as "stop", so user depth d maps to d + 1; 0 means unlimited.
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
# end class LineDrawingRegionType
| 15426 |
|
|
| 15427 |
|
|
|
@@ 14812-15116 (lines=305) @@
|
| 14809 |
|
# end class TableRegionType |
| 14810 |
|
|
| 14811 |
|
|
| 14812 |
|
class GraphicRegionType(RegionType): |
| 14813 |
|
"""GraphicRegionType -- |
| 14814 |
|
Regions containing simple graphics, such as a company |
| 14815 |
|
logo, should be marked as graphic regions. |
| 14816 |
|
|
| 14817 |
|
* orientation -- |
| 14818 |
|
The angle the rectangle encapsulating a region |
| 14819 |
|
has to be rotated in clockwise direction |
| 14820 |
|
in order to correct the present skew |
| 14821 |
|
(negative values indicate anti-clockwise rotation). |
| 14822 |
|
Range: -179.999,180 |
| 14823 |
|
|
| 14824 |
|
* type -- |
| 14825 |
|
The type of graphic in the region |
| 14826 |
|
|
| 14827 |
|
* numColours -- |
| 14828 |
|
An approximation of the number of colours |
| 14829 |
|
used in the region |
| 14830 |
|
|
| 14831 |
|
* embText -- |
| 14832 |
|
Specifies whether the region also contains |
| 14833 |
|
text. |
| 14834 |
|
|
| 14835 |
|
""" |
| 14836 |
|
__hash__ = GeneratedsSuper.__hash__ |
| 14837 |
|
member_data_items_ = [ |
| 14838 |
|
MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}), |
| 14839 |
|
MemberSpec_('type_', 'pc:GraphicsTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}), |
| 14840 |
|
MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional', 'name': 'numColours'}), |
| 14841 |
|
MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}), |
| 14842 |
|
] |
| 14843 |
|
subclass = None |
| 14844 |
|
superclass = RegionType |
| 14845 |
|
def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, type_=None, numColours=None, embText=None, gds_collector_=None, **kwargs_):
    """Initialise the region.

    All parameters from ``id`` through ``CustomRegion`` are forwarded
    positionally to the superclass initialiser; ``orientation``, ``type_``,
    ``numColours`` and ``embText`` are stored on this instance after
    passing through ``_cast``.
    """
    # Bookkeeping fields set before the superclass initialiser runs.
    self.gds_collector_ = gds_collector_
    self.gds_elementtree_node_ = None
    self.original_tagname_ = None
    self.parent_object_ = kwargs_.get('parent_object_')
    self.ns_prefix_ = "pc"

    # The class is looked up by name at call time rather than bound lexically.
    this_class = globals().get("GraphicRegionType")
    inherited_args = (
        id, custom, comments, continuation, AlternativeImage, Coords,
        UserDefined, Labels, Roles, TextRegion, ImageRegion,
        LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion,
        SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion,
        NoiseRegion, UnknownRegion, CustomRegion,
    )
    super(this_class, self).__init__(*inherited_args, **kwargs_)

    # Attributes specific to this type, each with its namespace prefix.
    self.orientation = _cast(float, orientation)
    self.orientation_nsprefix_ = "pc"
    self.type_ = _cast(None, type_)
    self.type__nsprefix_ = "pc"
    self.numColours = _cast(int, numColours)
    self.numColours_nsprefix_ = "pc"
    self.embText = _cast(bool, embText)
    self.embText_nsprefix_ = "pc"
| 14860 |
|
def factory(*args_, **kwargs_):
    """Create an instance, preferring a registered subclass when one exists.

    A class found via ``getSubclassFromModule_`` wins, then the ``subclass``
    class attribute, then ``GraphicRegionType`` itself.
    """
    if CurrentSubclassModule_ is not None:
        override = getSubclassFromModule_(CurrentSubclassModule_, GraphicRegionType)
        if override is not None:
            return override(*args_, **kwargs_)
    # A falsy ``subclass`` attribute falls through to the class itself.
    target = GraphicRegionType.subclass or GraphicRegionType
    return target(*args_, **kwargs_)
factory = staticmethod(factory)
| 14871 |
|
def get_ns_prefix_(self):
    """Return the namespace prefix stored on this instance."""
    prefix = self.ns_prefix_
    return prefix
| 14873 |
|
def set_ns_prefix_(self, ns_prefix):
    """Store ``ns_prefix`` as this instance's namespace prefix."""
    setattr(self, 'ns_prefix_', ns_prefix)
| 14875 |
|
def get_orientation(self):
    """Return the ``orientation`` attribute."""
    angle = self.orientation
    return angle
| 14877 |
|
def set_orientation(self, orientation):
    """Set the ``orientation`` attribute to the given value, unchanged."""
    setattr(self, 'orientation', orientation)
| 14879 |
|
def get_type(self):
    """Return the graphic type, which is stored in the ``type_`` attribute."""
    graphic_type = self.type_
    return graphic_type
| 14881 |
|
def set_type(self, type_):
    """Store the graphic type in the ``type_`` attribute (not validated here)."""
    setattr(self, 'type_', type_)
| 14883 |
|
def get_numColours(self):
    """Return the ``numColours`` attribute."""
    colour_count = self.numColours
    return colour_count
| 14885 |
|
def set_numColours(self, numColours):
    """Set the ``numColours`` attribute to the given value, unchanged."""
    setattr(self, 'numColours', numColours)
| 14887 |
|
def get_embText(self):
    """Return the ``embText`` attribute."""
    has_text = self.embText
    return has_text
| 14889 |
|
def set_embText(self, embText):
    """Set the ``embText`` attribute to the given value, unchanged."""
    setattr(self, 'embText', embText)
| 14891 |
|
def validate_GraphicsTypeSimpleType(self, value):
    """Validate ``value`` against pc:GraphicsTypeSimpleType, a restriction on string.

    Problems are reported through ``self.gds_collector_``; nothing is raised.

    Returns:
        bool: ``False`` if a message was recorded (wrong base type or a
        value outside the enumeration), ``True`` otherwise, including when
        validation is skipped because ``value`` is ``None``,
        ``Validate_simpletypes_`` is false or no collector is attached.
    """
    if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
        return True
    if not isinstance(value, str):
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
        return False
    enumerations = ['logo', 'letterhead', 'decoration', 'frame', 'handwritten-annotation', 'stamp', 'signature', 'barcode', 'paper-grow', 'punch-hole', 'other']
    if value not in enumerations:
        lineno = self.gds_get_node_lineno_()
        self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GraphicsTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
        # Previously assigned to an unused local, so the failure was never reported to the caller.
        return False
    return True
| 14904 |
|
def has__content(self):
    """Return ``True`` if the superclass reports content, else ``False``."""
    inherited = super(GraphicRegionType, self).has__content()
    return True if inherited else False
| 14911 |
|
def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', pretty_print=True):
    """Write this element as XML text to ``outfile``.

    Arguments:
        outfile: object with a ``write`` method.
        level (int): indentation level handed to ``showIndent``.
        namespaceprefix_ (str): prefix written before the tag name; replaced
            by ``self.ns_prefix_ + ':'`` when ``UseCapturedNS_`` is set and a
            prefix is stored.
        namespacedef_ (str): namespace declaration for the opening tag;
            replaced by an entry from ``GenerateDSNamespaceDefs_`` if one
            exists for this type.
        name_ (str): tag name; the default is replaced by
            ``self.original_tagname_`` when that is set.
        pretty_print (bool): emit newlines and indentation.
    """
    registered_ns = GenerateDSNamespaceDefs_.get('GraphicRegionType')
    if registered_ns is not None:
        namespacedef_ = registered_ns
    eol_ = '\n' if pretty_print else ''
    if name_ == 'GraphicRegionType' and self.original_tagname_ is not None:
        name_ = self.original_tagname_
    if UseCapturedNS_ and self.ns_prefix_:
        namespaceprefix_ = self.ns_prefix_ + ':'

    showIndent(outfile, level, pretty_print)
    ns_decl = (' ' + namespacedef_) if namespacedef_ else ''
    outfile.write('<%s%s%s' % (namespaceprefix_, name_, ns_decl, ))
    written = set()
    self._exportAttributes(outfile, level, written, namespaceprefix_, name_='GraphicRegionType')

    if not self.has__content():
        # No content: emit a self-closing tag.
        outfile.write('/>%s' % (eol_, ))
        return
    outfile.write('>%s' % (eol_, ))
    self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphicRegionType', pretty_print=pretty_print)
    showIndent(outfile, level, pretty_print)
    outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
| 14934 |
|
def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphicRegionType'):
    """Write the inherited attributes, then this type's own, to ``outfile``.

    ``already_processed`` is a set of attribute names; each name written
    here is added to it, and names already present are skipped. The graphic
    type is tracked under the key ``'type_'`` but written as ``type``.
    """
    super(GraphicRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType')

    if 'orientation' not in already_processed and self.orientation is not None:
        already_processed.add('orientation')
        angle_text = self.gds_format_float(self.orientation, input_name='orientation')
        outfile.write(' orientation="%s"' % angle_text)

    if 'type_' not in already_processed and self.type_ is not None:
        already_processed.add('type_')
        # quote_attrib supplies the surrounding quotes, so the template has none.
        type_text = self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type'))
        outfile.write(' type=%s' % (type_text, ))

    if 'numColours' not in already_processed and self.numColours is not None:
        already_processed.add('numColours')
        count_text = self.gds_format_integer(self.numColours, input_name='numColours')
        outfile.write(' numColours="%s"' % count_text)

    if 'embText' not in already_processed and self.embText is not None:
        already_processed.add('embText')
        flag_text = self.gds_format_boolean(self.embText, input_name='embText')
        outfile.write(' embText="%s"' % flag_text)
| 14948 |
|
def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', fromsubclass_=False, pretty_print=True):
    """Delegate child export to the superclass (this type adds no children).

    The superclass is always called with ``True`` in the ``fromsubclass_``
    position, regardless of the value received here.
    """
    parent = super(GraphicRegionType, self)
    parent._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)
| 14950 |
|
def to_etree(self, parent_element=None, name_='GraphicRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
    """Build the element via the superclass and set this type's attributes on it.

    If given, ``mapping_`` receives ``id(self) -> element`` and
    ``reverse_mapping_`` receives ``element -> self``.

    Returns:
        the element returned by the superclass ``to_etree``.
    """
    element = super(GraphicRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)

    # Attributes left as None are omitted from the element.
    angle = self.orientation
    if angle is not None:
        element.set('orientation', self.gds_format_float(angle))
    graphic_type = self.type_
    if graphic_type is not None:
        element.set('type', self.gds_format_string(graphic_type))
    colour_count = self.numColours
    if colour_count is not None:
        element.set('numColours', self.gds_format_integer(colour_count))
    has_text = self.embText
    if has_text is not None:
        element.set('embText', self.gds_format_boolean(has_text))

    if mapping_ is not None:
        mapping_[id(self)] = element
    if reverse_mapping_ is not None:
        reverse_mapping_[element] = self
    return element
| 14965 |
|
def build(self, node, gds_collector_=None):
    """Populate this instance from the parsed XML ``node`` and return ``self``.

    Attributes are read first, then each child of ``node`` is passed to
    ``_buildChildren`` together with its node name.
    """
    self.gds_collector_ = gds_collector_
    if SaveElementTreeNode:
        self.gds_elementtree_node_ = node
    seen_attrs = set()
    self.ns_prefix_ = node.prefix
    self._buildAttributes(node, node.attrib, seen_attrs)
    for child_node in node:
        # Last group of the tag pattern is used as the child's node name.
        local_name = Tag_pattern_.match(child_node.tag).groups()[-1]
        self._buildChildren(child_node, node, local_name, gds_collector_=gds_collector_)
    return self
| 14976 |
|
def _buildAttributes(self, node, attrs, already_processed):
    """Read this type's attributes from ``node``, then the inherited ones.

    Each attribute found is recorded in the ``already_processed`` set and
    skipped if its name is already there. ``type`` is stored as found and
    then passed to ``validate_GraphicsTypeSimpleType``. ``embText`` accepts
    ``true``/``1`` and ``false``/``0``; anything else is handed to
    ``raise_parse_error``.
    """
    raw_orientation = find_attr_value_('orientation', node)
    if raw_orientation is not None and 'orientation' not in already_processed:
        already_processed.add('orientation')
        self.orientation = self.gds_parse_float(raw_orientation, node, 'orientation')

    raw_type = find_attr_value_('type', node)
    if raw_type is not None and 'type' not in already_processed:
        already_processed.add('type')
        self.type_ = raw_type
        self.validate_GraphicsTypeSimpleType(self.type_)    # validate type GraphicsTypeSimpleType

    raw_count = find_attr_value_('numColours', node)
    if raw_count is not None and 'numColours' not in already_processed:
        already_processed.add('numColours')
        self.numColours = self.gds_parse_integer(raw_count, node, 'numColours')

    raw_flag = find_attr_value_('embText', node)
    if raw_flag is not None and 'embText' not in already_processed:
        already_processed.add('embText')
        if raw_flag in ('true', '1'):
            self.embText = True
        elif raw_flag in ('false', '0'):
            self.embText = False
        else:
            raise_parse_error(node, 'Bad boolean attribute')

    super(GraphicRegionType, self)._buildAttributes(node, attrs, already_processed)
| 15001 |
|
def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
    """Delegate child parsing to the superclass (this type adds no children).

    The superclass is always called with ``True`` in the ``fromsubclass_``
    position; ``gds_collector_`` is accepted but not forwarded.
    """
    parent = super(GraphicRegionType, self)
    parent._buildChildren(child_, node, nodeName_, True)
| 15004 |
|
def __hash__(self):
    """Hash the instance by its ``id`` attribute."""
    region_id = self.id
    return hash(region_id)
| 15006 |
|
# pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring |
| 15007 |
|
def _region_class(self, x): # pylint: disable=unused-argument
    """Return the class name of ``x`` with ``'RegionType'`` removed.

    For example an instance of a class named ``TextRegionType`` yields
    ``'Text'``; names without that substring are returned unchanged.
    """
    class_name = x.__class__.__name__
    return class_name.replace('RegionType', '')
| 15009 |
|
|
| 15010 |
|
def _get_recursive_regions(self, regions, level, classes=None):
    """Flatten ``regions`` together with their nested regions.

    Arguments:
        regions (list): objects to start from.
        level (int): ``1`` stops descending and applies the ``classes``
            filter; any other value descends, passing ``level - 1`` on (or
            ``0`` when ``level`` is ``0``).
        classes (list): if non-empty, keep only elements whose
            ``_region_class`` is in this list.

    Returns:
        list: each parent followed by its descendants; a leading
        ``PageType`` element is dropped when no ``classes`` filter is set.
    """
    from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel

    if level == 1:
        # stop recursion, filter classes
        if classes:
            return [candidate for candidate in regions if self._region_class(candidate) in classes]
        if regions and regions[0].__class__.__name__ == 'PageType':
            return regions[1:]
        return regions

    # find more regions recursively: one list of direct children per parent
    children_per_region = []
    for parent in regions:
        direct_children = []
        for class_ in PAGE_REGION_TYPES:
            # 'Map' is not recursive in 2019 schema
            if class_ == 'Map' and not isinstance(parent, PageType): # pylint: disable=undefined-variable
                continue
            getter = getattr(parent, 'get_{}Region'.format(class_))
            direct_children += getter()
        children_per_region.append(direct_children)

    if not any(children_per_region):
        # Nothing nested anywhere: finish with the filtering step.
        return self._get_recursive_regions(regions, 1, classes)

    next_level = level - 1 if level else 0
    flattened = []
    for parent, direct_children in zip(regions, children_per_region):
        flattened.append(parent)
        flattened += self._get_recursive_regions(direct_children, next_level, classes)
    return self._get_recursive_regions(flattened, 1, classes)
| 15035 |
|
|
| 15036 |
|
def _get_recursive_reading_order(self, rogroup):
    """Flatten a reading-order group into a list of ``regionRef`` values.

    For an ordered group the members come from ``get_AllIndexed()``; for
    an unordered group they are the region refs, then the ordered groups,
    then the unordered groups.  The ``regionRef`` of every member is
    recorded, and members that are themselves groups are expanded
    recursively right after their own entry.

    NOTE: `rogroup` must be one of the four (un)ordered group types;
    for any other value the member list is never assigned and the
    loop below fails.
    """
    if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)):  # pylint: disable=undefined-variable
        members = rogroup.get_AllIndexed()
    if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)):  # pylint: disable=undefined-variable
        members = rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup()
    collected = []
    for member in members:
        collected.append(member.get_regionRef())
        is_plain_ref = isinstance(member, (RegionRefType, RegionRefIndexedType))  # pylint: disable=undefined-variable
        if not is_plain_ref:
            # Nested group: append its members after the group's own reference.
            collected += self._get_recursive_reading_order(member)
    return collected
| 15047 |
|
|
| 15048 |
|
def get_AllRegions(self, classes=None, order='document', depth=0):
    """
    Get all the ``*Region`` elements, or only those provided by `classes`.
    Return in document order, unless the top element is ``Page`` and
    `order` is ``reading-order`` or ``reading-order-only``.

    Arguments:
        classes (list): Classes of regions that shall be returned,
            e.g. ``['Text', 'Image']``
        order ("document"|"reading-order"|"reading-order-only"): Whether to
            return regions sorted by document order (``document``, default) or by
            reading order with regions not in the reading order at the end of the
            returned list (``reading-order``) or regions not in the reading order
            omitted (``reading-order-only``). The latter two are only available
            on page level. If the page has no (non-empty) reading order, the
            document order is returned for all three values.
        depth (int): Recursive depth to look for regions at, set to `0` for
            all regions at any depth. Default: 0

    Returns:
        a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
        :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
        :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
        :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
        :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
        :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
        :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
        and/or :py:class:`CustomRegionType`

    Raises:
        ValueError: if `order` is not one of the supported values, or if
            `depth` is negative.

    For example, to get all text anywhere on the page in reading order, use:
    ::
        '\\n'.join(line.get_TextEquiv()[0].Unicode
                  for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                  for line in region.get_TextLine())
    """
    # ValueError is a subclass of Exception, so callers that caught the
    # generic Exception raised here previously keep working.
    if order not in ('document', 'reading-order', 'reading-order-only'):
        raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
    if depth < 0:
        raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
    # The recursion helper treats level 0 as "unlimited" and level 1 as
    # "stop here", hence the shift by one for any explicit depth.
    ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
    # Reading order only exists on page level.
    if self.__class__.__name__ != 'PageType' or not order.startswith('reading-order'):
        return ret
    reading_order = self.get_ReadingOrder()
    if reading_order:
        reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
    if reading_order:
        reading_order = self._get_recursive_reading_order(reading_order)
    if not reading_order:
        return ret
    id2region = {region.id: region for region in ret}
    # References to regions that were not collected (filtered out by
    # `classes`/`depth`, or dangling) are skipped silently.
    in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
    if order == 'reading-order-only':
        return in_reading_order
    # Use object identity for the "already placed" check: a list membership
    # test would be quadratic and would go through ``==`` for every pair.
    placed = {id(region) for region in in_reading_order}
    return in_reading_order + [region for region in ret if id(region) not in placed]
| 15106 |
|
def set_orientation(self, orientation):
    """
    Store `orientation` as the new deskewing angle.

    If the object offers ``invalidate_AlternativeImage``, it is called
    first with ``feature_selector='deskewed'``, because alternative
    images derived with the previous angle (rotated and enlarged
    accordingly) no longer match.
    """
    can_invalidate = hasattr(self, 'invalidate_AlternativeImage')
    if can_invalidate:
        # PageType, RegionType:
        self.invalidate_AlternativeImage(feature_selector='deskewed')
    self.orientation = orientation
| 15117 |
|
# end class GraphicRegionType |
| 15118 |
|
|
| 15119 |
|
|