Code Duplication    Length = 305-305 lines in 2 locations

src/ocrd_models/ocrd_page_generateds.py 2 locations

@@ 15120-15424 (lines=305) @@
15117
# end class GraphicRegionType
15118
15119
15120
class LineDrawingRegionType(RegionType):
    """LineDrawingRegionType --
    A line drawing is a single colour illustration without
    solid areas.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * penColour --
      The pen (foreground) colour of the region

    * bgColour --
      The background colour of the region

    * embText --
      Specifies whether the region also contains
      text

    """
    # NOTE: ``__hash__`` is defined once, further below (hash of ``self.id``).
    # Descriptors of the XML attributes this type adds on top of RegionType.
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('penColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'penColour'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
        MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}),
    ]
    # Optional replacement class consulted by ``factory``.
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, penColour=None, bgColour=None, embText=None, gds_collector_=None, **kwargs_):
        """Initialise the region.

        All arguments up to ``CustomRegion`` are forwarded positionally to
        ``RegionType.__init__``; ``orientation``, ``penColour``, ``bgColour``
        and ``embText`` are stored on this instance after passing through
        ``_cast``. ``parent_object_`` is read from ``kwargs_`` if present.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time (as emitted by the generator).
        super(globals().get("LineDrawingRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.penColour = _cast(None, penColour)
        self.penColour_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"
        self.embText = _cast(bool, embText)
        self.embText_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Create an instance, preferring a registered subclass.

        Order of precedence: a subclass found via ``getSubclassFromModule_``
        (when ``CurrentSubclassModule_`` is set), then the class attribute
        ``subclass``, then ``LineDrawingRegionType`` itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, LineDrawingRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if LineDrawingRegionType.subclass:
            return LineDrawingRegionType.subclass(*args_, **kwargs_)
        return LineDrawingRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix stored on this object."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Store `ns_prefix` as this object's namespace prefix."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the ``orientation`` attribute."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_penColour(self):
        """Return the ``penColour`` attribute."""
        return self.penColour

    def set_penColour(self, penColour):
        """Set the ``penColour`` attribute (no validation is performed here)."""
        self.penColour = penColour

    def get_bgColour(self):
        """Return the ``bgColour`` attribute."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Set the ``bgColour`` attribute (no validation is performed here)."""
        self.bgColour = bgColour

    def get_embText(self):
        """Return the ``embText`` attribute."""
        return self.embText

    def set_embText(self, embText):
        """Set the ``embText`` attribute."""
        self.embText = embText

    def validate_ColourSimpleType(self, value):
        """Validate `value` against pc:ColourSimpleType, a restriction on string.

        Checks only run when `value` is not None, ``Validate_simpletypes_`` is
        truthy and a collector is attached; each failure is reported through
        ``self.gds_collector_.add_message``.

        Returns:
            bool: False if `value` is not a ``str`` or is not one of the
            enumerated colour names; True otherwise (including when the
            checks are skipped).
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
            if value not in enumerations:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
                result = False
        # Previously the outcome was computed but dropped; report it to the caller.
        return result

    def has__content(self):
        """Return True if the superclass reports content, else False.

        This type contributes attributes only, so the decision is delegated.
        """
        return bool(super(LineDrawingRegionType, self).has__content())

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', pretty_print=True):
        """Write this object as an XML element to `outfile`.

        Arguments:
            outfile: object with a ``write`` method receiving the markup.
            level (int): indentation level passed to ``showIndent``.
            namespaceprefix_ (str): prefix put in front of the tag name;
                replaced by ``self.ns_prefix_ + ':'`` when ``UseCapturedNS_``
                is set and a prefix is stored.
            namespacedef_ (str): namespace declaration written into the start
                tag; overridden by an entry in ``GenerateDSNamespaceDefs_``.
            name_ (str): tag name; replaced by ``original_tagname_`` when that
                is set and `name_` is still the default.
            pretty_print (bool): whether to emit newlines and indentation.
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('LineDrawingRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'LineDrawingRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='LineDrawingRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # No child content: emit a self-closing tag.
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='LineDrawingRegionType'):
        """Write the superclass attributes, then this type's own attributes.

        Each attribute is written only if it is not None and its name is not
        yet in `already_processed`, which is updated as attributes are emitted.
        """
        super(LineDrawingRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='LineDrawingRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.penColour is not None and 'penColour' not in already_processed:
            already_processed.add('penColour')
            outfile.write(' penColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.penColour), input_name='penColour')), ))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))
        if self.embText is not None and 'embText' not in already_processed:
            already_processed.add('embText')
            outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText'))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LineDrawingRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this type adds no children)."""
        super(LineDrawingRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='LineDrawingRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build an element for this object and return it.

        The element is created by the superclass; this method sets the
        ``orientation``, ``penColour``, ``bgColour`` and ``embText`` attributes
        that are not None and, when given, records the element in `mapping_`
        (keyed by ``id(self)``) and `reverse_mapping_` (keyed by the element).
        """
        element = super(LineDrawingRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.penColour is not None:
            element.set('penColour', self.gds_format_string(self.penColour))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if self.embText is not None:
            element.set('embText', self.gds_format_boolean(self.embText))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element `node` and return ``self``.

        Attributes are read first, then every child of `node` is passed to
        ``_buildChildren`` under its tag name as extracted by ``Tag_pattern_``.
        """
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read this type's attributes from `node`, then the superclass ones.

        Raises whatever ``raise_parse_error`` raises when ``embText`` is
        present but is not one of ``true``/``1``/``false``/``0``.
        """
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            value = self.gds_parse_float(value, node, 'orientation')
            self.orientation = value
        value = find_attr_value_('penColour', node)
        if value is not None and 'penColour' not in already_processed:
            already_processed.add('penColour')
            self.penColour = value
            self.validate_ColourSimpleType(self.penColour)    # validate type ColourSimpleType
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        value = find_attr_value_('embText', node)
        if value is not None and 'embText' not in already_processed:
            already_processed.add('embText')
            if value in ('true', '1'):
                self.embText = True
            elif value in ('false', '0'):
                self.embText = False
            else:
                raise_parse_error(node, 'Bad boolean attribute')
        super(LineDrawingRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this type adds no children)."""
        super(LineDrawingRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash the object by its ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the class name of `x` with ``'RegionType'`` removed."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """Collect `regions` and their nested regions, parents before children.

        Arguments:
            regions (list): objects to start from.
            level (int): ``1`` stops the recursion and applies the filter;
                ``0`` is never decremented, i.e. recursion continues until no
                nested regions are found; other values are reduced by one per
                nesting step.
            classes (list): if truthy, only objects whose ``_region_class``
                name is contained in it are returned.

        Returns:
            list: the collected objects; without `classes`, a leading
            ``PageType`` object is dropped.
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """Return the ``regionRef`` values below `rogroup`, depth first.

        Arguments:
            rogroup: an ordered or unordered reading-order group (plain or
                indexed variant).

        Returns:
            list: ``regionRef`` of every element, each group's own reference
            immediately followed by those of its members.

        Raises:
            TypeError: if `rogroup` is none of the four group types.
        """
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        elif isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        else:
            # Fail with a clear message instead of an UnboundLocalError on `elements`.
            raise TypeError("Expected an ordered or unordered reading order group, not '{}'".format(rogroup.__class__.__name__))
        regionrefs = []
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the three accepted values or
                `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # depth 0 means unlimited; otherwise one extra level accounts for `self`.
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    # Regions missing from the reading order are appended in document order.
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
15425
# end class LineDrawingRegionType
15426
15427
@@ 14812-15116 (lines=305) @@
14809
# end class TableRegionType
14810
14811
14812
class GraphicRegionType(RegionType):
14813
    """GraphicRegionType --
14814
    Regions containing simple graphics, such as a company
14815
    logo, should be marked as graphic regions.
14816
      
14817
    * orientation --
14818
      The angle the rectangle encapsulating a region
14819
      has to be rotated in clockwise direction
14820
      in order to correct the present skew
14821
      (negative values indicate anti-clockwise rotation).
14822
      Range: -179.999,180
14823
      
14824
    * type --
14825
      The type of graphic in the region
14826
      
14827
    * numColours --
14828
      An approximation of the number of colours
14829
      used in the region
14830
      
14831
    * embText --
14832
      Specifies whether the region also contains
14833
      text.
14834
    
14835
    """
14836
    __hash__ = GeneratedsSuper.__hash__
14837
    member_data_items_ = [
14838
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
14839
        MemberSpec_('type_', 'pc:GraphicsTypeSimpleType', 0, 1, {'use': 'optional', 'name': 'type_'}),
14840
        MemberSpec_('numColours', 'int', 0, 1, {'use': 'optional', 'name': 'numColours'}),
14841
        MemberSpec_('embText', 'boolean', 0, 1, {'use': 'optional', 'name': 'embText'}),
14842
    ]
14843
    subclass = None
14844
    superclass = RegionType
14845
    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, type_=None, numColours=None, embText=None, gds_collector_=None, **kwargs_):
14846
        self.gds_collector_ = gds_collector_
14847
        self.gds_elementtree_node_ = None
14848
        self.original_tagname_ = None
14849
        self.parent_object_ = kwargs_.get('parent_object_')
14850
        self.ns_prefix_ = "pc"
14851
        super(globals().get("GraphicRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
14852
        self.orientation = _cast(float, orientation)
14853
        self.orientation_nsprefix_ = "pc"
14854
        self.type_ = _cast(None, type_)
14855
        self.type__nsprefix_ = "pc"
14856
        self.numColours = _cast(int, numColours)
14857
        self.numColours_nsprefix_ = "pc"
14858
        self.embText = _cast(bool, embText)
14859
        self.embText_nsprefix_ = "pc"
14860
    def factory(*args_, **kwargs_):
14861
        if CurrentSubclassModule_ is not None:
14862
            subclass = getSubclassFromModule_(
14863
                CurrentSubclassModule_, GraphicRegionType)
14864
            if subclass is not None:
14865
                return subclass(*args_, **kwargs_)
14866
        if GraphicRegionType.subclass:
14867
            return GraphicRegionType.subclass(*args_, **kwargs_)
14868
        else:
14869
            return GraphicRegionType(*args_, **kwargs_)
14870
    factory = staticmethod(factory)
14871
    def get_ns_prefix_(self):
14872
        return self.ns_prefix_
14873
    def set_ns_prefix_(self, ns_prefix):
14874
        self.ns_prefix_ = ns_prefix
14875
    def get_orientation(self):
14876
        return self.orientation
14877
    def set_orientation(self, orientation):
14878
        self.orientation = orientation
14879
    def get_type(self):
14880
        return self.type_
14881
    def set_type(self, type_):
14882
        self.type_ = type_
14883
    def get_numColours(self):
14884
        return self.numColours
14885
    def set_numColours(self, numColours):
14886
        self.numColours = numColours
14887
    def get_embText(self):
14888
        return self.embText
14889
    def set_embText(self, embText):
14890
        self.embText = embText
14891
    def validate_GraphicsTypeSimpleType(self, value):
        """Validate `value` against pc:GraphicsTypeSimpleType, a restriction on string.

        Checks only run when `value` is not None, ``Validate_simpletypes_`` is
        truthy and a collector is attached; each failure is reported through
        ``self.gds_collector_.add_message``.

        Returns:
            bool: False if `value` is not a ``str`` or is not one of the
            enumerated graphic types; True otherwise (including when the
            checks are skipped).
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            enumerations = ['logo', 'letterhead', 'decoration', 'frame', 'handwritten-annotation', 'stamp', 'signature', 'barcode', 'paper-grow', 'punch-hole', 'other']
            if value not in enumerations:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on GraphicsTypeSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
                result = False
        # Previously the outcome was computed but dropped; report it to the caller.
        return result
14904
    def has__content(self):
14905
        if (
14906
            super(GraphicRegionType, self).has__content()
14907
        ):
14908
            return True
14909
        else:
14910
            return False
14911
    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', pretty_print=True):
14912
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('GraphicRegionType')
14913
        if imported_ns_def_ is not None:
14914
            namespacedef_ = imported_ns_def_
14915
        if pretty_print:
14916
            eol_ = '\n'
14917
        else:
14918
            eol_ = ''
14919
        if self.original_tagname_ is not None and name_ == 'GraphicRegionType':
14920
            name_ = self.original_tagname_
14921
        if UseCapturedNS_ and self.ns_prefix_:
14922
            namespaceprefix_ = self.ns_prefix_ + ':'
14923
        showIndent(outfile, level, pretty_print)
14924
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, namespacedef_ and ' ' + namespacedef_ or '', ))
14925
        already_processed = set()
14926
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType')
14927
        if self.has__content():
14928
            outfile.write('>%s' % (eol_, ))
14929
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='GraphicRegionType', pretty_print=pretty_print)
14930
            showIndent(outfile, level, pretty_print)
14931
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
14932
        else:
14933
            outfile.write('/>%s' % (eol_, ))
14934
    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='GraphicRegionType'):
14935
        super(GraphicRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='GraphicRegionType')
14936
        if self.orientation is not None and 'orientation' not in already_processed:
14937
            already_processed.add('orientation')
14938
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
14939
        if self.type_ is not None and 'type_' not in already_processed:
14940
            already_processed.add('type_')
14941
            outfile.write(' type=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.type_), input_name='type')), ))
14942
        if self.numColours is not None and 'numColours' not in already_processed:
14943
            already_processed.add('numColours')
14944
            outfile.write(' numColours="%s"' % self.gds_format_integer(self.numColours, input_name='numColours'))
14945
        if self.embText is not None and 'embText' not in already_processed:
14946
            already_processed.add('embText')
14947
            outfile.write(' embText="%s"' % self.gds_format_boolean(self.embText, input_name='embText'))
14948
    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GraphicRegionType', fromsubclass_=False, pretty_print=True):
14949
        super(GraphicRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)
14950
    def to_etree(self, parent_element=None, name_='GraphicRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
14951
        element = super(GraphicRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
14952
        if self.orientation is not None:
14953
            element.set('orientation', self.gds_format_float(self.orientation))
14954
        if self.type_ is not None:
14955
            element.set('type', self.gds_format_string(self.type_))
14956
        if self.numColours is not None:
14957
            element.set('numColours', self.gds_format_integer(self.numColours))
14958
        if self.embText is not None:
14959
            element.set('embText', self.gds_format_boolean(self.embText))
14960
        if mapping_ is not None:
14961
            mapping_[id(self)] = element
14962
        if reverse_mapping_ is not None:
14963
            reverse_mapping_[element] = self
14964
        return element
14965
    def build(self, node, gds_collector_=None):
14966
        self.gds_collector_ = gds_collector_
14967
        if SaveElementTreeNode:
14968
            self.gds_elementtree_node_ = node
14969
        already_processed = set()
14970
        self.ns_prefix_ = node.prefix
14971
        self._buildAttributes(node, node.attrib, already_processed)
14972
        for child in node:
14973
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
14974
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
14975
        return self
14976
    def _buildAttributes(self, node, attrs, already_processed):
14977
        value = find_attr_value_('orientation', node)
14978
        if value is not None and 'orientation' not in already_processed:
14979
            already_processed.add('orientation')
14980
            value = self.gds_parse_float(value, node, 'orientation')
14981
            self.orientation = value
14982
        value = find_attr_value_('type', node)
14983
        if value is not None and 'type' not in already_processed:
14984
            already_processed.add('type')
14985
            self.type_ = value
14986
            self.validate_GraphicsTypeSimpleType(self.type_)    # validate type GraphicsTypeSimpleType
14987
        value = find_attr_value_('numColours', node)
14988
        if value is not None and 'numColours' not in already_processed:
14989
            already_processed.add('numColours')
14990
            self.numColours = self.gds_parse_integer(value, node, 'numColours')
14991
        value = find_attr_value_('embText', node)
14992
        if value is not None and 'embText' not in already_processed:
14993
            already_processed.add('embText')
14994
            if value in ('true', '1'):
14995
                self.embText = True
14996
            elif value in ('false', '0'):
14997
                self.embText = False
14998
            else:
14999
                raise_parse_error(node, 'Bad boolean attribute')
15000
        super(GraphicRegionType, self)._buildAttributes(node, attrs, already_processed)
15001
    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
15002
        super(GraphicRegionType, self)._buildChildren(child_, node, nodeName_, True)
15003
        pass
15004
    def __hash__(self):
15005
        return hash(self.id)
15006
    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
15007
    def _region_class(self, x): # pylint: disable=unused-argument
15008
        return x.__class__.__name__.replace('RegionType', '')
15009
    
15010
    def _get_recursive_regions(self, regions, level, classes=None):
        """
        Collect `regions` together with their nested regions, depth-first.

        Arguments:
            regions (list): Elements to start from.
            level (int): Remaining levels to descend. ``1`` stops the
                recursion and only filters; ``0`` is passed on unchanged
                to deeper calls, so it never counts down to ``1``.
            classes (list): If truthy, keep only elements whose
                :py:meth:`_region_class` is contained in it.

        Returns:
            a list in which every element is directly followed by the
            regions found below it. Without `classes`, a leading element
            whose class is named ``PageType`` is dropped from the result.
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # Bottom of the recursion: nothing to expand, only filter.
            if classes:
                return [candidate for candidate in regions if self._region_class(candidate) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                # The page itself is not a region.
                return regions[1:]
            return regions
        # Gather the direct sub-regions of every element, one list per element.
        children_per_region = []
        for parent in regions:
            children = []
            for kind in PAGE_REGION_TYPES:
                # 'Map' is not recursive in 2019 schema, so only a page is asked for it
                if kind != 'Map' or isinstance(parent, PageType): # pylint: disable=undefined-variable
                    children.extend(getattr(parent, 'get_{}Region'.format(kind))())
            children_per_region.append(children)
        if not any(children_per_region):
            # No element has sub-regions: finish with the filtering step.
            return self._get_recursive_regions(regions, 1, classes)
        next_level = level - 1 if level else 0
        flattened = []
        for parent, children in zip(regions, children_per_region):
            flattened.append(parent)
            flattened.extend(self._get_recursive_regions(children, next_level, classes))
        return self._get_recursive_regions(flattened, 1, classes)
15035
    
15036
    def _get_recursive_reading_order(self, rogroup):
        """
        Flatten a reading-order group into a list of ``regionRef`` values.

        For ordered groups the members come from ``get_AllIndexed()``; for
        unordered groups they are the region refs, then the ordered
        sub-groups, then the unordered sub-groups. Each member contributes
        its own ``regionRef``; members that are not plain region refs are
        descended into recursively.

        NOTE(review): `rogroup` is expected to be one of the four group
        types checked below; any other type leaves the member list unset.
        """
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            members = rogroup.get_AllIndexed()
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            members = rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup()
        collected = []
        for member in members:
            collected.append(member.get_regionRef())
            if isinstance(member, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                # Plain reference: nothing nested below it.
                continue
            collected.extend(self._get_recursive_reading_order(member))
        return collected
15047
    
15048
    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Return all ``*Region`` elements below this element, optionally
        restricted to the region kinds named in `classes`.

        The result is in document order, unless this element is the
        ``Page`` and `order` asks for reading order.

        Arguments:
            classes (list): Kinds of regions to return,
                e.g. ``['Text', 'Image']``. All kinds if empty or ``None``.
            order ("document"|"reading-order"|"reading-order-only"):
                ``document`` (default) returns regions in document order.
                ``reading-order`` returns regions referenced by the reading
                order first, followed by the remaining regions.
                ``reading-order-only`` omits regions that are not part of
                the reading order. The latter two only take effect on
                page level.
            depth (int): How many levels deep to look for regions;
                ``0`` (default) means any depth.

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
            :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
            :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
            :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
            :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
            :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
            :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
            and/or :py:class:`CustomRegionType`

        Raises:
            Exception: if `order` is not one of the three allowed values,
                or if `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        allowed_orders = ('document', 'reading-order', 'reading-order-only')
        if order not in allowed_orders:
            raise Exception("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise Exception("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # The starting element itself occupies one level, hence depth + 1;
        # 0 is kept as-is to mean "no limit".
        max_level = depth + 1 if depth else 0
        found = self._get_recursive_regions([self], max_level, classes)
        if self.__class__.__name__ != 'PageType' or not order.startswith('reading-order'):
            return found
        # Resolve the reading order step by step; any empty step falls
        # back to document order.
        ro = self.get_ReadingOrder()
        if ro:
            ro = ro.get_OrderedGroup() or ro.get_UnorderedGroup()
        if ro:
            ro = self._get_recursive_reading_order(ro)
        if not ro:
            return found
        by_id = {}
        for region in found:
            by_id[region.id] = region
        # References to ids that were not found are silently skipped.
        ordered = [by_id[ref] for ref in ro if ref in by_id]
        if order == 'reading-order-only':
            return ordered
        return ordered + [region for region in found if region not in ordered]
15106
    def set_orientation(self, orientation):
        """
        Store `orientation` as the new deskewing angle.

        Before the value is stored, self's ``pc:AlternativeImage``s
        with the ``deskewed`` feature are invalidated (if self offers
        ``invalidate_AlternativeImage``), because they will have been
        rotated and enlarged with the angle of the previous value.
        """
        try:
            # PageType, RegionType:
            invalidate = self.invalidate_AlternativeImage
        except AttributeError:
            # Nothing to invalidate on this kind of element.
            pass
        else:
            invalidate(feature_selector='deskewed')
        self.orientation = orientation
15117
# end class GraphicRegionType
15118
15119