Code Duplication    Length = 259-261 lines in 5 locations

src/ocrd_models/ocrd_page_generateds.py 5 locations

@@ 13812-14072 (lines=261) @@
13809
# end class MathsRegionType
13810
13811
13812
class SeparatorRegionType(RegionType):
    """SeparatorRegionType --
    Separators are lines that lie between columns and
    paragraphs and can be used to logically separate
    different articles from each other.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * colour --
      The colour of the separator

    """
    # NOTE: the former ``__hash__ = GeneratedsSuper.__hash__`` assignment was
    # dead code: ``__hash__`` is defined as a method further down in this
    # class body, which rebinds the name. Only the effective one is kept.
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('colour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'colour'}),
    ]
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, colour=None, gds_collector_=None, **kwargs_):
        """Initialise the region.

        All region arguments are forwarded positionally to the ``RegionType``
        initialiser; ``orientation`` and ``colour`` are the attributes added
        by this class. ``parent_object_`` is read from ``kwargs_`` if given.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time (not bound lexically),
        # exactly as in the generated original.
        super(globals().get("SeparatorRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.colour = _cast(None, colour)
        self.colour_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate this class or a registered subclass of it.

        Preference order: a subclass found in ``CurrentSubclassModule_``,
        then ``SeparatorRegionType.subclass``, then the class itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, SeparatorRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if SeparatorRegionType.subclass:
            return SeparatorRegionType.subclass(*args_, **kwargs_)
        return SeparatorRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix used for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix used for this element."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the ``orientation`` attribute."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        # This is the only effective definition: the plain generated setter
        # that used to precede it was shadowed by this one and was removed.
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_colour(self):
        """Return the ``colour`` attribute."""
        return self.colour

    def set_colour(self, colour):
        """Set the ``colour`` attribute (no validation is performed here)."""
        self.colour = colour

    def validate_ColourSimpleType(self, value):
        """Validate `value` against pc:ColourSimpleType (a restriction on string).

        Validation only takes place when `value` is not None,
        ``Validate_simpletypes_`` is truthy and a collector is attached;
        problems are reported via ``self.gds_collector_.add_message``.

        Returns:
            bool: False if a problem was reported, True otherwise (including
            when validation was skipped). Previously the enumeration failure
            was stored in an unused local and ``None`` was returned.
        """
        if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
            return True
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            return False
        return True

    def has__content(self):
        """Return True if the superclass reports content, else False."""
        return bool(super(SeparatorRegionType, self).has__content())

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='SeparatorRegionType', pretty_print=True):
        """Write this element as XML text to `outfile`.

        Arguments:
            outfile: object with a ``write`` method receiving the XML text
            level (int): indentation level passed to ``showIndent``
            namespaceprefix_ (str): prefix written before the tag name
            namespacedef_ (str): namespace declaration written on the tag;
                overridden by an entry in ``GenerateDSNamespaceDefs_``
            name_ (str): tag name; replaced by ``original_tagname_`` when
                that is set and `name_` is still the default
            pretty_print (bool): whether to emit newlines and indentation
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('SeparatorRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'SeparatorRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='SeparatorRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # no content: emit a self-closing tag
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='SeparatorRegionType'):
        """Write the inherited attributes, then ``orientation`` and ``colour``.

        Attributes already named in `already_processed` are not written again.
        """
        super(SeparatorRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='SeparatorRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.colour is not None and 'colour' not in already_processed:
            already_processed.add('colour')
            outfile.write(' colour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.colour), input_name='colour')), ))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='SeparatorRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this class adds no children)."""
        super(SeparatorRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='SeparatorRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build an element via the superclass and set this class's attributes.

        If given, `mapping_` gets ``id(self) -> element`` and
        `reverse_mapping_` gets ``element -> self``.

        Returns:
            the element created by the superclass ``to_etree``
        """
        element = super(SeparatorRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.colour is not None:
            element.set('colour', self.gds_format_string(self.colour))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the parsed element `node` and return self."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            # last group of the tag pattern is used as the node name
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read ``orientation`` and ``colour`` from `node`, then the inherited attributes."""
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            # direct assignment: does not go through set_orientation()
            self.orientation = self.gds_parse_float(value, node, 'orientation')
        value = find_attr_value_('colour', node)
        if value is not None and 'colour' not in already_processed:
            already_processed.add('colour')
            self.colour = value
            self.validate_ColourSimpleType(self.colour)    # validate type ColourSimpleType
        super(SeparatorRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this class adds no children)."""
        super(SeparatorRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash the object by its ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the class name of `x` with ``'RegionType'`` removed."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """Collect `regions` and their nested regions in document order.

        Arguments:
            regions (list): objects to start from
            level (int): ``1`` stops the recursion and applies the filter;
                ``0`` stays ``0`` on descent, so recursion continues until
                no nested regions are found
            classes (list): if truthy, keep only regions whose class name
                (without ``RegionType``) is listed

        Returns:
            list of regions; without `classes`, a leading ``PageType``
            object is dropped from the result
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """Return the ``regionRef`` values below `rogroup`, depth first.

        Each element's own ``regionRef`` is appended first; elements that
        are not region references are then descended into recursively.
        """
        # Start from an empty list so that an unexpected group type yields
        # no references instead of an UnboundLocalError in the loop below.
        elements = []
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        regionrefs = list()
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the supported values or \
                `depth` is negative

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working; the messages are unchanged.
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    # regions missing from the reading order go to the end
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
14073
# end class SeparatorRegionType
14074
14075
@@ 13549-13808 (lines=260) @@
13546
# end class ChemRegionType
13547
13548
13549
class MathsRegionType(RegionType):
    """MathsRegionType --
    Regions containing equations and mathematical symbols
    should be marked as maths regions.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * bgColour --
      The background colour of the region

    """
    # NOTE: the former ``__hash__ = GeneratedsSuper.__hash__`` assignment was
    # dead code: ``__hash__`` is defined as a method further down in this
    # class body, which rebinds the name. Only the effective one is kept.
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
    ]
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, bgColour=None, gds_collector_=None, **kwargs_):
        """Initialise the region.

        All region arguments are forwarded positionally to the ``RegionType``
        initialiser; ``orientation`` and ``bgColour`` are the attributes added
        by this class. ``parent_object_`` is read from ``kwargs_`` if given.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time (not bound lexically),
        # exactly as in the generated original.
        super(globals().get("MathsRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate this class or a registered subclass of it.

        Preference order: a subclass found in ``CurrentSubclassModule_``,
        then ``MathsRegionType.subclass``, then the class itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, MathsRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if MathsRegionType.subclass:
            return MathsRegionType.subclass(*args_, **kwargs_)
        return MathsRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix used for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix used for this element."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the ``orientation`` attribute."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        # This is the only effective definition: the plain generated setter
        # that used to precede it was shadowed by this one and was removed.
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_bgColour(self):
        """Return the ``bgColour`` attribute."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Set the ``bgColour`` attribute (no validation is performed here)."""
        self.bgColour = bgColour

    def validate_ColourSimpleType(self, value):
        """Validate `value` against pc:ColourSimpleType (a restriction on string).

        Validation only takes place when `value` is not None,
        ``Validate_simpletypes_`` is truthy and a collector is attached;
        problems are reported via ``self.gds_collector_.add_message``.

        Returns:
            bool: False if a problem was reported, True otherwise (including
            when validation was skipped). Previously the enumeration failure
            was stored in an unused local and ``None`` was returned.
        """
        if value is None or not Validate_simpletypes_ or self.gds_collector_ is None:
            return True
        if not isinstance(value, str):
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
            return False
        enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
        if value not in enumerations:
            lineno = self.gds_get_node_lineno_()
            self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
            return False
        return True

    def has__content(self):
        """Return True if the superclass reports content, else False."""
        return bool(super(MathsRegionType, self).has__content())

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MathsRegionType', pretty_print=True):
        """Write this element as XML text to `outfile`.

        Arguments:
            outfile: object with a ``write`` method receiving the XML text
            level (int): indentation level passed to ``showIndent``
            namespaceprefix_ (str): prefix written before the tag name
            namespacedef_ (str): namespace declaration written on the tag;
                overridden by an entry in ``GenerateDSNamespaceDefs_``
            name_ (str): tag name; replaced by ``original_tagname_`` when
                that is set and `name_` is still the default
            pretty_print (bool): whether to emit newlines and indentation
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('MathsRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'MathsRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MathsRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MathsRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # no content: emit a self-closing tag
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MathsRegionType'):
        """Write the inherited attributes, then ``orientation`` and ``bgColour``.

        Attributes already named in `already_processed` are not written again.
        """
        super(MathsRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MathsRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MathsRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this class adds no children)."""
        super(MathsRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='MathsRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build an element via the superclass and set this class's attributes.

        If given, `mapping_` gets ``id(self) -> element`` and
        `reverse_mapping_` gets ``element -> self``.

        Returns:
            the element created by the superclass ``to_etree``
        """
        element = super(MathsRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the parsed element `node` and return self."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            # last group of the tag pattern is used as the node name
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read ``orientation`` and ``bgColour`` from `node`, then the inherited attributes."""
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            # direct assignment: does not go through set_orientation()
            self.orientation = self.gds_parse_float(value, node, 'orientation')
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        super(MathsRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this class adds no children)."""
        super(MathsRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash the object by its ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the class name of `x` with ``'RegionType'`` removed."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """Collect `regions` and their nested regions in document order.

        Arguments:
            regions (list): objects to start from
            level (int): ``1`` stops the recursion and applies the filter;
                ``0`` stays ``0`` on descent, so recursion continues until
                no nested regions are found
            classes (list): if truthy, keep only regions whose class name
                (without ``RegionType``) is listed

        Returns:
            list of regions; without `classes`, a leading ``PageType``
            object is dropped from the result
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """Return the ``regionRef`` values below `rogroup`, depth first.

        Each element's own ``regionRef`` is appended first; elements that
        are not region references are then descended into recursively.
        """
        # Start from an empty list so that an unexpected group type yields
        # no references instead of an UnboundLocalError in the loop below.
        elements = []
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        regionrefs = list()
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the supported values or \
                `depth` is negative

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working; the messages are unchanged.
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    # regions missing from the reading order go to the end
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
13809
# end class MathsRegionType
13810
13811
@@ 13286-13545 (lines=260) @@
13283
# end class MapRegionType
13284
13285
13286
class ChemRegionType(RegionType):
    """ChemRegionType --
    Regions containing chemical formulas.

    * orientation --
      The angle the rectangle encapsulating a
      region has to be rotated in clockwise
      direction in order to correct the present
      skew (negative values indicate
      anti-clockwise rotation). Range:
      -179.999,180

    * bgColour --
      The background colour of the region

    """
    # NOTE: hashing is defined once, by ``__hash__`` further below (by ``id``).
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
    ]
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, bgColour=None, gds_collector_=None, **kwargs_):
        """Initialise the region; all RegionType arguments are forwarded to the superclass."""
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time (as generated), not bound statically.
        super(globals().get("ChemRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate the most specific registered subclass, or this class itself."""
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, ChemRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if ChemRegionType.subclass:
            return ChemRegionType.subclass(*args_, **kwargs_)
        return ChemRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix used for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix used for this element."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the deskewing angle (``orientation`` attribute)."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        # This is the only setter: the plain generated setter that used to
        # precede it in the class body was shadowed by this definition.
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_bgColour(self):
        """Return the background colour (``bgColour`` attribute)."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Set the background colour (``bgColour`` attribute)."""
        self.bgColour = bgColour

    def validate_ColourSimpleType(self, value):
        """
        Validate `value` against pc:ColourSimpleType, a restriction on string.

        Validation only runs when `value` is not None, simple-type
        validation is enabled and a message collector is attached;
        each violation is reported to the collector.

        Returns:
            False if a violation was reported, True otherwise (including
            when validation was skipped).
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
            if value not in enumerations:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
                result = False
        return result

    def has__content(self):
        """Return True if the superclass reports any child content."""
        if super(ChemRegionType, self).has__content():
            return True
        return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChemRegionType', pretty_print=True):
        """Write this element (attributes and children) as XML text to `outfile`."""
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('ChemRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'ChemRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='ChemRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # no children: emit a self-closing tag
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='ChemRegionType'):
        """Write inherited attributes, then ``orientation`` and ``bgColour`` if set."""
        super(ChemRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='ChemRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ChemRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this type adds no child elements)."""
        super(ChemRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='ChemRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build the element via the superclass and set this type's attributes on it."""
        element = super(ChemRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element tree `node` and return self."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read ``orientation`` and ``bgColour`` from `node`, then the inherited attributes."""
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            value = self.gds_parse_float(value, node, 'orientation')
            # assigned directly (as generated), so set_orientation() is not triggered here
            self.orientation = value
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        super(ChemRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this type adds no child elements)."""
        super(ChemRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash by the element's ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the short region class name of `x`, e.g. ``'Text'`` for ``TextRegionType``."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """
        Collect `regions` and their nested regions in document order.

        Arguments:
            regions (list): elements to start from
            level (int): ``1`` stops the recursion and applies the `classes`
                filter; ``0`` descends without limit; larger values are
                decremented per nesting step
            classes (list): short region class names to keep, or None for all
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """
        Return the ``regionRef`` values of `rogroup` and all nested groups, depth-first.

        A `rogroup` that is neither an ordered nor an unordered group
        contributes no references.
        """
        # Default to "no members" so an unexpected group type cannot leave
        # ``elements`` unbound.
        elements = []
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        regionrefs = list()
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                # nested group: descend
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned,
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the supported values or
                `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # depth 0 means unlimited; otherwise add one level for `self` itself
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
13546
# end class ChemRegionType
13547
13548
@@ 12794-13052 (lines=259) @@
12791
# end class AdvertRegionType
12792
12793
12794
class MusicRegionType(RegionType):
    """MusicRegionType --
    Regions containing musical notations.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * bgColour --
      The background colour of the region

    """
    # NOTE: hashing is defined once, by ``__hash__`` further below (by ``id``).
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
    ]
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, bgColour=None, gds_collector_=None, **kwargs_):
        """Initialise the region; all RegionType arguments are forwarded to the superclass."""
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class is looked up by name at call time (as generated), not bound statically.
        super(globals().get("MusicRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """Instantiate the most specific registered subclass, or this class itself."""
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, MusicRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if MusicRegionType.subclass:
            return MusicRegionType.subclass(*args_, **kwargs_)
        return MusicRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix used for this element."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Set the namespace prefix used for this element."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the deskewing angle (``orientation`` attribute)."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        # This is the only setter: the plain generated setter that used to
        # precede it in the class body was shadowed by this definition.
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_bgColour(self):
        """Return the background colour (``bgColour`` attribute)."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Set the background colour (``bgColour`` attribute)."""
        self.bgColour = bgColour

    def validate_ColourSimpleType(self, value):
        """
        Validate `value` against pc:ColourSimpleType, a restriction on string.

        Validation only runs when `value` is not None, simple-type
        validation is enabled and a message collector is attached;
        each violation is reported to the collector.

        Returns:
            False if a violation was reported, True otherwise (including
            when validation was skipped).
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
            if value not in enumerations:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
                result = False
        return result

    def has__content(self):
        """Return True if the superclass reports any child content."""
        if super(MusicRegionType, self).has__content():
            return True
        return False

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MusicRegionType', pretty_print=True):
        """Write this element (attributes and children) as XML text to `outfile`."""
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('MusicRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'MusicRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='MusicRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # no children: emit a self-closing tag
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='MusicRegionType'):
        """Write inherited attributes, then ``orientation`` and ``bgColour`` if set."""
        super(MusicRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='MusicRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MusicRegionType', fromsubclass_=False, pretty_print=True):
        """Delegate child export to the superclass (this type adds no child elements)."""
        super(MusicRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='MusicRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """Build the element via the superclass and set this type's attributes on it."""
        element = super(MusicRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """Populate this object from the element tree `node` and return self."""
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """Read ``orientation`` and ``bgColour`` from `node`, then the inherited attributes."""
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            value = self.gds_parse_float(value, node, 'orientation')
            # assigned directly (as generated), so set_orientation() is not triggered here
            self.orientation = value
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        super(MusicRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Delegate child parsing to the superclass (this type adds no child elements)."""
        super(MusicRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash by the element's ``id`` attribute."""
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the short region class name of `x`, e.g. ``'Text'`` for ``TextRegionType``."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """
        Collect `regions` and their nested regions in document order.

        Arguments:
            regions (list): elements to start from
            level (int): ``1`` stops the recursion and applies the `classes`
                filter; ``0`` descends without limit; larger values are
                decremented per nesting step
            classes (list): short region class names to keep, or None for all
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """
        Return the ``regionRef`` values of `rogroup` and all nested groups, depth-first.

        A `rogroup` that is neither an ordered nor an unordered group
        contributes no references.
        """
        # Default to "no members" so an unexpected group type cannot leave
        # ``elements`` unbound.
        elements = []
        if isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        regionrefs = list()
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                # nested group: descend
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned,
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`,
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`,
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`,
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`,
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`,
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`,
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`,
                and/or :py:class:`CustomRegionType`

        Raises:
            ValueError: if `order` is not one of the supported values or
                `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise ValueError("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise ValueError("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # depth 0 means unlimited; otherwise add one level for `self` itself
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
13053
# end class MusicRegionType
13054
13055
@@ 12532-12790 (lines=259) @@
12529
# end class NoiseRegionType
12530
12531
12532
class AdvertRegionType(RegionType):
    """AdvertRegionType --
    Regions containing advertisements.

    * orientation --
      The angle the rectangle encapsulating a region
      has to be rotated in clockwise direction
      in order to correct the present skew
      (negative values indicate anti-clockwise rotation).
      Range: -179.999,180

    * bgColour --
      The background colour of the region

    """
    # Descriptors of the two XML attributes this type adds to RegionType.
    member_data_items_ = [
        MemberSpec_('orientation', 'float', 0, 1, {'use': 'optional', 'name': 'orientation'}),
        MemberSpec_('bgColour', 'pc:ColourSimpleType', 0, 1, {'use': 'optional', 'name': 'bgColour'}),
    ]
    # Optional replacement class consulted by factory().
    subclass = None
    superclass = RegionType

    def __init__(self, id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, bgColour=None, gds_collector_=None, **kwargs_):
        """
        Initialise the region.

        All arguments up to and including `CustomRegion` are forwarded
        positionally to ``RegionType.__init__``; `orientation` and `bgColour`
        are the attributes specific to this type. `gds_collector_` is kept
        for validation messages, and ``kwargs_['parent_object_']`` (if given)
        is stored as ``parent_object_``.
        """
        self.gds_collector_ = gds_collector_
        self.gds_elementtree_node_ = None
        self.original_tagname_ = None
        self.parent_object_ = kwargs_.get('parent_object_')
        self.ns_prefix_ = "pc"
        # The class object is looked up by name at call time rather than
        # referenced directly (generator idiom; presumably so that a rebound
        # module-level name is honoured -- TODO confirm).
        super(globals().get("AdvertRegionType"), self).__init__(id, custom, comments, continuation, AlternativeImage, Coords, UserDefined, Labels, Roles, TextRegion, ImageRegion, LineDrawingRegion, GraphicRegion, TableRegion, ChartRegion, SeparatorRegion, MathsRegion, ChemRegion, MusicRegion, AdvertRegion, NoiseRegion, UnknownRegion, CustomRegion,  **kwargs_)
        self.orientation = _cast(float, orientation)
        self.orientation_nsprefix_ = "pc"
        self.bgColour = _cast(None, bgColour)
        self.bgColour_nsprefix_ = "pc"

    @staticmethod
    def factory(*args_, **kwargs_):
        """
        Create an instance, preferring a registered replacement class.

        Order of preference: a subclass found via ``getSubclassFromModule_``
        when ``CurrentSubclassModule_`` is set, then ``AdvertRegionType.subclass``
        when truthy, and finally ``AdvertRegionType`` itself.
        """
        if CurrentSubclassModule_ is not None:
            subclass = getSubclassFromModule_(
                CurrentSubclassModule_, AdvertRegionType)
            if subclass is not None:
                return subclass(*args_, **kwargs_)
        if AdvertRegionType.subclass:
            return AdvertRegionType.subclass(*args_, **kwargs_)
        return AdvertRegionType(*args_, **kwargs_)

    def get_ns_prefix_(self):
        """Return the namespace prefix stored on this object."""
        return self.ns_prefix_

    def set_ns_prefix_(self, ns_prefix):
        """Store `ns_prefix` as this object's namespace prefix."""
        self.ns_prefix_ = ns_prefix

    def get_orientation(self):
        """Return the stored ``orientation`` value."""
        return self.orientation

    def set_orientation(self, orientation):
        """
        Set deskewing angle to given `orientation` number.
        Moreover, invalidate self's ``pc:AlternativeImage``s
        (because they will have been rotated and enlarged
        with the angle of the previous value).
        """
        # This is the only definition of the setter: the class body used to
        # contain an earlier plain setter as well, which was dead code because
        # this later definition replaced it in the class namespace.
        if hasattr(self, 'invalidate_AlternativeImage'):
            # PageType, RegionType:
            self.invalidate_AlternativeImage(feature_selector='deskewed')
        self.orientation = orientation

    def get_bgColour(self):
        """Return the stored ``bgColour`` value."""
        return self.bgColour

    def set_bgColour(self, bgColour):
        """Store `bgColour` without validating it."""
        self.bgColour = bgColour

    def validate_ColourSimpleType(self, value):
        """
        Validate `value` against ``pc:ColourSimpleType`` (a restriction on string).

        Checks are only performed when `value` is not None, the module flag
        ``Validate_simpletypes_`` is truthy and a collector is attached;
        problems are reported through ``self.gds_collector_.add_message``.

        Returns:
            bool: ``False`` if `value` is not a ``str`` or is not one of the
            enumerated colour names, ``True`` otherwise (including when no
            check was performed).
        """
        result = True
        if value is not None and Validate_simpletypes_ and self.gds_collector_ is not None:
            if not isinstance(value, str):
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s is not of the correct base simple type (str)' % {"value": value, "lineno": lineno, })
                return False
            enumerations = ['black', 'blue', 'brown', 'cyan', 'green', 'grey', 'indigo', 'magenta', 'orange', 'pink', 'red', 'turquoise', 'violet', 'white', 'yellow', 'other']
            if value not in enumerations:
                lineno = self.gds_get_node_lineno_()
                self.gds_collector_.add_message('Value "%(value)s"%(lineno)s does not match xsd enumeration restriction on ColourSimpleType' % {"value" : encode_str_2_3(value), "lineno": lineno} )
                # Previously this outcome was computed but never returned.
                result = False
        return result

    def has__content(self):
        """Return True if the base class reports content, else False."""
        # This type adds attributes only, so content is decided by RegionType.
        return bool(super(AdvertRegionType, self).has__content())

    def export(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AdvertRegionType', pretty_print=True):
        """
        Write this element as XML text to `outfile`.

        Arguments:
            outfile: object with a ``write`` method receiving the text
            level (int): indentation level passed to ``showIndent``
            namespaceprefix_ (str): prefix written before the tag name;
                replaced by ``self.ns_prefix_ + ':'`` when ``UseCapturedNS_``
                is truthy and a prefix is stored
            namespacedef_ (str): namespace declaration written into the
                start tag; overridden by an entry for this type in
                ``GenerateDSNamespaceDefs_`` if one exists
            name_ (str): tag name; if left at the default and
                ``original_tagname_`` is set, the latter is used
            pretty_print (bool): whether to emit indentation and newlines
        """
        imported_ns_def_ = GenerateDSNamespaceDefs_.get('AdvertRegionType')
        if imported_ns_def_ is not None:
            namespacedef_ = imported_ns_def_
        eol_ = '\n' if pretty_print else ''
        if self.original_tagname_ is not None and name_ == 'AdvertRegionType':
            name_ = self.original_tagname_
        if UseCapturedNS_ and self.ns_prefix_:
            namespaceprefix_ = self.ns_prefix_ + ':'
        showIndent(outfile, level, pretty_print)
        outfile.write('<%s%s%s' % (namespaceprefix_, name_, ' ' + namespacedef_ if namespacedef_ else '', ))
        already_processed = set()
        self._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType')
        if self.has__content():
            outfile.write('>%s' % (eol_, ))
            self._exportChildren(outfile, level + 1, namespaceprefix_, namespacedef_, name_='AdvertRegionType', pretty_print=pretty_print)
            showIndent(outfile, level, pretty_print)
            outfile.write('</%s%s>%s' % (namespaceprefix_, name_, eol_))
        else:
            # No content: emit a self-closing tag.
            outfile.write('/>%s' % (eol_, ))

    def _exportAttributes(self, outfile, level, already_processed, namespaceprefix_='', name_='AdvertRegionType'):
        """
        Write the XML attributes of this element to `outfile`.

        Base-class attributes are written first; `already_processed` is the
        set of attribute names written so far and prevents duplicates.
        """
        super(AdvertRegionType, self)._exportAttributes(outfile, level, already_processed, namespaceprefix_, name_='AdvertRegionType')
        if self.orientation is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            outfile.write(' orientation="%s"' % self.gds_format_float(self.orientation, input_name='orientation'))
        if self.bgColour is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            outfile.write(' bgColour=%s' % (self.gds_encode(self.gds_format_string(quote_attrib(self.bgColour), input_name='bgColour')), ))

    def _exportChildren(self, outfile, level, namespaceprefix_='', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AdvertRegionType', fromsubclass_=False, pretty_print=True):
        """Write child elements; this type adds none, so defer to the base class."""
        # The positional True lands on the base method's parameter after name_
        # (presumably fromsubclass_ -- TODO confirm against RegionType).
        super(AdvertRegionType, self)._exportChildren(outfile, level, namespaceprefix_, namespacedef_, name_, True, pretty_print=pretty_print)

    def to_etree(self, parent_element=None, name_='AdvertRegionType', mapping_=None, reverse_mapping_=None, nsmap_=None):
        """
        Build and return the element-tree element for this object.

        The element is created by the base class; ``orientation`` and
        ``bgColour`` are then set on it when not None. If given, `mapping_`
        receives ``id(self) -> element`` and `reverse_mapping_` receives
        ``element -> self``.
        """
        element = super(AdvertRegionType, self).to_etree(parent_element, name_, mapping_, reverse_mapping_, nsmap_)
        if self.orientation is not None:
            element.set('orientation', self.gds_format_float(self.orientation))
        if self.bgColour is not None:
            element.set('bgColour', self.gds_format_string(self.bgColour))
        if mapping_ is not None:
            mapping_[id(self)] = element
        if reverse_mapping_ is not None:
            reverse_mapping_[element] = self
        return element

    def build(self, node, gds_collector_=None):
        """
        Populate this object from the element-tree `node` and return self.

        Attributes are read first, then every child of `node` is dispatched
        to ``_buildChildren`` under the last group matched by ``Tag_pattern_``
        on its tag.
        """
        self.gds_collector_ = gds_collector_
        if SaveElementTreeNode:
            self.gds_elementtree_node_ = node
        already_processed = set()
        self.ns_prefix_ = node.prefix
        self._buildAttributes(node, node.attrib, already_processed)
        for child in node:
            nodeName_ = Tag_pattern_.match(child.tag).groups()[-1]
            self._buildChildren(child, node, nodeName_, gds_collector_=gds_collector_)
        return self

    def _buildAttributes(self, node, attrs, already_processed):
        """
        Read ``orientation`` and ``bgColour`` from `node`, then let the base
        class read its own attributes.

        ``orientation`` is parsed with ``gds_parse_float``; ``bgColour`` is
        stored as found and then passed to ``validate_ColourSimpleType``.
        """
        value = find_attr_value_('orientation', node)
        if value is not None and 'orientation' not in already_processed:
            already_processed.add('orientation')
            value = self.gds_parse_float(value, node, 'orientation')
            self.orientation = value
        value = find_attr_value_('bgColour', node)
        if value is not None and 'bgColour' not in already_processed:
            already_processed.add('bgColour')
            self.bgColour = value
            self.validate_ColourSimpleType(self.bgColour)    # validate type ColourSimpleType
        super(AdvertRegionType, self)._buildAttributes(node, attrs, already_processed)

    def _buildChildren(self, child_, node, nodeName_, fromsubclass_=False, gds_collector_=None):
        """Handle one child element; this type adds none, so defer to the base class."""
        # NOTE(review): gds_collector_ is not forwarded to the base class here.
        super(AdvertRegionType, self)._buildChildren(child_, node, nodeName_, True)

    def __hash__(self):
        """Hash the object by its ``id`` attribute."""
        # Sole definition of __hash__: an earlier class-level assignment of
        # GeneratedsSuper.__hash__ was shadowed by this method and has been
        # dropped as dead code.
        return hash(self.id)

    # pylint: disable=line-too-long,invalid-name,protected-access,missing-module-docstring
    def _region_class(self, x): # pylint: disable=unused-argument
        """Return the class name of `x` with ``'RegionType'`` removed, e.g. ``'Text'``."""
        return x.__class__.__name__.replace('RegionType', '')

    def _get_recursive_regions(self, regions, level, classes=None):
        """
        Collect `regions` and their nested regions as a flat list.

        Arguments:
            regions (list): objects to start from
            level (int): ``1`` stops descending and only filters; any other
                value descends one step and passes ``level - 1`` on, except
                that ``0`` is passed on unchanged, so descent then continues
                until no object has child regions
            classes (list): if truthy, keep only objects whose
                ``_region_class`` name is contained in it

        Returns:
            list: each object followed by the regions found beneath it;
            without a `classes` filter, a leading ``PageType`` is dropped.
        """
        from .constants import PAGE_REGION_TYPES  # pylint: disable=relative-beyond-top-level,import-outside-toplevel
        if level == 1:
            # stop recursion, filter classes
            if classes:
                return [r for r in regions if self._region_class(r) in classes]
            if regions and regions[0].__class__.__name__ == 'PageType':
                regions = regions[1:]
            return regions
        # find more regions recursively
        more_regions = []
        for region in regions:
            more_regions.append([])
            for class_ in PAGE_REGION_TYPES:
                if class_ == 'Map' and not isinstance(region, PageType): # pylint: disable=undefined-variable
                    # 'Map' is not recursive in 2019 schema
                    continue
                more_regions[-1] += getattr(region, 'get_{}Region'.format(class_))()
        if not any(more_regions):
            # Nothing nested anywhere: only the filtering step remains.
            return self._get_recursive_regions(regions, 1, classes)
        ret = []
        for r, more in zip(regions, more_regions):
            ret.append(r)
            ret += self._get_recursive_regions(more, level - 1 if level else 0, classes)
        return self._get_recursive_regions(ret, 1, classes)

    def _get_recursive_reading_order(self, rogroup):
        """
        Return the ``regionRef`` values of `rogroup` and of all groups nested
        in it, depth first.

        Arguments:
            rogroup: an ordered or unordered reading-order group object

        Returns:
            list: the ``get_regionRef()`` value of every element, each
            followed by the values found inside it when the element is
            itself a group rather than a region reference.

        Raises:
            TypeError: if `rogroup` is neither kind of group (this used to
                surface as an ``UnboundLocalError``).
        """
        # The unordered family is tested first so that an object matching both
        # families would still end up with the unordered element list, as it
        # did when the two tests were independent ``if`` statements.
        if isinstance(rogroup, (UnorderedGroupType, UnorderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = (rogroup.get_RegionRef() + rogroup.get_OrderedGroup() + rogroup.get_UnorderedGroup())
        elif isinstance(rogroup, (OrderedGroupType, OrderedGroupIndexedType)): # pylint: disable=undefined-variable
            elements = rogroup.get_AllIndexed()
        else:
            raise TypeError("Expected an ordered or unordered reading order group, not '{}'".format(type(rogroup).__name__))
        regionrefs = []
        for elem in elements:
            regionrefs.append(elem.get_regionRef())
            if not isinstance(elem, (RegionRefType, RegionRefIndexedType)): # pylint: disable=undefined-variable
                regionrefs.extend(self._get_recursive_reading_order(elem))
        return regionrefs

    def get_AllRegions(self, classes=None, order='document', depth=0):
        """
        Get all the ``*Region`` elements, or only those provided by `classes`.
        Return in document order, unless the top element is ``Page`` and
        `order` is ``reading-order``.

        Arguments:
            classes (list): Classes of regions that shall be returned, \
                e.g. ``['Text', 'Image']``
            order ("document"|"reading-order"|"reading-order-only"): Whether to \
                return regions sorted by document order (``document``, default) or by
                reading order with regions not in the reading order at the end of the
                returned list (``reading-order``) or regions not in the reading order
                omitted (``reading-order-only``). The latter two are only available
                on page level.
            depth (int): Recursive depth to look for regions at, set to `0` for \
                all regions at any depth. Default: 0

        Returns:
            a list of :py:class:`TextRegionType`, :py:class:`ImageRegionType`, \
                :py:class:`LineDrawingRegionType`, :py:class:`GraphicRegionType`, \
                :py:class:`TableRegionType`, :py:class:`ChartRegionType`, \
                :py:class:`MapRegionType`, :py:class:`SeparatorRegionType`, \
                :py:class:`MathsRegionType`, :py:class:`ChemRegionType`, \
                :py:class:`MusicRegionType`, :py:class:`AdvertRegionType`, \
                :py:class:`NoiseRegionType`, :py:class:`UnknownRegionType`, \
                and/or :py:class:`CustomRegionType`

        Raises:
            Exception: if `order` is not one of the three allowed values or
                `depth` is negative.

        For example, to get all text anywhere on the page in reading order, use:
        ::
            '\\n'.join(line.get_TextEquiv()[0].Unicode
                      for region in page.get_AllRegions(classes=['Text'], depth=0, order='reading-order')
                      for line in region.get_TextLine())
        """
        if order not in ['document', 'reading-order', 'reading-order-only']:
            raise Exception("Argument 'order' must be either 'document', 'reading-order' or 'reading-order-only', not '{}'".format(order))
        if depth < 0:
            raise Exception("Argument 'depth' must be an integer greater-or-equal 0, not '{}'".format(depth))
        # The helper counts the starting object as one level, hence depth + 1;
        # 0 is passed through unchanged and means "no depth limit".
        ret = self._get_recursive_regions([self], depth + 1 if depth else 0, classes)
        if self.__class__.__name__ == 'PageType' and order.startswith('reading-order'):
            reading_order = self.get_ReadingOrder()
            if reading_order:
                reading_order = reading_order.get_OrderedGroup() or reading_order.get_UnorderedGroup()
            if reading_order:
                reading_order = self._get_recursive_reading_order(reading_order)
            if reading_order:
                id2region = {region.id: region for region in ret}
                # References to regions that were not collected are ignored.
                in_reading_order = [id2region[region_id] for region_id in reading_order if region_id in id2region]
                if order == 'reading-order-only':
                    ret = in_reading_order
                else:
                    ret = in_reading_order + [r for r in ret if r not in in_reading_order]
        return ret
12791
# end class AdvertRegionType
12792
12793