Completed
Push — master ( 11a6df...bffa60 )
by Satoru
01:09
created

_elem_strip_text()   A

Complexity

Conditions 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
dl 0
loc 6
rs 9.4285
c 1
b 0
f 0
1
#
2
# Copyright (C) 2011 - 2017 Satoru SATOH <ssato @ redhat.com>
3
# License: MIT
4
#
5
# Some XML modules may be missing and Base.{load,dumps}_impl are not overridden:
6
# pylint: disable=import-error, duplicate-except
7
"""XML files parser backend, should be available always.
8
9
- Format to support: XML, e.g. http://www.w3.org/TR/xml11/
10
- Requirements: one of the following
11
12
  - xml.etree.cElementTree in standard lib if python >= 2.5
13
  - xml.etree.ElementTree in standard lib if python >= 2.5
14
  - elementtree.ElementTree (otherwise)
15
16
- Development Status: 3 - Alpha
17
- Limitations:
18
19
  - '<prefix>attrs', '<prefix>text' and '<prefix>children' are used as special
20
    parameter to keep XML structure of original data. You have to customize
21
    <prefix> (default: '@') if any config parameters conflict with some of
22
    them.
23
24
  - Some data or structures of original XML file may be lost if converted back
25
    to XML file; XML file - (anyconfig.load) -> config - (anyconfig.dump) ->
26
    XML file
27
28
  - XML specific features (namespace, etc.) may not be processed correctly.
29
30
- Special Options:
31
32
  - tags: A dict provide special parameter names to distinguish between
33
    attributes, text and children nodes.
34
35
  - merge_attrs: Merge attributes and mix with children nodes. Please note that
36
    information of attributes are lost after loaded.
37
38
  - ac_parse_value: Try to parse values, elements' text and attributes.
39
40
History:
41
42
.. versionchanged:: 0.8.99
43
44
   - Add special options, tags, merge_attrs and ac_parse_value
45
   - Remove special option, pprefix which conflicts another option tags
46
47
.. versionchanged:: 0.8.0
48
49
   - Try to make a nested dict w/o extra dict having keys of attrs, text and
50
     children from XML string/file as much as possible.
51
   - Support namespaces partially.
52
53
.. versionchanged:: 0.1.0
54
55
   - Added XML dump support.
56
"""
57
from __future__ import absolute_import
58
from io import BytesIO
59
60
import operator
61
import re
62
# Pick an available ElementTree implementation, preferring the C accelerated
# one. The fallbacks are nested on purpose: only the first matching handler of
# a ``try`` statement runs, so a second ``except ImportError`` clause on the
# same ``try`` would never be reached and an ImportError raised inside the
# first handler would propagate instead of triggering the last fallback.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        import elementtree.ElementTree as ET
68
69
import anyconfig.backend.base
70
import anyconfig.compat
71
import anyconfig.mdicts
72
import anyconfig.utils
73
import anyconfig.parser
74
75
76
_TAGS = dict(attrs="@attrs", text="@text", children="@children")
77
_ET_NS_RE = re.compile(r"^{(\S+)}(\S+)$")
78
79
80
def _iterparse(xmlfile):
81
    """
82
    Avoid bug in python 3.{2,3}. See http://bugs.python.org/issue9257.
83
84
    :param xmlfile: XML file or file-like object
85
    """
86
    try:
87
        return ET.iterparse(xmlfile, events=("start-ns", ))
88
    except TypeError:
89
        return ET.iterparse(xmlfile, events=(b"start-ns", ))
90
91
92
def flip(tpl):
    """
    Swap the first two items of `tpl`.

    :param tpl: Indexable object which has two items at least
    :return: A tuple of the second and the first item of `tpl`

    >>> flip((1, 2))
    (2, 1)
    """
    second = tpl[1]
    first = tpl[0]
    return (second, first)
98
99
100
def _namespaces_from_file(xmlfile):
    """
    Collect namespace declarations found while parsing `xmlfile`.

    :param xmlfile: XML file or file-like object
    :return: {namespace_uri: namespace_prefix} or {}
    """
    nspaces = {}
    for _event, prefix_and_uri in _iterparse(xmlfile):
        # Each "start-ns" item is flipped to make URI the key of the result.
        (uri, prefix) = flip(prefix_and_uri)
        nspaces[uri] = prefix

    return nspaces
106
107
108
def _tweak_ns(tag, nspaces=None, **options):
109
    """
110
    :param tag: XML tag element
111
    :param nspaces: A namespaces dict, {uri: prefix} or None
112
    :param options: Extra keyword options
113
114
    >>> _tweak_ns("a", {})
115
    'a'
116
    >>> _tweak_ns("a", {"http://example.com/ns/val/": "val"})
117
    'a'
118
    >>> _tweak_ns("{http://example.com/ns/val/}a",
119
    ...           {"http://example.com/ns/val/": "val"})
120
    'val:a'
121
    """
122
    if nspaces:
123
        matched = _ET_NS_RE.match(tag)
124
        if matched:
125
            (uri, tag) = matched.groups()
126
            prefix = nspaces.get(uri, False)
127
            if prefix:
128
                return "%s:%s" % (prefix, tag)
129
130
    return tag
131
132
133
def _dicts_have_unique_keys(dics):
    """
    Test that no key appears in more than one dict of `dics`, by comparing
    the number of distinct keys with the total number of keys.

    :param dics: [<dict or dict-like object>], must not be [] or [{...}]
    :return: True if all keys of each dict of `dics` are unique

    # Enable the followings if to allow dics is [], [{...}]:
    # >>> all(_dicts_have_unique_keys([d]) for [d]
    # ...     in ({}, {'a': 0}, {'a': 1, 'b': 0}))
    # True
    # >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2, 'c': 0}])
    # True

    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'a': 2}])
    False
    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2}, {'b': 3, 'c': 0}])
    False
    >>> _dicts_have_unique_keys([{}, {}])
    True
    """
    all_keys = anyconfig.compat.from_iterable(d.keys() for d in dics)
    n_unique = len(set(all_keys))
    n_total = sum(len(d) for d in dics)

    return n_unique == n_total
154
155
156
def _merge_dicts(dics, to_container=dict):
    """
    Merge all items of `dics` into a container object, keeping the order of
    items as they appear.

    :param dics: [<dict/-like object must not have same keys each other>]
    :param to_container: callable to make a container object
    :return: <container> object

    >>> _merge_dicts(({}, ))
    {}
    >>> _merge_dicts(({'a': 1}, ))
    {'a': 1}
    >>> sorted(kv for kv in _merge_dicts(({'a': 1}, {'b': 2})).items())
    [('a', 1), ('b', 2)]
    """
    pairs = anyconfig.compat.from_iterable(d.items() for d in dics)
    merged = anyconfig.compat.OrderedDict(pairs)

    return to_container(merged)
171
172
173
def _parse_text(val, **options):
174
    """
175
    :return: Parsed value or value itself depends on `ac_parse_value`
176
    """
177
    if val and options.get("ac_parse_value", False):
178
        return anyconfig.parser.parse_single(val)
179
    else:
180
        return val
181
182
183
def _process_elem_text(elem, dic, subdic, text="@text", **options):
184
    """
185
    :param elem: ET Element object which has elem.text
186
    :param dic: <container> (dict[-like]) object converted from elem
187
    :param subdic: Sub <container> object converted from elem
188
    :param options:
189
        Keyword options, see the description of :func:`elem_to_container` for
190
        more details.
191
192
    :return: None but updating elem.text, dic and subdic as side effects
193
    """
194
    elem.text = elem.text.strip()
195
    if elem.text:
196
        etext = _parse_text(elem.text, **options)
197
        if len(elem) or elem.attrib:
198
            subdic[text] = etext
199
        else:
200
            dic[elem.tag] = etext  # Only text, e.g. <a>text</a>
201
202
203
def _parse_attrs(elem, to_container=dict, **options):
204
    """
205
    :param elem: ET Element object has attributes (elem.attrib)
206
    :param to_container: callble to make a container object
207
    :return: Parsed value or value itself depends on `ac_parse_value`
208
    """
209
    if options.get("ac_parse_value", False):
210
        return to_container(dict((k, anyconfig.parser.parse_single(v))
211
                                 for k, v in elem.attrib.items()))
212
    else:
213
        return to_container(elem.attrib)
214
215
216
def _process_elem_attrs(elem, dic, subdic, to_container=dict, attrs="@attrs",
                        **options):
    """
    Store attributes of `elem` in `dic` or `subdic`.

    Attributes replace the node of `elem` in `dic` if `elem` has neither text
    nor children and the option `merge_attrs` is set. Otherwise, these are
    stored in `subdic` under the key `attrs`.

    :param elem: ET Element object which has attributes (elem.attrib)
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param to_container: callable to make a container object
    :param attrs: Key of `subdic` to store attributes under
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    adic = _parse_attrs(elem, to_container=to_container, **options)

    attrs_only = not (elem.text or len(elem))
    if attrs_only and options.get("merge_attrs"):
        dic[elem.tag] = adic
        return

    subdic[attrs] = adic
233
234
235
def _process_children_elems(elem, dic, subdic, to_container=dict,
                            children="@children", **options):
    """
    Convert children of `elem` and store the results in `dic` or `subdic`.

    :param elem: ET Element object which has children
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param to_container: callable to make a container object
    :param children: Tag for children nodes
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    cdics = []
    for child in elem:
        cdics.append(elem_to_container(child, to_container=to_container,
                                       **options))

    if options.get("merge_attrs", False):
        head = to_container(elem.attrib)
    else:
        head = subdic
    sdics = [head] + cdics

    if _dicts_have_unique_keys(sdics):  # ex. <a><b>1</b><c>c</c></a>
        dic[elem.tag] = _merge_dicts(sdics, to_container)
        return

    if subdic:
        subdic[children] = cdics
    else:  # There are no attrs nor text and only these children.
        dic[elem.tag] = cdics
260
261
262
def elem_to_container(elem, to_container=dict, **options):
    """
    Convert XML ElementTree Element to a collection of container objects.

    Elements are transformed to a node under special tagged nodes, attrs, text
    and children, to store the type of these elements basically, however, in
    some special cases like the followings, these nodes are attached to the
    parent node directly for later convenience.

    - There is only text element
    - There are only children elements each has unique keys among all

    .. note::
       elem.tag is overwritten with its namespace-tweaked form
       ('{uri}name' -> 'prefix:name') and elem.text may be stripped as side
       effects.

    :param elem: ET Element object or None
    :param to_container: callable to make a container object
    :param options: Keyword options

        - nspaces: A namespaces dict, {uri: prefix} or None
        - attrs, text, children: Tags for special nodes to keep XML info
        - merge_attrs: Merge attributes and mix with children nodes, and the
          information of attributes are lost after its transformation.

    :return: <container> object; an empty one if `elem` is None
    """
    dic = to_container()
    if elem is None:
        return dic

    # Normalize the tag once and keep it in elem.tag. The helper functions
    # called below, and the '<tag/>' case, use elem.tag as the key of `dic`,
    # so this must be the same key `subdic` is stored under. Otherwise, a
    # namespaced element ends up with two keys in `dic`, the tweaked one with
    # a stale empty container and the raw '{uri}name' one.
    elem.tag = _tweak_ns(elem.tag, **options)
    subdic = dic[elem.tag] = to_container()
    options["to_container"] = to_container

    if elem.text:
        _process_elem_text(elem, dic, subdic, **options)

    if elem.attrib:
        _process_elem_attrs(elem, dic, subdic, **options)

    if len(elem):
        _process_children_elems(elem, dic, subdic, **options)
    elif not elem.text and not elem.attrib:  # ex. <tag/>.
        dic[elem.tag] = None

    return dic
302
303
304
def _complement_tag_options(options):
    """
    Make sure `options` has all of the keys of tags for special nodes,
    'attrs', 'text' and 'children'.

    If any of them is missing, all of them are (re)set from the option 'tags'
    if it gives the tag, or from the default otherwise.

    :param options: Keyword options :: dict
    :return: `options` itself, updated in place
    """
    if all(ntype in options for ntype in _TAGS):
        return options

    tags = options.get("tags", {})
    for ntype in _TAGS:
        source = tags if ntype in tags else _TAGS
        options[ntype] = source[ntype]

    return options
314
315
316
def root_to_container(root, to_container=dict, nspaces=None, **options):
    """
    Convert XML ElementTree Root Element to a collection of container objects.

    Namespaces in `nspaces` are added to attributes of `root` as 'xmlns' or
    'xmlns:<prefix>' before its conversion.

    :param root: etree root object or None
    :param to_container: callable to make a container object
    :param nspaces: A namespaces dict, {uri: prefix} or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return: <container> object; an empty one if `root` is None
    """
    tree = to_container()
    if root is None:
        return tree

    for uri, prefix in (nspaces or {}).items():
        attr = "xmlns:" + prefix if prefix else "xmlns"
        root.attrib[attr] = uri

    options = _complement_tag_options(options)
    return elem_to_container(root, to_container=to_container, nspaces=nspaces,
                             **options)
338
339
340
def _elem_from_descendants(children_nodes, **options):
341
    """
342
    :param children_nodes: A list of child dict objects
343
    :param options: Keyword options, see :func:`container_to_etree`
344
    """
345
    for child in children_nodes:  # child should be a dict-like object.
346
        for ckey, cval in anyconfig.compat.iteritems(child):
347
            celem = ET.Element(ckey)
348
            container_to_etree(cval, parent=celem, **options)
349
            yield celem
350
351
352
def _get_or_update_parent(key, val, parent=None, **options):
    """
    Make an element named `key` from `val`, and append it to `parent` if
    given.

    :param key: Key of current child (dict{,-like} object)
    :param val: Value of current child (dict{,-like} object or [dict{,...}])
    :param parent: XML ElementTree parent node object or None
    :param options: Keyword options, see :func:`container_to_etree`
    :return: The new element if `parent` is None, or `parent` updated
    """
    elem = ET.Element(key)

    if anyconfig.utils.is_iterable(val):
        items = val
    else:
        items = [val]

    for item in items:
        container_to_etree(item, parent=elem, **options)

    if parent is not None:
        parent.append(elem)
        return parent

    return elem  # 'elem' is the top level etree.
370
371
372
# Keys of options giving the tags of special nodes, in the order used below.
_ATC = ("attrs", "text", "children")


def container_to_etree(obj, parent=None, **options):
    """
    Convert a dict-like object to XML ElementTree.

    If `obj` is not a dict-like object, it is set as the text of `parent`
    when both `parent` and the text are not empty, and None is returned.

    :param obj: Container (dict-like) instance to convert, or a leaf value
    :param parent: XML ElementTree parent node object or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return: ET.ElementTree object wrapping `parent`, or None for leaf values
    """
    if options.get("ac_parse_value"):
        _str = str
    else:
        _str = anyconfig.utils.noop

    if not anyconfig.mdicts.is_dict_like(obj):
        if obj is not None:
            obj = _str(obj)
            if parent is not None and obj:
                parent.text = obj  # Parent is a leaf text node.
        return  # All attributes and text should be set already.

    options = _complement_tag_options(options)
    (attrs, text, children) = [options[name] for name in _ATC]

    for key, val in anyconfig.compat.iteritems(obj):
        if key == attrs:
            for attr, aval in anyconfig.compat.iteritems(val):
                parent.set(attr, _str(aval))
            continue

        if key == text:
            parent.text = _str(val)
            continue

        if key == children:
            for celem in _elem_from_descendants(val, **options):
                parent.append(celem)
            continue

        # An ordinary key makes a new element, which becomes `parent` itself
        # if there was no parent yet.
        parent = _get_or_update_parent(key, val, parent=parent, **options)

    return ET.ElementTree(parent)
410
411
412
def etree_write(tree, stream):
    """
    Write XML ElementTree `tree` content into `stream` in UTF-8.

    .. note::
       It seems that ET.ElementTree.write() cannot process a parameter
       'xml_declaration' in python 2.6, so it is only passed in other
       versions.

    :param tree: XML ElementTree object
    :param stream: File or file-like object can write to
    """
    write_opts = dict(encoding='UTF-8')
    if not anyconfig.compat.IS_PYTHON_2_6:
        write_opts["xml_declaration"] = True

    tree.write(stream, **write_opts)
427
428
429
class Parser(anyconfig.backend.base.ToStreamDumper):
    """
    Backend parser to load and dump XML files with ElementTree.
    """
    _type = "xml"
    _extensions = ["xml"]
    _open_flags = ('rb', 'wb')
    _load_opts = _dump_opts = ["tags", "merge_attrs", "ac_parse_value"]

    def load_from_string(self, content, to_container, **opts):
        """
        Load config from XML snippet (a string `content`).

        `content` is parsed twice, once to get the root element and once
        more, through an in-memory stream, to collect namespaces.

        :param content:
            XML snippet string of str (python 2) or bytes (python 3) type
        :param to_container: callable to make a container object
        :param opts: optional keyword parameters passed to
            :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.fromstring(content)

        if anyconfig.compat.IS_PYTHON_3:
            make_stream = BytesIO
        else:
            make_stream = anyconfig.compat.StringIO

        nspaces = _namespaces_from_file(make_stream(content))
        return root_to_container(root, to_container=to_container,
                                 nspaces=nspaces, **opts)

    def load_from_path(self, filepath, to_container, **opts):
        """
        Load config from XML file `filepath`, which is read twice, once to
        get the root element and once more to collect namespaces.

        :param filepath: XML file path
        :param to_container: callable to make a container object
        :param opts: optional keyword parameters passed to
            :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        tree = ET.parse(filepath)
        root = tree.getroot()
        nspaces = _namespaces_from_file(filepath)

        return root_to_container(root, to_container=to_container,
                                 nspaces=nspaces, **opts)

    def load_from_stream(self, stream, to_container, **opts):
        """
        Load config from XML file or file-like object `stream`.

        :param stream: XML file or file-like object
        :param to_container: callable to make a container object
        :param opts: optional keyword parameters passed to
            :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        tree = ET.parse(stream)
        root = tree.getroot()

        # Namespaces are collected by parsing the path got from `stream`.
        # NOTE(review): presumably this requires `stream` to be backed by a
        # file having a path; confirm how get_path_from_stream behaves
        # otherwise.
        path = anyconfig.utils.get_path_from_stream(stream)
        nspaces = _namespaces_from_file(path)

        return root_to_container(root, to_container=to_container,
                                 nspaces=nspaces, **opts)

    def dump_to_string(self, cnf, **opts):
        """
        Convert `cnf` to XML and return it as the content of a bytes buffer.

        :param cnf: Configuration data to dump
        :param opts: optional keyword parameters passed to
            :func:`container_to_etree`

        :return: string represents the configuration
        """
        tree = container_to_etree(cnf, **opts)
        out = BytesIO()
        etree_write(tree, out)

        return out.getvalue()

    def dump_to_stream(self, cnf, stream, **opts):
        """
        Convert `cnf` to XML and write it into `stream`.

        :param cnf: Configuration data to dump
        :param stream: Config file or file like object write to
        :param opts: optional keyword parameters passed to
            :func:`container_to_etree`
        """
        etree_write(container_to_etree(cnf, **opts), stream)
505
506
# vim:sw=4:ts=4:et:
507