Completed
Push — master ( 47c497...529eac )
by Satoru
26s
created

Parser.load_from_string()   A

Complexity

Conditions 2

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 19
rs 9.4285
c 0
b 0
f 0
1
#
2
# Copyright (C) 2011 - 2018 Satoru SATOH <ssato @ redhat.com>
3
# License: MIT
4
#
5
# Some XML modules may be missing and Base.{load,dumps}_impl are not overridden:
6
# pylint: disable=import-error, duplicate-except
7
# len(elem) is necessary to check that ET.Element object has children.
8
# pylint: disable=len-as-condition
9
r"""XML backend:
10
11
- Format to support: XML, e.g. http://www.w3.org/TR/xml11/
12
- Requirements: one of the following
13
14
  - xml.etree.cElementTree in standard lib if python >= 2.5
15
  - xml.etree.ElementTree in standard lib if python >= 2.5
16
  - elementtree.ElementTree (otherwise)
17
18
- Development Status :: 4 - Beta
19
- Limitations:
20
21
  - special node '@attrs', '@text' and '@children' are used to keep XML
22
    structure of original data. You have to customize them with 'tags' keyword
23
    option to avoid any config parameters conflict with some of them.
24
25
  - Some data or structures of original XML file may be lost if make it backed
26
    to XML file; XML file - (anyconfig.load) -> config - (anyconfig.dump) ->
27
    XML file
28
29
  - XML specific features (namespace, etc.) may not be processed correctly.
30
31
- Special Options:
32
33
  - ac_parse_value: Try to parse values, elements' text and attributes.
34
35
  - merge_attrs: Merge attributes and mix with children nodes. Please note that
36
    information of attributes is lost after load if this option is used.
37
38
  - tags: A dict provide special parameter names to distinguish between
39
    attributes, text and children nodes. Default is {"attrs": "@attrs", "text":
40
    "@text", "children": "@children"}.
41
42
Changelog:
43
44
.. versionchanged:: 0.8.2
45
46
   - Add special options, tags, merge_attrs and ac_parse_value
47
   - Remove special option, pprefix which conflicts with another option tags
48
49
.. versionchanged:: 0.8.0
50
51
   - Try to make a nested dict w/o extra dict having keys of attrs, text and
52
     children from XML string/file as much as possible.
53
   - Support namespaces partially.
54
55
.. versionchanged:: 0.1.0
56
57
   - Added XML dump support.
58
"""
59
from __future__ import absolute_import
60
from io import BytesIO
61
62
import operator
63
import re
64
try:
    import xml.etree.cElementTree as ET
except ImportError:
    # The fallbacks have to be nested: sibling ``except ImportError`` clauses
    # of a single ``try`` never chain, so an ImportError raised by the second
    # import would propagate instead of reaching the third candidate.
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        import elementtree.ElementTree as ET
70
71
import anyconfig.backend.base
72
import anyconfig.compat
73
import anyconfig.utils
74
import anyconfig.parser
75
76
77
_TAGS = dict(attrs="@attrs", text="@text", children="@children")
78
_ET_NS_RE = re.compile(r"^{(\S+)}(\S+)$")
79
80
81
def _iterparse(xmlfile):
82
    """
83
    Avoid bug in python 3.{2,3}. See http://bugs.python.org/issue9257.
84
85
    :param xmlfile: XML file or file-like object
86
    """
87
    try:
88
        return ET.iterparse(xmlfile, events=("start-ns", ))
89
    except TypeError:
90
        return ET.iterparse(xmlfile, events=(b"start-ns", ))
91
92
93
def flip(tpl):
    """
    Make a pair of the first two items of `tpl` in the reversed order.

    :param tpl: A tuple or any other indexable object has two items at least
    :return: A tuple, (second item, first item)

    >>> flip((1, 2))
    (2, 1)
    """
    second = tpl[1]
    first = tpl[0]
    return (second, first)
99
100
101
def _namespaces_from_file(xmlfile):
    """
    Collect the namespace declarations reported while parsing `xmlfile`.

    :param xmlfile: XML file or file-like object
    :return: {namespace_uri: namespace_prefix} or {}
    """
    nspaces = {}
    for _event, declaration in _iterparse(xmlfile):
        # Each declaration is flipped to become a (key, value) pair.
        uri, prefix = flip(declaration)
        nspaces[uri] = prefix

    return nspaces
107
108
109
def _tweak_ns(tag, **options):
110
    """
111
    :param tag: XML tag element
112
    :param nspaces: A namespaces dict, {uri: prefix}
113
    :param options: Extra keyword options may contain 'nspaces' keyword option
114
        provide a namespace dict, {uri: prefix}
115
116
    >>> _tweak_ns("a", nspaces={})
117
    'a'
118
    >>> _tweak_ns("a", nspaces={"http://example.com/ns/val/": "val"})
119
    'a'
120
    >>> _tweak_ns("{http://example.com/ns/val/}a",
121
    ...           nspaces={"http://example.com/ns/val/": "val"})
122
    'val:a'
123
    """
124
    nspaces = options.get("nspaces", None)
125
    if nspaces is not None:
126
        matched = _ET_NS_RE.match(tag)
127
        if matched:
128
            (uri, tag) = matched.groups()
129
            prefix = nspaces.get(uri, False)
130
            if prefix:
131
                return "%s:%s" % (prefix, tag)
132
133
    return tag
134
135
136
def _dicts_have_unique_keys(dics):
    """
    Test that no key appears in more than one dict of `dics`.

    The number of distinct keys is compared with the total number of the
    items of all dicts; these are equal only if no key is shared.

    :param dics: [<dict or dict-like object>], must not be [] or [{...}]
    :return: True if all keys of each dict of `dics` are unique

    # Enable the followings if to allow dics is [], [{...}]:
    # >>> all(_dicts_have_unique_keys([d]) for [d]
    # ...     in ({}, {'a': 0}, {'a': 1, 'b': 0}))
    # True
    # >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2, 'c': 0}])
    # True

    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'a': 2}])
    False
    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2}, {'b': 3, 'c': 0}])
    False
    >>> _dicts_have_unique_keys([{}, {}])
    True
    """
    all_keys = anyconfig.compat.from_iterable(d.keys() for d in dics)
    n_distinct = len(set(all_keys))
    n_total = sum(len(d) for d in dics)

    return n_distinct == n_total
157
158
159
def _merge_dicts(dics, container=dict):
    """
    Merge the items of all dicts in `dics` into a <container> object.

    :param dics: [<dict/-like object must not have same keys each other>]
    :param container: callble to make a container object
    :return: <container> object

    >>> _merge_dicts(({}, ))
    {}
    >>> _merge_dicts(({'a': 1}, ))
    {'a': 1}
    >>> sorted(kv for kv in _merge_dicts(({'a': 1}, {'b': 2})).items())
    [('a', 1), ('b', 2)]
    """
    pairs = anyconfig.compat.from_iterable(d.items() for d in dics)
    # Go through OrderedDict first and pass the result to `container` later.
    merged = anyconfig.compat.OrderedDict(pairs)

    return container(merged)
174
175
176
def _parse_text(val, **options):
177
    """
178
    :return: Parsed value or value itself depends on `ac_parse_value`
179
    """
180
    if val and options.get("ac_parse_value", False):
181
        return anyconfig.parser.parse_single(val)
182
183
    return val
184
185
186
def _process_elem_text(elem, dic, subdic, text="@text", **options):
187
    """
188
    :param elem: ET Element object which has elem.text
189
    :param dic: <container> (dict[-like]) object converted from elem
190
    :param subdic: Sub <container> object converted from elem
191
    :param options:
192
        Keyword options, see the description of :func:`elem_to_container` for
193
        more details.
194
195
    :return: None but updating elem.text, dic and subdic as side effects
196
    """
197
    elem.text = elem.text.strip()
198
    if elem.text:
199
        etext = _parse_text(elem.text, **options)
200
        if len(elem) or elem.attrib:
201
            subdic[text] = etext
202
        else:
203
            dic[elem.tag] = etext  # Only text, e.g. <a>text</a>
204
205
206
def _parse_attrs(elem, container=dict, **options):
207
    """
208
    :param elem: ET Element object has attributes (elem.attrib)
209
    :param container: callble to make a container object
210
    :return: Parsed value or value itself depends on `ac_parse_value`
211
    """
212
    adic = dict((_tweak_ns(a, **options), v) for a, v in elem.attrib.items())
213
    if options.get("ac_parse_value", False):
214
        return container(dict((k, anyconfig.parser.parse_single(v))
215
                              for k, v in adic.items()))
216
217
    return container(adic)
218
219
220
def _process_elem_attrs(elem, dic, subdic, container=dict, attrs="@attrs",
                        **options):
    """
    Parse the attributes of `elem` and store them in `dic` or `subdic`.

    The attributes replace the value of `elem`.tag in `dic` if `elem` has
    neither text nor children and `merge_attrs` is set. Otherwise, these are
    stored in `subdic` under the key `attrs`.

    :param elem: ET Element object or None
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param container: callble to make a container object
    :param attrs: Tag (key) for the attributes node
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    parsed = _parse_attrs(elem, container=container, **options)
    attrs_only = not (elem.text or len(elem))

    if attrs_only and options.get("merge_attrs"):
        dic[elem.tag] = parsed
        return

    subdic[attrs] = parsed
237
238
239
def _process_children_elems(elem, dic, subdic, container=dict,
                            children="@children", **options):
    """
    Convert the children of `elem` and store them in `dic` or `subdic`.

    - If keys of the converted children and the head dict (a container made
      of elem.attrib if `merge_attrs` is set or `subdic` otherwise) are all
      unique, these are merged into one and set to `elem`.tag in `dic`.
    - Else if `subdic` is empty, the list of the converted children is set to
      `elem`.tag in `dic`.
    - Otherwise, the list is stored in `subdic` under the key `children`.

    :param elem: ET Element object or None
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param container: callble to make a container object
    :param children: Tag for children nodes
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    child_dics = [elem_to_container(child, container=container, **options)
                  for child in elem]

    if options.get("merge_attrs", False):
        head = container(elem.attrib)
    else:
        head = subdic
    candidates = [head] + child_dics

    if _dicts_have_unique_keys(candidates):  # ex. <a><b>1</b><c>c</c></a>
        dic[elem.tag] = _merge_dicts(candidates, container)
        return

    if subdic:
        subdic[children] = child_dics
    else:  # There are no attrs nor text and only these children.
        dic[elem.tag] = child_dics
264
265
266
def elem_to_container(elem, container=dict, **options):
    """
    Convert XML ElementTree Element to a collection of container objects.

    Basically, text, attributes and children of the element are stored under
    special tagged nodes (attrs, text and children) to keep what these were.
    However, these are attached to the parent node directly in some special
    cases like the followings, for later convenience.

    - There is only text element
    - There are only children elements each has unique keys among all

    An element has no text, attributes and children, e.g. <tag/>, becomes
    None. Please note that elem.tag is overwritten with the tweaked tag and
    elem.text might be stripped during the conversion.

    :param elem: ET Element object or None
    :param container: callble to make a container object
    :param options: Keyword options

        - nspaces: A namespaces dict, {uri: prefix} or None
        - attrs, text, children: Tags for special nodes to keep XML info
        - merge_attrs: Merge attributes and mix with children nodes, and the
          information of attributes are lost after its transformation.

    :return: <container> object, empty one if `elem` is None
    """
    dic = container()
    if elem is None:
        return dic

    tag = _tweak_ns(elem.tag, **options)  # {ns}tag -> ns_prefix:tag
    elem.tag = tag
    subdic = container()
    dic[tag] = subdic
    options["container"] = container

    # The order matters; text processing may empty elem.text which is
    # checked again by the later steps.
    if elem.text:
        _process_elem_text(elem, dic, subdic, **options)

    if elem.attrib:
        _process_elem_attrs(elem, dic, subdic, **options)

    if len(elem):
        _process_children_elems(elem, dic, subdic, **options)
    elif not (elem.text or elem.attrib):  # ex. <tag/>.
        dic[tag] = None

    return dic
307
308
309
def _complement_tag_options(options):
    """
    Set the tags of special nodes (attrs, text and children) in `options`.

    Nothing is changed if all of them are set already. Otherwise, each one
    is set to the value found in the 'tags' option, or its default in _TAGS.

    :param options: Keyword options :: dict
    :return: `options` itself, updated in place

    >>> ref = _TAGS.copy()
    >>> ref["text"] = "#text"
    >>> opts = _complement_tag_options({"tags": {"text": ref["text"]}})
    >>> del opts["tags"]  # To simplify comparison.
    >>> sorted(opts.items())
    [('attrs', '@attrs'), ('children', '@children'), ('text', '#text')]
    """
    if all(ntype in options for ntype in _TAGS):
        return options

    custom_tags = options.get("tags", {})
    for ntype, default_tag in _TAGS.items():
        options[ntype] = custom_tags.get(ntype, default_tag)

    return options
326
327
328
def root_to_container(root, container=dict, nspaces=None, **options):
    """
    Convert XML ElementTree Root Element to a collection of container objects.

    Namespaces in `nspaces` are added to the attributes of `root` as 'xmlns'
    or 'xmlns:<prefix>' attributes before the conversion.

    :param root: etree root object or None
    :param container: callble to make a container object
    :param nspaces: A namespaces dict, {uri: prefix} or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return: <container> object, empty one if `root` is None
    """
    tree = container()
    if root is None:
        return tree

    if nspaces is not None:
        for uri, prefix in nspaces.items():
            if prefix:
                attr = "xmlns:" + prefix
            else:
                attr = "xmlns"
            root.attrib[attr] = uri

    tag_opts = _complement_tag_options(options)
    return elem_to_container(root, container=container, nspaces=nspaces,
                             **tag_opts)
350
351
352
def _to_str_fn(**options):
353
    """
354
    :param options: Keyword options might have 'ac_parse_value' key
355
    :param to_str: Callable to convert value to string
356
    """
357
    return str if options.get("ac_parse_value") else anyconfig.utils.noop
358
359
360
def _elem_set_attrs(obj, parent, to_str):
    """
    Set each item of `obj` as an attribute of `parent`.

    :param obj: Container instance gives attributes of XML Element
    :param parent: XML ElementTree parent node object
    :param to_str: Callable to convert value to string

    :return: None but parent will be modified
    """
    for name, value in anyconfig.compat.iteritems(obj):
        attr_value = to_str(value)
        parent.set(name, attr_value)
371
372
373
def _elem_from_descendants(children_nodes, **options):
374
    """
375
    :param children_nodes: A list of child dict objects
376
    :param options: Keyword options, see :func:`container_to_etree`
377
    """
378
    for child in children_nodes:  # child should be a dict-like object.
379
        for ckey, cval in anyconfig.compat.iteritems(child):
380
            celem = ET.Element(ckey)
381
            container_to_etree(cval, parent=celem, **options)
382
            yield celem
383
384
385
def _get_or_update_parent(key, val, to_str, parent=None, **options):
    """
    Make an element named `key` from `val` and attach it to `parent` if any.

    :param key: Key of current child (dict{,-like} object)
    :param val: Value of current child (dict{,-like} object or [dict{,...}])
    :param to_str: Callable to convert value to string
    :param parent: XML ElementTree parent node object or None
    :param options: Keyword options, see :func:`container_to_etree`

    :return:
        The new element if `parent` is None, or `parent` to which the new
        element was appended otherwise
    """
    elem = ET.Element(key)

    if anyconfig.utils.is_iterable(val):
        items = val
    else:
        items = [val]

    for item in items:
        container_to_etree(item, parent=elem, to_str=to_str, **options)

    if parent is not None:
        parent.append(elem)
        return parent

    return elem  # 'elem' is the top level etree.
404
405
406
# Names of the options giving the tags of special nodes, in the order these
# are unpacked in container_to_etree.
_ATC = ("attrs", "text", "children")


def container_to_etree(obj, parent=None, to_str=None, **options):
    """
    Convert a dict-like object to XML ElementTree.

    If `obj` is not a dict-like object, it is set as the text of `parent`,
    if `parent` is given and `obj` is not empty, and `parent` is returned.

    :param obj: Container instance to convert to
    :param parent: XML ElementTree parent node object or None
    :param to_str: Callable to convert value to string or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return:
        ET.ElementTree object wraps `parent` updated if `obj` is a dict-like
        object, or `parent` (might be None) otherwise
    """
    if to_str is None:
        to_str = _to_str_fn(**options)

    if not anyconfig.utils.is_dict_like(obj):
        if parent is not None and obj:
            parent.text = to_str(obj)  # Parent is a leaf text node.
        return parent  # All attributes and text should be set already.

    options = _complement_tag_options(options)
    attrs, text, children = [options[name] for name in _ATC]

    for key, val in anyconfig.compat.iteritems(obj):
        if key == attrs:
            _elem_set_attrs(val, parent, to_str)
            continue

        if key == text:
            parent.text = to_str(val)
            continue

        if key == children:
            for child_elem in _elem_from_descendants(val, **options):
                parent.append(child_elem)
            continue

        # An ordinary key; it makes a new element under parent or the top
        # level element if there is no parent yet.
        parent = _get_or_update_parent(key, val, to_str, parent=parent,
                                       **options)

    return ET.ElementTree(parent)
445
446
447
def etree_write(tree, stream):
    """
    Write XML ElementTree `tree` content into `stream` in UTF-8.

    .. note::
       It seems that ET.ElementTree.write() cannot process a parameter
       'xml_declaration' in python 2.6, so that it is passed only if python
       is not 2.6.

    :param tree: XML ElementTree object
    :param stream: File or file-like object can write to
    """
    write_opts = dict(encoding='UTF-8')
    if not anyconfig.compat.IS_PYTHON_2_6:
        write_opts["xml_declaration"] = True

    tree.write(stream, **write_opts)
462
463
464
class Parser(anyconfig.backend.base.Parser,
             anyconfig.backend.base.ToStreamDumperMixin,
             anyconfig.backend.base.BinaryFilesMixin):
    """
    Backend parser to load and dump XML data.
    """
    _type = "xml"
    _extensions = ["xml"]
    # Load and dump options share the same list object.
    _load_opts = _dump_opts = ["tags", "merge_attrs", "ac_parse_value"]
    _ordered = True
    _dict_opts = ["ac_dict"]

    def load_from_string(self, content, container, **opts):
        """
        Load config from XML snippet (a string `content`).

        `content` is parsed to get the root element, and wrapped with a
        stream object and scanned again to collect namespaces.

        :param content:
            XML snippet string of str (python 2) or bytes (python 3) type
        :param container: callble to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.fromstring(content)

        if anyconfig.compat.IS_PYTHON_3:
            make_stream = BytesIO
        else:
            make_stream = anyconfig.compat.StringIO

        nspaces = _namespaces_from_file(make_stream(content))
        return root_to_container(root, container=container,
                                 nspaces=nspaces, **opts)

    def load_from_path(self, filepath, container, **opts):
        """
        Load config from the XML file `filepath`.

        :param filepath: XML file path
        :param container: callble to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        tree = ET.parse(filepath)
        # The file is read again to collect namespaces.
        nspaces = _namespaces_from_file(filepath)

        return root_to_container(tree.getroot(), container=container,
                                 nspaces=nspaces, **opts)

    def load_from_stream(self, stream, container, **opts):
        """
        Load config from the XML file or file-like object `stream`.

        :param stream: XML file or file-like object
        :param container: callble to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        tree = ET.parse(stream)
        # Namespaces are collected via the path got from `stream`, not from
        # `stream` itself.
        path = anyconfig.utils.get_path_from_stream(stream)
        nspaces = _namespaces_from_file(path)

        return root_to_container(tree.getroot(), container=container,
                                 nspaces=nspaces, **opts)

    def dump_to_string(self, cnf, **opts):
        """
        Dump config `cnf` as XML data.

        :param cnf: Configuration data to dump
        :param opts:
            optional keyword parameters passed to :func:`container_to_etree`

        :return: content of the BytesIO buffer the XML data was written to
        """
        etree = container_to_etree(cnf, **opts)
        out = BytesIO()
        etree_write(etree, out)

        return out.getvalue()

    def dump_to_stream(self, cnf, stream, **opts):
        """
        Dump config `cnf` as XML data into `stream`.

        :param cnf: Configuration data to dump
        :param stream: Config file or file like object write to
        :param opts:
            optional keyword parameters passed to :func:`container_to_etree`
        """
        etree = container_to_etree(cnf, **opts)
        etree_write(etree, stream)
543
544
# vim:sw=4:ts=4:et:
545