Completed
Push — master ( c1cf5e...538be5 )
by Satoru
01:07
created

_to_str_fn()   A

Complexity

Conditions 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 6
rs 9.4285
1
#
2
# Copyright (C) 2011 - 2017 Satoru SATOH <ssato @ redhat.com>
3
# License: MIT
4
#
5
# Some XML modules may be missing and Base.{load,dumps}_impl are not overridden:
6
# pylint: disable=import-error, duplicate-except
7
r"""XML backend:
8
9
- Format to support: XML, e.g. http://www.w3.org/TR/xml11/
10
- Requirements: one of the followings
11
12
  - xml.etree.cElementTree in standard lib if python >= 2.5
13
  - xml.etree.ElementTree in standard lib if python >= 2.5
14
  - elementtree.ElementTree (otherwise)
15
16
- Development Status :: 4 - Beta
17
- Limitations:
18
19
  - special node '@attrs', '@text' and '@children' are used to keep XML
20
    structure of original data. You have to customize them with 'tags' keyword
21
    option to avoid any config parameters conflict with some of them.
22
23
  - Some data or structures of original XML file may be lost if make it backed
24
    to XML file; XML file - (anyconfig.load) -> config - (anyconfig.dump) ->
25
    XML file
26
27
  - XML specific features (namespace, etc.) may not be processed correctly.
28
29
- Special Options:
30
31
  - ac_parse_value: Try to parse values, elements' text and attributes.
32
33
  - merge_attrs: Merge attributes and mix with children nodes. Please note that
34
    information of attributes are lost after load if this option is used.
35
36
  - tags: A dict provide special parameter names to distinguish between
37
    attributes, text and children nodes. Default is {"attrs": "@attrs", "text":
38
    "@text", "children": "@children"}.
39
40
Changelog:
41
42
.. versionchanged:: 0.8.2
43
44
   - Add special options, tags, merge_attrs and ac_parse_value
45
   - Remove special option, pprefix which conflicts with another option tags
46
47
.. versionchanged:: 0.8.0
48
49
   - Try to make a nested dict w/o extra dict having keys of attrs, text and
50
     children from XML string/file as much as possible.
51
   - Support namespaces partially.
52
53
.. versionchanged:: 0.1.0
54
55
   - Added XML dump support.
56
"""
57
from __future__ import absolute_import
58
from io import BytesIO
59
60
import operator
61
import re
62
# Pick the first available ElementTree implementation. The fallbacks are
# nested because an 'except' clause does not guard the body of a sibling
# handler: with two sibling 'except ImportError' clauses the second one can
# never run.
try:
    import xml.etree.cElementTree as ET
except ImportError:
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        import elementtree.ElementTree as ET
68
69
import anyconfig.backend.base
70
import anyconfig.compat
71
import anyconfig.utils
72
import anyconfig.parser
73
74
75
# Default names of the special nodes keeping attributes, text and children.
_TAGS = dict(attrs="@attrs", text="@text", children="@children")
# Matches tags in the '{namespace_uri}local_name' form; groups: (uri, name).
_ET_NS_RE = re.compile(r"^{(\S+)}(\S+)$")
77
78
79
def _iterparse(xmlfile):
    """
    Start an incremental parse of `xmlfile` which reports only "start-ns"
    (namespace declaration) events.

    The call is retried with a bytes event name on TypeError to avoid a bug in
    python 3.{2,3}. See http://bugs.python.org/issue9257.

    :param xmlfile: XML file or file-like object
    :return: An iterator object returned by ET.iterparse
    """
    try:
        return ET.iterparse(xmlfile, events=("start-ns", ))
    except TypeError:
        return ET.iterparse(xmlfile, events=(b"start-ns", ))
89
90
91
def flip(tpl):
    """
    Swap the first two items of `tpl`.

    :param tpl: An indexable object having two items at least
    :return: A tuple, (tpl[1], tpl[0])

    >>> flip((1, 2))
    (2, 1)
    """
    return (tpl[1], tpl[0])
97
98
99
def _namespaces_from_file(xmlfile):
    """
    Collect namespace declarations reported while parsing `xmlfile`.

    :param xmlfile: XML file or file-like object
    :return: {namespace_uri: namespace_prefix} or {}
    """
    # Each event payload is flipped to make a (key, value) pair of the result.
    return dict(flip(t) for _, t in _iterparse(xmlfile))
105
106
107
def _tweak_ns(tag, nspaces=None, **options):
    """
    Convert a tag in the '{namespace_uri}name' form to 'prefix:name' if the
    namespace URI is found in `nspaces` with a non-empty prefix.

    :param tag: XML tag element
    :param nspaces: A namespaces dict, {uri: prefix} or None
    :param options: Extra keyword options (not used here)
    :return:
        'prefix:name' if a prefix was found, else the name w/o the namespace
        part if `nspaces` was given and `tag` has that form, else `tag` itself

    >>> _tweak_ns("a", {})
    'a'
    >>> _tweak_ns("a", {"http://example.com/ns/val/": "val"})
    'a'
    >>> _tweak_ns("{http://example.com/ns/val/}a",
    ...           {"http://example.com/ns/val/": "val"})
    'val:a'
    """
    if nspaces:
        matched = _ET_NS_RE.match(tag)
        if matched:
            # 'tag' is rebound to its local name w/o the namespace part.
            (uri, tag) = matched.groups()
            prefix = nspaces.get(uri, False)
            if prefix:
                return "%s:%s" % (prefix, tag)

    return tag
130
131
132
def _dicts_have_unique_keys(dics):
    """
    Test that no key appears in more than one dict of `dics`.

    :param dics:
        An iterable of <dict or dict-like object>s. It may be a one-shot
        iterator also because it is converted to a list at first.
    :return: True if all keys of each dict of `dics` are unique

    # Enable the followings if to allow dics is [], [{...}]:
    # >>> all(_dicts_have_unique_keys([d]) for [d]
    # ...     in ({}, {'a': 0}, {'a': 1, 'b': 0}))
    # True
    # >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2, 'c': 0}])
    # True

    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'a': 2}])
    False
    >>> _dicts_have_unique_keys([{}, {'a': 1}, {'b': 2}, {'b': 3, 'c': 0}])
    False
    >>> _dicts_have_unique_keys([{}, {}])
    True
    """
    # 'dics' is traversed twice below, so an iterator must be kept in a list.
    dics = list(dics)
    keys = set(key for dic in dics for key in dic.keys())

    # Any key shared among dicts makes the set smaller than the total count.
    return len(keys) == sum(len(dic) for dic in dics)
153
154
155
def _merge_dicts(dics, container=dict):
    """
    Merge items of all dicts in `dics` into an object made by `container`.

    :param dics: [<dict/-like object must not have same keys each other>]
    :param container: callable to make a container object
    :return: <container> object

    >>> _merge_dicts(({}, ))
    {}
    >>> _merge_dicts(({'a': 1}, ))
    {'a': 1}
    >>> sorted(kv for kv in _merge_dicts(({'a': 1}, {'b': 2})).items())
    [('a', 1), ('b', 2)]
    """
    dic_itr = anyconfig.compat.from_iterable(d.items() for d in dics)
    # Items are collected in an OrderedDict first, then given to 'container'.
    return container(anyconfig.compat.OrderedDict(dic_itr))
170
171
172
def _parse_text(val, **options):
    """
    Parse `val` with anyconfig.parser.parse_single if `val` is not empty and
    the option `ac_parse_value` is set.

    :param val: A value (text) to parse
    :param options: Keyword options might have 'ac_parse_value' key
    :return: Parsed value or value itself depends on `ac_parse_value`
    """
    if val and options.get("ac_parse_value", False):
        return anyconfig.parser.parse_single(val)

    return val
180
181
182
def _process_elem_text(elem, dic, subdic, text="@text", **options):
    """
    Strip the text of `elem` and store it, if not empty after that, in
    `subdic` or `dic`.

    :param elem: ET Element object which has elem.text
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param text: Tag (key) for the text node in `subdic`
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating elem.text, dic and subdic as side effects
    """
    elem.text = elem.text.strip()
    if elem.text:
        etext = _parse_text(elem.text, **options)
        if len(elem) or elem.attrib:
            # There are children or attributes, so text is kept under 'text'.
            subdic[text] = etext
        else:
            dic[elem.tag] = etext  # Only text, e.g. <a>text</a>
200
201
202
def _parse_attrs(elem, container=dict, **options):
    """
    Make a container object of attributes of `elem`, of which names are
    processed by :func:`_tweak_ns`.

    :param elem: ET Element object has attributes (elem.attrib)
    :param container: callable to make a container object
    :param options: Keyword options might have 'ac_parse_value' key
    :return:
        <container> object of attributes of which values are parsed or kept
        as they are depends on `ac_parse_value`
    """
    adic = dict((_tweak_ns(a, **options), v) for a, v in elem.attrib.items())
    if options.get("ac_parse_value", False):
        return container(dict((k, anyconfig.parser.parse_single(v))
                              for k, v in adic.items()))

    return container(adic)
214
215
216
def _process_elem_attrs(elem, dic, subdic, container=dict, attrs="@attrs",
                        **options):
    """
    Store attributes of `elem` in `dic` or `subdic`.

    :param elem: ET Element object or None
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param container: callable to make a container object
    :param attrs: Tag (key) for the attributes node in `subdic`
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    adic = _parse_attrs(elem, container=container, **options)
    if not elem.text and not len(elem) and options.get("merge_attrs"):
        # Neither text nor children; attributes become the value of the tag.
        dic[elem.tag] = adic
    else:
        subdic[attrs] = adic
233
234
235
def _process_children_elems(elem, dic, subdic, container=dict,
                            children="@children", **options):
    """
    Convert children of `elem` and store the result in `dic` or `subdic`.

    :param elem: ET Element object or None
    :param dic: <container> (dict[-like]) object converted from elem
    :param subdic: Sub <container> object converted from elem
    :param container: callable to make a container object
    :param children: Tag for children nodes
    :param options:
        Keyword options, see the description of :func:`elem_to_container` for
        more details.

    :return: None but updating dic and subdic as side effects
    """
    cdics = [elem_to_container(c, container=container, **options)
             for c in elem]
    merge_attrs = options.get("merge_attrs", False)
    # The first item is elem.attrib as it is if 'merge_attrs', else 'subdic'.
    sdics = [container(elem.attrib) if merge_attrs else subdic] + cdics

    if _dicts_have_unique_keys(sdics):  # ex. <a><b>1</b><c>c</c></a>
        dic[elem.tag] = _merge_dicts(sdics, container)
    elif not subdic:  # There are no attrs nor text and only these children.
        dic[elem.tag] = cdics
    else:
        subdic[children] = cdics
260
261
262
def elem_to_container(elem, container=dict, **options):
    """
    Convert XML ElementTree Element to a collection of container objects.

    Elements are transformed to a node under special tagged nodes, attrs, text
    and children, to store the type of these elements basically, however, in
    some special cases like the followings, these nodes are attached to the
    parent node directly for later convenience.

    - There is only text element
    - There are only children elements each has unique keys among all

    :param elem: ET Element object or None
    :param container: callable to make a container object
    :param options: Keyword options

        - nspaces: A namespaces dict, {uri: prefix} or None
        - attrs, text, children: Tags for special nodes to keep XML info
        - merge_attrs: Merge attributes and mix with children nodes, and the
          information of attributes are lost after its transformation.

    :return:
        <container> object; an empty one if `elem` is None. Please note that
        elem.tag, and elem.text if any, are modified as side effects.
    """
    dic = container()
    if elem is None:
        return dic

    elem.tag = _tweak_ns(elem.tag, **options)  # {ns}tag -> ns_prefix:tag
    subdic = dic[elem.tag] = container()
    options["container"] = container  # To pass it to the helpers below.

    if elem.text:
        _process_elem_text(elem, dic, subdic, **options)

    if elem.attrib:
        _process_elem_attrs(elem, dic, subdic, **options)

    if len(elem):
        _process_children_elems(elem, dic, subdic, **options)
    elif not elem.text and not elem.attrib:  # ex. <tag/>.
        dic[elem.tag] = None

    return dic
303
304
305
def _complement_tag_options(options):
    """
    Set tags of special nodes, 'attrs', 'text' and 'children', in `options`
    from its 'tags' item or the defaults, unless all of them are set already.

    :param options: Keyword options :: dict
    :return: `options` itself, which is updated in place

    >>> ref = _TAGS.copy()
    >>> ref["text"] = "#text"
    >>> opts = _complement_tag_options({"tags": {"text": ref["text"]}})
    >>> del opts["tags"]  # To simplify comparison.
    >>> sorted(opts.items())
    [('attrs', '@attrs'), ('children', '@children'), ('text', '#text')]
    """
    if not all(nt in options for nt in _TAGS.keys()):
        tags = options.get("tags", {})
        # All of three are (re)set even if some of them were given already.
        for ntype, tag in _TAGS.items():
            options[ntype] = tags.get(ntype, tag)

    return options
322
323
324
def root_to_container(root, container=dict, nspaces=None, **options):
    """
    Convert XML ElementTree Root Element to a collection of container objects.

    :param root: etree root object or None
    :param container: callable to make a container object
    :param nspaces: A namespaces dict, {uri: prefix} or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return: <container> object; an empty one if `root` is None
    """
    tree = container()
    if root is None:
        return tree

    if nspaces:
        # Add namespace declarations as attributes of the root element;
        # 'xmlns' is used for the one w/o prefix.
        for uri, prefix in nspaces.items():
            root.attrib[("xmlns:" + prefix) if prefix else "xmlns"] = uri

    return elem_to_container(root, container=container, nspaces=nspaces,
                             **_complement_tag_options(options))
346
347
348
def _to_str_fn(**options):
    """
    Choose a callable to convert values to strings.

    :param options: Keyword options might have 'ac_parse_value' key
    :return:
        Callable to convert value to string; str if 'ac_parse_value' is set,
        else anyconfig.utils.noop
    """
    return str if options.get("ac_parse_value") else anyconfig.utils.noop
354
355
356
def _elem_set_attrs(obj, parent, to_str):
    """
    Set items of `obj` as attributes of `parent`.

    :param obj: Container instance gives attributes of XML Element
    :param parent: XML ElementTree parent node object
    :param to_str: Callable to convert value to string

    :return: None but parent will be modified
    """
    for attr, val in anyconfig.compat.iteritems(obj):
        parent.set(attr, to_str(val))
367
368
369
def _elem_from_descendants(children_nodes, **options):
    """
    Generate XML elements from each item of each child in `children_nodes`.

    :param children_nodes: A list of child dict objects
    :param options: Keyword options, see :func:`container_to_etree`
    :return: A generator yields ET.Element objects
    """
    for child in children_nodes:  # child should be a dict-like object.
        for ckey, cval in anyconfig.compat.iteritems(child):
            celem = ET.Element(ckey)
            # celem is populated with cval as a side effect.
            container_to_etree(cval, parent=celem, **options)
            yield celem
379
380
381
def _get_or_update_parent(key, val, to_str, parent=None, **options):
    """
    Make an element named `key` from `val`, and append it to `parent` if any.

    :param key: Key of current child (dict{,-like} object)
    :param val: Value of current child (dict{,-like} object or [dict{,...}])
    :param to_str: Callable to convert value to string
    :param parent: XML ElementTree parent node object or None
    :param options: Keyword options, see :func:`container_to_etree`
    :return: The new element if `parent` is None, else `parent`
    """
    elem = ET.Element(key)

    vals = val if anyconfig.utils.is_iterable(val) else [val]
    for item in vals:  # Each one is processed with the same element, elem.
        container_to_etree(item, parent=elem, to_str=to_str, **options)

    if parent is None:  # 'elem' is the top level etree.
        return elem

    parent.append(elem)
    return parent
400
401
402
# Option names of tags for special nodes, in the order of these are unpacked.
_ATC = ("attrs", "text", "children")


def container_to_etree(obj, parent=None, to_str=None, **options):
    """
    Convert a dict-like object to XML ElementTree.

    :param obj: Container instance to convert to
    :param parent: XML ElementTree parent node object or None
    :param to_str: Callable to convert value to string or None
    :param options: Keyword options,

        - tags: Dict of tags for special nodes to keep XML info, attributes,
          text and children nodes, e.g. {"attrs": "@attrs", "text": "#text"}

    :return:
        ET.ElementTree object made of `parent` if `obj` is a dict-like object,
        else None; `parent` is modified as side effects if it is not None.
    """
    if to_str is None:
        to_str = _to_str_fn(**options)

    if not anyconfig.utils.is_dict_like(obj):
        obj = False if obj is None else to_str(obj)
        if parent is not None and obj:
            parent.text = obj  # Parent is a leaf text node.
        return  # All attributes and text should be set already.

    options = _complement_tag_options(options)
    (attrs, text, children) = operator.itemgetter(*_ATC)(options)

    # NOTE(review): attrs, text and children keys are applied to 'parent'
    # directly, so it looks these are not expected at the top level, where
    # 'parent' is still None - confirm.
    for key, val in anyconfig.compat.iteritems(obj):
        if key == attrs:
            _elem_set_attrs(val, parent, to_str)
        elif key == text:
            parent.text = to_str(val)
        elif key == children:
            for celem in _elem_from_descendants(val, **options):
                parent.append(celem)
        else:
            parent = _get_or_update_parent(key, val, to_str, parent=parent,
                                           **options)

    return ET.ElementTree(parent)
442
443
444
def etree_write(tree, stream):
    """
    Write XML ElementTree `tree` content into `stream` in UTF-8.

    .. note::
       It seems that ET.ElementTree.write() cannot process a parameter
       'xml_declaration' in python 2.6.

    :param tree: XML ElementTree object
    :param stream: File or file-like object can write to
    """
    if anyconfig.compat.IS_PYTHON_2_6:
        tree.write(stream, encoding='UTF-8')
    else:
        tree.write(stream, encoding='UTF-8', xml_declaration=True)
459
460
461
class Parser(anyconfig.backend.base.ToStreamDumper,
             anyconfig.backend.base.BinaryFilesMixin):
    """
    Parser for XML files.
    """
    _type = "xml"
    _extensions = ["xml"]
    # Both names are bound to the same list object.
    _load_opts = _dump_opts = ["tags", "merge_attrs", "ac_parse_value"]
    _ordered = True
    _dict_opts = ["ac_dict"]

    def load_from_string(self, content, container, **opts):
        """
        Load config from XML snippet (a string `content`).

        :param content:
            XML snippet string of str (python 2) or bytes (python 3) type
        :param container: callable to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.fromstring(content)
        # 'content' is parsed again, from a stream, to collect namespaces.
        if anyconfig.compat.IS_PYTHON_3:
            stream = BytesIO(content)
        else:
            stream = anyconfig.compat.StringIO(content)
        nspaces = _namespaces_from_file(stream)
        return root_to_container(root, container=container,
                                 nspaces=nspaces, **opts)

    def load_from_path(self, filepath, container, **opts):
        """
        Load config from XML file `filepath`.

        :param filepath: XML file path
        :param container: callable to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.parse(filepath).getroot()
        # The file is parsed again to collect namespaces.
        nspaces = _namespaces_from_file(filepath)
        return root_to_container(root, container=container,
                                 nspaces=nspaces, **opts)

    def load_from_stream(self, stream, container, **opts):
        """
        Load config from XML file or file-like object `stream`.

        :param stream: XML file or file-like object
        :param container: callable to make a container object
        :param opts:
            optional keyword parameters passed to :func:`root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.parse(stream).getroot()
        # NOTE(review): namespaces are collected from the path got from
        # 'stream', so presumably it must be backed by a file has a path -
        # confirm the behavior of get_path_from_stream for other streams.
        path = anyconfig.utils.get_path_from_stream(stream)
        nspaces = _namespaces_from_file(path)
        return root_to_container(root, container=container,
                                 nspaces=nspaces, **opts)

    def dump_to_string(self, cnf, **opts):
        """
        Dump config `cnf` as an XML document.

        :param cnf: Configuration data to dump
        :param opts:
            optional keyword parameters passed to :func:`container_to_etree`

        :return: The value of a BytesIO buffer XML document was written to
        """
        tree = container_to_etree(cnf, **opts)
        buf = BytesIO()
        etree_write(tree, buf)
        return buf.getvalue()

    def dump_to_stream(self, cnf, stream, **opts):
        """
        Dump config `cnf` as an XML document into `stream`.

        :param cnf: Configuration data to dump
        :param stream: Config file or file like object write to
        :param opts:
            optional keyword parameters passed to :func:`container_to_etree`
        """
        tree = container_to_etree(cnf, **opts)
        etree_write(tree, stream)
539
540
# vim:sw=4:ts=4:et:
541