Completed
Push — master ( 0a8a0f...9c0acb )
by Satoru
01:01
created

_elem_strip_text()   A

Complexity

Conditions 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
dl 0
loc 6
rs 9.4285
c 1
b 0
f 0
1
#
2
# Copyright (C) 2011 - 2017 Satoru SATOH <ssato @ redhat.com>
3
# License: MIT
4
#
5
# Some XML modules may be missing and Base.{load,dumps}_impl are not overridden:
6
# pylint: disable=import-error
7
"""XML files parser backend, should be available always.
8
9
- Format to support: XML, e.g. http://www.w3.org/TR/xml11/
10
- Requirements: one of the followings
11
12
  - lxml2.etree if available
13
  - xml.etree.ElementTree in standard lib if python >= 2.5
14
  - elementtree.ElementTree (otherwise)
15
16
- Development Status: 3 - Alpha
17
- Limitations:
18
19
  - '<prefix>attrs', '<prefix>text' and '<prefix>children' are used as special
20
    parameter to keep XML structure of original data. You have to customize
21
    <prefix> (default: '@') if any config parameters conflict with some of
22
    them.
23
24
  - Some data or structures of original XML file may be lost if dumped back
25
    to XML file; XML file - (anyconfig.load) -> config - (anyconfig.dump) ->
26
    XML file
27
28
  - XML specific features (namespace, etc.) may not be processed correctly.
29
30
- Special Options:
31
  - pprefix: Specify parameter prefix for attributes, text and children nodes.
32
33
History:
34
35
.. versionchanged:: 0.7.99
36
37
   - Try to make a nested dict w/o extra dict having keys of attrs, text and
38
     children from XML string/file as much as possible.
39
   - Support namespaces partially.
40
41
.. versionchanged:: 0.1.0
42
43
   - Added XML dump support.
44
"""
45
from __future__ import absolute_import
46
from io import BytesIO
47
48
import re
49
try:
    # First, try lxml which is compatible with elementtree and looks faster a
    # lot. See also: http://getpython3.com/diveintopython3/xml.html
    from lxml2 import etree as ET
except ImportError:
    # Fall back through the remaining implementations one at a time. Each
    # fallback needs its own 'try' block: an ImportError raised inside an
    # 'except' handler is not caught by a sibling 'except' clause of the same
    # 'try' statement, so a flat chain never reaches the last candidate.
    try:
        import xml.etree.cElementTree as ET
    except ImportError:
        try:
            import xml.etree.ElementTree as ET
        except ImportError:
            import elementtree.ElementTree as ET
60
61
import anyconfig.backend.base
62
import anyconfig.compat
63
import anyconfig.mdicts
64
import anyconfig.utils
65
66
67
_PREFIX = "@"
68
69
_ET_NS_RE = re.compile(r"^{(\S+)}(\S+)$")
70
71
72
def _iterparse(xmlfile):
73
    """
74
    Avoid bug in python 3.{2,3}. See http://bugs.python.org/issue9257.
75
76
    :param xmlfile: XML file or file-like object
77
    """
78
    try:
79
        return ET.iterparse(xmlfile, events=("start-ns", ))
80
    except TypeError:
81
        return ET.iterparse(xmlfile, events=(b"start-ns", ))
82
83
84
def flip(tpl):
    """
    Make a pair of the first two items of `tpl` in swapped order.

    :param tpl: A sequence having two items at least
    :return: A tuple, (tpl[1], tpl[0])

    >>> flip((1, 2))
    (2, 1)
    """
    second = tpl[1]
    first = tpl[0]
    return (second, first)
90
91
92
def _namespaces_from_file(xmlfile):
    """
    Collect namespace declarations found while parsing `xmlfile`.

    Each 'start-ns' event carries a (prefix, uri) pair; the result is keyed by
    uri, so a later declaration of the same uri replaces an earlier one.

    :param xmlfile: XML file or file-like object
    :return: {namespace_uri: namespace_prefix} or {}
    """
    nspaces = dict()
    for _event, (prefix, uri) in _iterparse(xmlfile):
        nspaces[uri] = prefix

    return nspaces
98
99
100
def _gen_tags(pprefix=_PREFIX):
    """
    Make up the special parameter names by prepending `pprefix` to each of
    'attrs', 'text' and 'children'.

    :param pprefix: Special parameter name prefix
    :return: A tuple of prefixed (attributes, text, children)
    """
    return (pprefix + "attrs", pprefix + "text", pprefix + "children")
108
109
110
def _tweak_ns(tag, nspaces):
111
    """
112
    :param tag: XML tag element
113
    :param nspaces: A namespaces dict, {uri: prefix}
114
115
    >>> _tweak_ns("a", {})
116
    'a'
117
    >>> _tweak_ns("a", {"http://example.com/ns/val/": "val"})
118
    'a'
119
    >>> _tweak_ns("{http://example.com/ns/val/}a",
120
    ...           {"http://example.com/ns/val/": "val"})
121
    'val:a'
122
    """
123
    if nspaces:
124
        matched = _ET_NS_RE.match(tag)
125
        if matched:
126
            (uri, tag) = matched.groups()
127
            prefix = nspaces.get(uri, False)
128
            if prefix:
129
                return "%s:%s" % (prefix, tag)
130
131
    return tag
132
133
134
def _elem_strip_text(elem):
135
    """
136
    :param elem: etree elem object
137
    """
138
    if elem.text:
139
        elem.text = elem.text.strip()
140
141
142
def elem_to_container(elem, to_container, nspaces, tags=False):
    """
    Convert XML ElementTree Element to a collection of container objects.

    The result is a container holding one item whose key is the tag of `elem`
    (with its namespace uri replaced by the prefix, see `_tweak_ns`) and whose
    value is one of the followings:

    - the stripped text, if `elem` has text but no attributes nor children
    - the converted child, if `elem` has exactly one child; attributes and
      text of `elem` are not kept in this case
    - a container which may hold attributes, text and a list of converted
      children under the special parameter names given by `tags`, otherwise

    .. note:: The text of `elem` is stripped in place.

    :param elem: etree elem object or None
    :param to_container: callable to make a container object
    :param nspaces: A namespaces dict, {uri: prefix}
    :param tags: (attrs, text, children) parameter names
    :return: A container object made by `to_container`; empty if elem is None
    """
    tree = to_container()
    if elem is None:
        return tree

    # Resolve the tag once and use it for every assignment into `tree` below,
    # so that all results for this element are stored under the same key even
    # if the tag is a namespaced one, '{uri}name'.
    key = _tweak_ns(elem.tag, nspaces)
    subtree = tree[key] = to_container()
    (attrs, text, children) = tags if tags else _gen_tags()
    _num_of_children = len(elem)
    _elem_strip_text(elem)

    if elem.attrib:
        subtree[attrs] = to_container(elem.attrib)

    if elem.text:
        if _num_of_children or elem.attrib:
            subtree[text] = elem.text
        else:
            # .. note:: Treat as special case for later convenience.
            tree[key] = elem.text

    if _num_of_children:
        # Note: Configuration item cannot have both attributes and values
        # (list) at the same time in current implementation:
        args = (to_container, nspaces, tags)
        if _num_of_children == 1:  # .. note:: Another special case.
            tree[key] = elem_to_container(elem[0], *args)
        else:
            subtree[children] = [elem_to_container(c, *args) for c in elem]

    return tree
180
181
182
def root_to_container(root, to_container, nspaces, pprefix=_PREFIX):
    """
    Convert XML ElementTree Root Element to a collection of container objects.

    Namespaces in `nspaces` are set to `root` as its 'xmlns' / 'xmlns:<prefix>'
    attributes first, then `root` is converted with `elem_to_container`.

    :param root: etree root object or None
    :param to_container: callable to make a container object
    :param nspaces: A namespaces dict, {uri: prefix}
    :param pprefix: Special parameter name prefix
    :return: A container object made by `to_container`; empty if root is None
    """
    empty = to_container()
    if root is None:
        return empty

    if nspaces is None:
        nspaces = dict()

    pairs = nspaces.items() if nspaces else []
    for uri, prefix in pairs:
        # The default namespace (empty prefix) is declared by plain 'xmlns'.
        attr = ("xmlns:" + prefix) if prefix else "xmlns"
        root.attrib[attr] = uri

    tags = _gen_tags(pprefix)
    return elem_to_container(root, to_container, nspaces, tags)
203
204
205
def _elem_from_descendants(children, pprefix=_PREFIX):
    """
    Yield an XML element for each item of each child in `children`.

    The key of an item becomes the tag of the element and the value of it is
    converted into the content of the element by `container_to_etree`.

    :param children: A list of child dict objects
    :param pprefix: Special parameter name prefix
    :return: A generator yields XML element objects
    """
    for child in children:  # child should be a dict-like object.
        items = anyconfig.compat.iteritems(child)
        for name, value in items:
            node = ET.Element(name)
            container_to_etree(value, parent=node, pprefix=pprefix)
            yield node
215
216
217
def _make_etree(key, val, parent=None, pprefix=_PREFIX):
    """
    Make an element tagged `key` from `val` and wrap it into an ElementTree.

    The new element is appended to `parent` and the tree is rooted at `parent`
    if `parent` is given, otherwise the tree is rooted at the new element.

    :param key: Key of current child (dict{,-like} object)
    :param val: Value of current child (dict{,-like} object)
    :param parent: XML ElementTree parent node object or None
    :param pprefix: Special parameter name prefix
    :return: XML ElementTree object
    """
    elem = ET.Element(key)
    container_to_etree(val, parent=elem, pprefix=pprefix)

    root = elem  # 'elem' is the top level etree unless parent is given.
    if parent is not None:
        parent.append(elem)
        root = parent

    return ET.ElementTree(root)
231
232
233
def container_to_etree(obj, parent=None, pprefix=_PREFIX):
    """
    Convert a dict-like object to XML ElementTree.

    Items of `obj` keyed by the special parameter names (see `_gen_tags`) set
    attributes, text and children of `parent`. Any other item becomes a new
    element tagged with its key. At the top level (`parent` is None) only the
    first such item is converted, as the root element of the resulting tree;
    under a given `parent`, every such item is appended to it.

    :param obj: Container instance to convert to
    :param parent: XML ElementTree parent node object or None
    :param pprefix: Special parameter name prefix
    :return:
        XML ElementTree object, or None if `obj` is not a dict-like object or
        it has no items other than the special ones
    """
    if not anyconfig.mdicts.is_dict_like(obj):
        if parent is not None and obj:
            parent.text = obj  # Parent is a leaf text node.
        return None  # All attributes and text should be set already.

    (attrs, text, children) = _gen_tags(pprefix)
    tree = None
    for key, val in anyconfig.compat.iteritems(obj):
        if key == attrs:
            for attr, aval in anyconfig.compat.iteritems(val):
                parent.set(attr, aval)
        elif key == text:
            parent.text = val
        elif key == children:
            for celem in _elem_from_descendants(val, pprefix=pprefix):
                parent.append(celem)
        elif parent is None:
            # Top level: the first ordinary item makes the root element.
            return _make_etree(key, val, parent=parent, pprefix=pprefix)
        else:
            # Do not return here; otherwise the rest of the items of this
            # mapping would be dropped from the output silently.
            tree = _make_etree(key, val, parent=parent, pprefix=pprefix)

    return tree
258
259
260
def etree_write(tree, stream):
    """
    Write XML ElementTree `tree` content into `stream` in UTF-8.

    .. note:
       It seems that ET.ElementTree.write() cannot process a parameter
       'xml_declaration' in python 2.6, so it is passed only if python is not
       that version.

    :param tree: XML ElementTree object
    :param stream: File or file-like object can write to
    """
    if not anyconfig.compat.IS_PYTHON_2_6:
        tree.write(stream, encoding='UTF-8', xml_declaration=True)
        return

    tree.write(stream, encoding='UTF-8')
275
276
277
class Parser(anyconfig.backend.base.ToStreamDumper):
    """
    Parser for XML files.
    """
    _type = "xml"
    _extensions = ["xml"]
    _open_flags = ('rb', 'wb')
    _load_opts = _dump_opts = ["pprefix"]

    def load_from_string(self, content, to_container, **kwargs):
        """
        Load config from XML snippet (a string `content`).

        `content` is parsed to get the root element, and it is also wrapped
        into an in-memory stream to collect its namespaces.

        :param content:
            XML snippet string of str (python 2) or bytes (python 3) type
        :param to_container: callable to make a container object
        :param kwargs:
            optional keyword parameters passed to `root_to_container`

        :return: Dict-like object holding config parameters
        """
        root = ET.fromstring(content)
        to_stream = (BytesIO if anyconfig.compat.IS_PYTHON_3
                     else anyconfig.compat.StringIO)
        nspaces = _namespaces_from_file(to_stream(content))
        return root_to_container(root, to_container, nspaces, **kwargs)

    def load_from_path(self, filepath, to_container, **kwargs):
        """
        Load config from the XML file `filepath`.

        :param filepath: XML file path
        :param to_container: callable to make a container object
        :param kwargs:
            optional keyword parameters passed to `root_to_container`

        :return: Dict-like object holding config parameters
        """
        etree = ET.parse(filepath)
        root = etree.getroot()
        nspaces = _namespaces_from_file(filepath)
        return root_to_container(root, to_container, nspaces, **kwargs)

    def load_from_stream(self, stream, to_container, **kwargs):
        """
        Load config from the XML file or file-like object `stream`.

        Namespaces are collected from the path which
        `anyconfig.utils.get_path_from_stream` gives for `stream`.

        :param stream: XML file or file-like object
        :param to_container: callable to make a container object
        :param kwargs:
            optional keyword parameters passed to `root_to_container`

        :return: Dict-like object holding config parameters
        """
        etree = ET.parse(stream)
        root = etree.getroot()
        path = anyconfig.utils.get_path_from_stream(stream)
        nspaces = _namespaces_from_file(path)
        return root_to_container(root, to_container, nspaces, **kwargs)

    def dump_to_string(self, cnf, **kwargs):
        """
        Dump `cnf` as XML into an in-memory buffer and return its content.

        :param cnf: Configuration data to dump
        :param kwargs:
            optional keyword parameters passed to `container_to_etree`

        :return: string represents the configuration
        """
        etree = container_to_etree(cnf, **kwargs)
        out = BytesIO()
        etree_write(etree, out)
        return out.getvalue()

    def dump_to_stream(self, cnf, stream, **kwargs):
        """
        Dump `cnf` as XML into `stream`.

        :param cnf: Configuration data to dump
        :param stream: Config file or file like object write to
        :param kwargs:
            optional keyword parameters passed to `container_to_etree`
        """
        etree_write(container_to_etree(cnf, **kwargs), stream)
350
351
# vim:sw=4:ts=4:et:
352