Completed
Push — feature/xmlns-support ( 56436e...570432 )
by Rasmus
02:47
created

Parser::onCharacterData()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 9.4285
cc 1
eloc 2
nc 1
nop 2
crap 1
1
<?php
2
3
namespace mindplay\easyxml;
4
use RuntimeException;
5
6
/**
7
 * This class implements parsing of XML files and content.
8
 */
9
class Parser extends Visitor
10
{
11
    const ENCODING_UTF8 = 'UTF-8';
12
    const ENCODING_ISO = 'ISO-8859-1';
13
    const ENCODING_ASCII = 'US-ASCII';
14
15
    /**
16
     * @var string input character set encoding (defaults to UTF-8)
17
     *
18
     * @see Parser::ENCODING_UTF8
19
     * @see Parser::ENCODING_ISO
20
     * @see Parser::ENCODING_ASCII
21
     *
22
     * @see createParser()
23
     */
24
    public $encoding = self::ENCODING_UTF8;
25
26
    /**
27
     * @var bool if true, enable case-folding (read all element/attribute-names in lower-case)
28
     */
29
    public $case_folding = false;
30
31
    /**
32
     * @var bool if true, ignore whitespace between elements
33
     */
34
    public $skip_white = true;
35
36
    /**
37
     * @var bool if true, trim leading/trailing whitespace in text nodes
38
     */
39
    public $trim_text = true;
40
41
    /**
42
     * @var int buffer size in bytes (when reading XML files)
43
     *
44
     * @see parseFile()
45
     */
46
    public $buffer_size = 4096;
47
48
    /**
49
     * @var Visitor[] $visitors node visitor stack
50
     */
51
    protected $visitors;
52
53
    /**
54
     * @var Visitor $visitor most recent Visitor
55
     */
56
    protected $visitor;
57
58
    /**
59
     * @var string character data buffer
60
     */
61
    private $_buffer;
62
63
    /**
64
     * @var string[][] map where namespace xmlns-prefix => stack of namespace URIs
65
     */
66
    private $ns_uri = array();
67
68
    /**
69
     * @var string[] map where namespace URI => user-defined namespace prefix
70
     */
71
    private $ns_prefix = array();
72
73
    /**
74
     * @var string[][] stack where each entry is a list of namespace prefixes started at the corresponding depth
75
     */
76
    private $ns_stack = array();
77
78
    /**
     * Parse a complete XML document supplied as a string.
     *
     * The whole input is handed to the XML parser as a single, final chunk.
     * The parser is released both on success and when the input is rejected.
     *
     * @param string $input XML input
     *
     * @return void
     *
     * @throws ParserException if the XML input contains an error
     */
    public function parse($input)
    {
        /** @var resource $parser */
        $parser = $this->createParser();

        if (xml_parse($parser, $input, true) !== 1) {
            // The exception factory receives the parser, so build the exception
            // while the parser is still alive - then release it before throwing,
            // so a failed parse does not leak the parser.
            $error = ParserException::create($parser);

            xml_parser_free($parser);

            throw $error;
        }

        xml_parser_free($parser);
    }
96
97
    /**
     * Register the user-defined alias under which a namespace URI is exposed to Visitors.
     *
     * Registering the same URI again replaces the previous alias.
     *
     * @param string $uri   namespace URI (as declared by an xmlns-attribute in the document)
     * @param string $alias user-defined prefix to substitute for the document's own prefix
     *
     * @return void
     */
    public function setPrefix($uri, $alias)
    {
        // map: namespace URI => user-defined prefix
        $this->ns_prefix[$uri] = $alias;
    }
107
108
    /**
     * Parse an XML file, reading it in chunks of {@see $buffer_size} bytes.
     *
     * Every chunk is fed to the parser as a non-final chunk; once the file is
     * exhausted, the document is explicitly finalized with an empty final chunk.
     * As a consequence, an empty or truncated file is reported as a parse error,
     * the same way parse() reports it for string input.
     *
     * The file handle and the parser are released on every exit path.
     *
     * @param string $path absolute path to XML file
     *
     * @return void
     *
     * @throws RuntimeException if the XML file could not be opened
     * @throws ParserException if the XML file contains an error
     */
    public function parseFile($path)
    {
        // Open the file before creating the parser, so that a missing file
        // doesn't leave an unreleased parser behind:
        $file = @fopen($path, "r");

        if ($file === false) {
            throw new RuntimeException("could not open XML file: {$path}");
        }

        /** @var resource $parser */
        $parser = $this->createParser();

        $error = null;

        // Compare strictly: a chunk consisting of the string "0" is falsy,
        // but is still data that must reach the parser.
        while (($data = fread($file, $this->buffer_size)) !== false && $data !== '') {
            if (xml_parse($parser, $data, false) !== 1) {
                $error = ParserException::create($parser, $path);

                break;
            }
        }

        // Finalize the document explicitly, rather than relying on feof() being
        // true right after the last chunk was read - which is not the case when
        // the file ends exactly on a chunk boundary.
        if ($error === null && xml_parse($parser, '', true) !== 1) {
            $error = ParserException::create($parser, $path);
        }

        // Release resources before reporting any error:
        xml_parser_free($parser);

        fclose($file);

        if ($error !== null) {
            throw $error;
        }
    }
135
136
    /**
     * Create and configure the XML parser.
     *
     * This also resets all per-parse state of this instance (visitor stack,
     * character data buffer and namespace scopes), so that a previous parse -
     * including one that was aborted by an error - cannot affect the next one.
     * User-defined namespace aliases registered via setPrefix() are kept.
     *
     * @return resource
     */
    protected function createParser()
    {
        // reset the visitor stack (this Parser is the root Visitor):
        $this->visitor = $this;
        $this->visitors = array($this);

        // reset the character data buffer:
        $this->_buffer = '';

        // reset namespace scopes - an aborted parse never pops the scopes it opened,
        // and stale scopes would resolve prefixes to URIs of the previous document:
        $this->ns_uri = array();
        $this->ns_stack = array();

        // create and configure the parser:
        $parser = xml_parser_create($this->encoding);

        // skip whitespace-only values:
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, $this->skip_white);

        // disable the parser's built-in case-folding - names are read as-is, and
        // $case_folding is applied by the element handlers of this class instead:
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);

        // handle element start/end:
        xml_set_element_handler($parser, array($this, 'onStartElement'), array($this, 'onEndElement'));

        // handle character data:
        xml_set_character_data_handler($parser, array($this, 'onCharacterData'));

        return $parser;
    }
167
168
    /**
     * Handle the start of an element.
     *
     * Flushes pending character data, applies case-folding (if enabled), opens a
     * namespace scope for the element, resolves namespace prefixes of the element
     * and attribute names to user-defined aliases, and then notifies the current
     * Visitor. The Visitor returned by startElement() (if any) becomes the current
     * Visitor for the content of this element.
     *
     * @param resource $parser XML parser
     * @param string   $name   element name
     * @param string[] $attr   map of attributes
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_element_handler()
     */
    protected function onStartElement($parser, $name, $attr)
    {
        // Flush the character data buffer:

        $this->_flushBuffer();

        // Apply case folding:

        if ($this->case_folding === true) {
            $name = strtolower($name);

            $attr = $this->_mapAttributeNames($attr, 'strtolower');
        }

        // Handle XML namespace declarations:
        //
        // This must happen before any name of this element is resolved, because a
        // namespace declaration is in scope for the element that declares it,
        // including the element's own attributes.

        $declared = array();

        foreach ($attr as $attr_name => $value) {
            if (strncmp($attr_name, "xmlns:", 6) === 0) {
                $prefix = substr($attr_name, 6);

                $this->ns_uri[$prefix][] = $value; // URI

                $declared[] = $prefix;
            }
        }

        // Always push an entry (even when empty), so that onEndElement() can
        // pop exactly one entry per element:

        $this->ns_stack[] = $declared;

        // Resolve namespace prefixes of attribute names (using the default separator):

        $attr = $this->_mapAttributeNames($attr, array($this, "applyUserPrefix"));

        // Notify current Visitor and push the next Visitor onto the stack:

        $next_visitor = $this->visitor->startElement($this->applyUserPrefix($name, ":"), $attr);

        $this->visitor = $next_visitor ?: $this->visitor;

        // The raw return value is pushed (possibly empty) - onEndElement() skips
        // empty entries when looking for the Visitor to restore:

        $this->visitors[] = $next_visitor;
    }

    /**
     * Apply a callback to every attribute name, keeping attribute values and order.
     *
     * @param string[] $attr     map where attribute name => value
     * @param callable $callback function receiving an attribute name and returning the new name
     *
     * @return string[] map where new attribute name => value
     */
    private function _mapAttributeNames(array $attr, $callback)
    {
        if (count($attr) === 0) {
            return $attr; // nothing to map (and avoids calling array_combine() with empty arrays)
        }

        return array_combine(
            array_map($callback, array_keys($attr)),
            array_values($attr)
        );
    }
226
227
    /**
     * Handle the end of an element.
     *
     * Flushes pending character data, applies case-folding (if enabled), resolves
     * the element name, closes the namespace scope opened by the matching
     * onStartElement() call, restores the previous Visitor and notifies it.
     *
     * @param resource $parser XML parser
     * @param string   $name   element name
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_element_handler()
     */
    protected function onEndElement($parser, $name)
    {
        // Flush the character data buffer:

        $this->_flushBuffer();

        // Apply case folding:

        if ($this->case_folding === true) {
            $name = strtolower($name);
        }

        // Resolve the element name while the namespaces declared by this element
        // are still in scope - otherwise endElement() would receive a different
        // name than the one startElement() received for the same element:

        $resolved_name = $this->applyUserPrefix($name, ":");

        // Handle XML namespaces falling out of scope:

        $prefixes = array_pop($this->ns_stack);

        if (is_array($prefixes)) { // array_pop() returns null for an empty stack
            foreach ($prefixes as $prefix) {
                array_pop($this->ns_uri[$prefix]);
            }
        }

        // Get previous Visitor from stack and notify:

        array_pop($this->visitors);

        $this->visitor = null;

        // Walk down the stack to the nearest non-empty entry (entries are empty
        // for elements where startElement() did not return a Visitor):

        for ($n = count($this->visitors) - 1; $n >= 0 && !$this->visitor; $n--) {
            $this->visitor = $this->visitors[$n];
        }

        $this->visitor->endElement($resolved_name);
    }
268
269
    /**
     * Collect a piece of text node content.
     *
     * The data is only appended to the internal buffer here; the buffered text is
     * handed to the current Visitor when the buffer is flushed by the element
     * start/end handlers.
     *
     * @param resource $parser XML parser
     * @param string   $data   partial text node content
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_character_data_handler()
     */
    protected function onCharacterData($parser, $data)
    {
        // Append to whatever has been buffered so far:
        $this->_buffer = $this->_buffer . $data;
    }
284
285
    /**
     * Replace the namespace prefix used in the XML document with the user-defined prefix.
     *
     * The prefix in front of the first colon is looked up in the namespace scopes
     * currently open (declared by xmlns-attributes) to find its URI; if an alias was
     * registered for that URI, the name is rebuilt as alias + separator + local name.
     *
     * For example, given `a:foo`, where `a` resolves to `http://foo/`, and `b` is the
     * alias registered for that URI, the result is `b_foo` with the default separator.
     *
     * The name is returned unchanged if it has no prefix, if the prefix is not
     * currently declared, or if no alias is registered for the namespace URI.
     *
     * @param string $name      element or attribute name, as it appears in the document
     * @param string $separator string placed between the user-defined prefix and the local name
     *
     * @return string
     */
    private function applyUserPrefix($name, $separator = "_")
    {
        $colon = strpos($name, ":");

        if ($colon !== false) {
            $xml_prefix = substr($name, 0, $colon);

            if (!empty($this->ns_uri[$xml_prefix])) {
                // the innermost declaration of the prefix is at the top of its stack:
                $uri = $this->ns_uri[$xml_prefix][count($this->ns_uri[$xml_prefix]) - 1];

                if (isset($this->ns_prefix[$uri])) {
                    return $this->ns_prefix[$uri] . $separator . substr($name, $colon + 1);
                }

                // TODO QA: throw for namespace with no user-defined alias?
            }

            // TODO QA: throw for undefined namespace in file?
        }

        return $name;
    }
320
321
    /**
     * Hand any buffered text node content to the current Visitor and empty the buffer.
     *
     * When $trim_text is enabled, leading/trailing whitespace is removed first.
     * Nothing is reported if the buffer is (or ends up being) empty.
     *
     * @return void
     */
    private function _flushBuffer()
    {
        if ($this->trim_text) {
            $this->_buffer = trim($this->_buffer);
        }

        if ($this->_buffer !== '') {
            // Notify top-most handler on current stack, then start over with an empty buffer:
            $this->visitor->characterData($this->_buffer);

            $this->_buffer = '';
        }
    }
344
}
345