1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace mindplay\easyxml; |
4
|
|
|
use RuntimeException; |
5
|
|
|
|
6
|
|
|
/**
 * This class implements parsing of XML files and content.
 *
 * Parsing is event-driven: the Parser registers itself as the root Visitor, and
 * forwards element start/end and character data events to the current Visitor.
 * Whatever a Visitor returns from `startElement()` becomes the Visitor for the
 * content of that element (when it is non-empty); otherwise the current Visitor
 * keeps receiving events.
 */
class Parser extends Visitor
{
    const ENCODING_UTF8 = 'UTF-8';
    const ENCODING_ISO = 'ISO-8859-1';
    const ENCODING_ASCII = 'US-ASCII';

    /**
     * @var string input character set encoding (defaults to UTF-8)
     *
     * @see Parser::ENCODING_UTF8
     * @see Parser::ENCODING_ISO
     * @see Parser::ENCODING_ASCII
     *
     * @see createParser()
     */
    public $encoding = self::ENCODING_UTF8;

    /**
     * @var bool if true, enable case-folding (read all element/attribute-names in lower-case)
     */
    public $case_folding = false;

    /**
     * @var bool if true, ignore whitespace between elements
     */
    public $skip_white = true;

    /**
     * @var bool if true, trim leading/trailing whitespace in text nodes
     */
    public $trim_text = true;

    /**
     * @var int buffer size in bytes (when reading XML files)
     *
     * @see parseFile()
     */
    public $buffer_size = 4096;

    /**
     * @var Visitor[] $visitors node visitor stack; one entry per open element (plus the
     *                          Parser itself at the bottom) - an entry is empty when the
     *                          Visitor of the parent element did not return a new Visitor
     */
    protected $visitors;

    /**
     * @var Visitor $visitor most recent Visitor
     */
    protected $visitor;

    /**
     * @var string character data buffer
     */
    private $_buffer;

    /**
     * @var string[][] map where namespace xmlns-prefix => stack of namespace URIs
     */
    private $ns_uri = array();

    /**
     * @var string[] map where namespace URI => user-defined namespace prefix
     */
    private $ns_prefix = array();

    /**
     * @var string[][] stack where each entry is a list of namespace prefixes started at the corresponding depth
     */
    private $ns_stack = array();

    /**
     * Parse a complete XML document provided as a string.
     *
     * @param string $input XML input
     *
     * @return void
     *
     * @throws ParserException if the XML input contains error
     */
    public function parse($input)
    {
        /** @var resource $parser */
        $parser = $this->createParser();

        try {
            if (xml_parse($parser, $input, true) !== 1) {
                // the exception must be created while the parser is still alive:
                throw ParserException::create($parser);
            }
        } catch (\Exception $error) {
            // release the parser before propagating parse errors or Visitor exceptions:
            xml_parser_free($parser);

            throw $error;
        }

        xml_parser_free($parser);
    }

    /**
     * Set the alias used for a namespace URI in Visitors.
     *
     * @param string $uri   namespace URI
     * @param string $alias user-defined prefix substituted for whichever prefix the document uses for $uri
     *
     * @return void
     */
    public function setPrefix($uri, $alias)
    {
        $this->ns_prefix[$uri] = $alias;
    }

    /**
     * Parse an XML file, reading it in chunks of `$buffer_size` bytes.
     *
     * @param string $path absolute path to XML file
     *
     * @return void
     *
     * @throws RuntimeException if the XML file was not found
     * @throws ParserException if the XML file contains error (this includes an empty file)
     */
    public function parseFile($path)
    {
        $file = @fopen($path, "r");

        if ($file === false) {
            throw new RuntimeException("could not open XML file: {$path}");
        }

        /** @var resource $parser */
        $parser = $this->createParser();

        try {
            $this->parseStream($parser, $file, $path);
        } catch (\Exception $error) {
            // release the file handle and parser before propagating the error:
            fclose($file);
            xml_parser_free($parser);

            throw $error;
        }

        fclose($file);
        xml_parser_free($parser);
    }

    /**
     * Feed the contents of an open file to the parser, chunk by chunk.
     *
     * @param resource $parser XML parser
     * @param resource $file   open file handle
     * @param string   $path   path of the file (for error reporting only)
     *
     * @return void
     *
     * @throws ParserException if the XML file contains error
     */
    private function parseStream($parser, $file, $path)
    {
        // guard against a zero/negative buffer size, which fread() rejects:
        $length = max(1, (int) $this->buffer_size);

        while (true) {
            $data = fread($file, $length);

            // compare strictly: a chunk consisting of "0" is valid data, not end-of-file
            if ($data === false || $data === '') {
                break;
            }

            if (xml_parse($parser, $data, false) !== 1) {
                throw ParserException::create($parser, $path);
            }
        }

        // Always finalize explicitly, rather than relying on feof() being true right
        // after the last chunk was read - this makes sure truncated or empty documents
        // are reported as errors:
        if (xml_parse($parser, '', true) !== 1) {
            throw ParserException::create($parser, $path);
        }
    }

    /**
     * Create and configure the XML parser.
     *
     * This also resets all per-document state (visitor stack, character data buffer and
     * namespace scopes), so that a previously aborted parse cannot affect the next one.
     * User-defined namespace aliases (see `setPrefix()`) are preserved.
     *
     * @return resource XML parser handle (an XMLParser object on PHP 8+)
     */
    protected function createParser()
    {
        // reset the stack - the Parser itself is the root Visitor:
        $this->visitor = $this;
        $this->visitors = array($this);

        // reset the character data buffer:
        $this->_buffer = '';

        // reset namespace scopes left behind by a previous (possibly failed) parse:
        $this->ns_uri = array();
        $this->ns_stack = array();

        // create and configure the parser:
        $parser = xml_parser_create($this->encoding);

        // skip whitespace-only values:
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, $this->skip_white);

        // disable the extension's own case-folding - names are read as-is, and
        // $case_folding is applied by the element handlers instead:
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);

        // handle element start/end:
        xml_set_element_handler($parser, array($this, 'onStartElement'), array($this, 'onEndElement'));

        // handle character data:
        xml_set_character_data_handler($parser, array($this, 'onCharacterData'));

        return $parser;
    }

    /**
     * Handle the start of an element: flush pending text, open the namespace scope of
     * the element, and notify the current Visitor.
     *
     * @param resource $parser XML parser
     * @param string   $name   element name
     * @param string[] $attr   map of attributes
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_element_handler()
     */
    protected function onStartElement($parser, $name, $attr)
    {
        // Flush the character data buffer:

        $this->_flushBuffer();

        // Apply case folding:

        if ($this->case_folding === true) {
            $name = strtolower($name);
            $attr = $this->mapKeys($attr, 'strtolower');
        }

        // Handle XML namespace declarations. These must be registered *before* any
        // names are resolved, since a declaration is in scope for the element that
        // carries it and for that element's own attributes:

        $declared = array();

        foreach ($attr as $attr_name => $value) {
            if (strncmp($attr_name, "xmlns:", 6) === 0) {
                $prefix = substr($attr_name, 6);

                $this->ns_uri[$prefix][] = $value; // URI

                $declared[] = $prefix;
            }
        }

        $this->ns_stack[] = $declared;

        // Map namespaced attribute names to user-defined prefixes (using "_" as separator):

        $attr = $this->mapKeys($attr, array($this, "applyUserPrefix"));

        // Notify current Visitor and push the next Visitor onto the stack:

        $next_visitor = $this->visitor->startElement($this->applyUserPrefix($name, ":"), $attr);

        $this->visitor = $next_visitor ?: $this->visitor;

        // the raw return value is pushed (even when empty), keeping one entry per open element:
        $this->visitors[] = $next_visitor;
    }

    /**
     * Handle the end of an element: flush pending text, close the namespace scope of
     * the element, and notify the Visitor that received the matching start event.
     *
     * @param resource $parser XML parser
     * @param string   $name   element name
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_element_handler()
     */
    protected function onEndElement($parser, $name)
    {
        // Flush the character data buffer:

        $this->_flushBuffer();

        // Apply case folding:

        if ($this->case_folding === true) {
            $name = strtolower($name);
        }

        // Resolve the name while the namespaces declared by this element are still in
        // scope, so that the name reported here matches the one reported at the start:

        $resolved_name = $this->applyUserPrefix($name, ":");

        // Handle XML namespaces falling out of scope:

        $prefixes = array_pop($this->ns_stack);

        foreach ($prefixes ?: array() as $prefix) {
            array_pop($this->ns_uri[$prefix]);
        }

        // Get previous Visitor from stack and notify:

        array_pop($this->visitors);

        $this->visitor = null;

        // the nearest non-empty entry is the Visitor in charge of the parent element:
        for ($n = count($this->visitors) - 1; $n >= 0; $n--) {
            if ($this->visitors[$n]) {
                $this->visitor = $this->visitors[$n];
                break;
            }
        }

        $this->visitor->endElement($resolved_name);
    }

    /**
     * Buffer a chunk of character data; the complete text node is delivered by the
     * next element start/end event.
     *
     * @param resource $parser XML parser
     * @param string   $data   partial text node content
     *
     * @return void
     *
     * @see parse()
     * @see xml_set_character_data_handler()
     */
    protected function onCharacterData($parser, $data)
    {
        // Buffer the character data:

        $this->_buffer .= $data;
    }

    /**
     * Map namespace prefix defined in XML (by xmlns-attribute) to a user-defined prefix.
     *
     * For example, `a:foo`, where `a` resolves to `http://foo/`, and a user-defined alias has been
     * defined for that URI as `b`, the resolved name is `b_foo` - e.g. suitable for parameter injection.
     *
     * Names without a prefix, with a prefix that is not currently declared, or with a
     * namespace URI that has no user-defined alias, are returned unchanged.
     *
     * @param string $name
     * @param string $separator separator placed between the user-defined prefix and the local name
     *
     * @return string
     */
    private function applyUserPrefix($name, $separator = "_")
    {
        $pos = strpos($name, ":");

        if ($pos === false) {
            return $name; // name isn't namespaced
        }

        $prefix = substr($name, 0, $pos);

        if (empty($this->ns_uri[$prefix])) {
            return $name; // TODO QA: throw for undefined namespace in file?
        }

        // the innermost declaration of the prefix is the one in effect:
        $uris = $this->ns_uri[$prefix];
        $uri = end($uris);

        if (!isset($this->ns_prefix[$uri])) {
            return $name; // TODO QA: throw for namespace with no user-defined alias?
        }

        $user_prefix = $this->ns_prefix[$uri];

        return "{$user_prefix}{$separator}" . substr($name, $pos + 1);
    }

    /**
     * Apply a callback to every key of a map, keeping the values (and their order).
     *
     * @param array    $map      map to transform
     * @param callable $callback function receiving a key and returning the new key
     *
     * @return array map with transformed keys
     */
    private function mapKeys($map, $callback)
    {
        if (count($map) === 0) {
            return $map; // nothing to map (and array_combine() rejects empty input on older PHP)
        }

        return array_combine(
            array_map($callback, array_keys($map)),
            array_values($map)
        );
    }

    /**
     * Flush any buffered text node content to the current visitor.
     *
     * @return void
     */
    private function _flushBuffer()
    {
        $text = $this->_buffer;

        // Clear the character data buffer up front, so the same content can never be
        // delivered twice (e.g. if the Visitor throws):

        $this->_buffer = '';

        if ($this->trim_text) {
            $text = trim($text);
        } elseif ($this->skip_white && trim($text) === '') {
            // Honor $skip_white on the assembled text node: character data arrives in
            // partial chunks, so only at this point is the whole node known to be
            // whitespace-only.
            return;
        }

        if ($text === '') {
            return;
        }

        // Notify top-most handler on current stack:

        $this->visitor->characterData($text);
    }
}
345
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.