1
|
|
|
<?php namespace BetterDOMDocument; |
2
|
|
|
|
3
|
|
|
use Symfony\Component\CssSelector\CssSelectorConverter; |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* Highwire Better DOM Document |
7
|
|
|
* |
8
|
|
|
* Copyright (c) 2010-2011 Board of Trustees, Leland Stanford Jr. University |
9
|
|
|
* This software is open-source licensed under the GNU Public License Version 2 or later |
10
|
|
|
*/ |
11
|
|
|
class DOMDoc extends \DOMDocument { |
12
|
|
|
|
13
|
|
|
private $auto_ns = FALSE; |
14
|
|
|
public $ns = array(); |
15
|
|
|
public $default_ns = FALSE; |
16
|
|
|
public $error_checking = 'strict'; // Can be 'strict', 'warning', 'none' / FALSE |
17
|
|
|
|
18
|
|
|
/**
 * Create a new DOMDoc
 *
 * @param mixed $xml
 *  $xml can either be an XML string, a DOMDocument, or a DOMElement.
 *  You can also pass FALSE or NULL (or omit it) and load XML later using loadXML or loadHTML
 *
 * @param mixed $auto_register_namespaces
 *  Auto-register namespaces. All namespaces in the root element will be registered for use in xpath queries.
 *  Namespaces that are not declared in the root element will not be auto-registered
 *  Defaults to TRUE (Meaning it will auto register all auxiliary namespaces but not the default namespace).
 *  Pass a prefix string to automatically register the default namespace.
 *  Pass FALSE to disable auto-namespace registration
 *
 * @param mixed $error_checking
 *  Can be 'strict', 'warning', or 'none'. Defaults to 'strict'.
 *  'none' suppresses all errors
 *  'warning' is the default behavior in DOMDocument
 *  'strict' corresponds to DOMDocument strictErrorChecking TRUE
 */
public function __construct($xml = FALSE, $auto_register_namespaces = TRUE, $error_checking = 'strict') {
  parent::__construct();

  $this->setErrorChecking($error_checking);

  if (is_object($xml)) {
    // The branches are mutually exclusive: a DOMDoc is also a DOMDocument, so
    // testing both independently would import and append its root twice.
    if ($xml instanceof \DOMElement) {
      $this->appendChild($this->importNode($xml, true));
    }
    elseif ($xml instanceof \DOMDocument) {
      if ($xml->documentElement) {
        $this->appendChild($this->importNode($xml->documentElement, true));
      }
      // Only our own subclass carries a registered-namespace map to inherit.
      if ($xml instanceof self) {
        $this->ns = $xml->ns;
      }
    }
  }
  else if (is_string($xml) && !empty($xml)) {
    if ($this->error_checking == 'none') {
      // 'none' means the caller asked for parse problems to be silenced.
      @$this->loadXML($xml, LIBXML_COMPACT);
    }
    else if (!$this->loadXML($xml, LIBXML_COMPACT)) {
      trigger_error('BetterDOMDocument\DOMDoc: Could not load: ' . htmlspecialchars($xml), E_USER_WARNING);
    }
  }

  if ($auto_register_namespaces) {
    $this->AutoRegisterNamespace($auto_register_namespaces);
  }
}
72
|
|
|
|
73
|
|
|
/**
 * Register a namespace prefix so it can be used in xpath queries
 *
 * An existing registration for the same prefix is overwritten.
 *
 * @param string $prefix
 *  Namespace prefix to register
 *
 * @param string $url
 *  Canonical URL for this namespace prefix
 */
public function registerNamespace($prefix, $url) {
  $this->ns[$prefix] = $url;
}
85
|
|
|
|
86
|
|
|
/**
 * Get the registered namespaces
 *
 * @return array
 *  Map of namespace prefix => namespace URL
 */
public function getNamespaces() {
  $registered = $this->ns;

  return $registered;
}
92
|
|
|
|
93
|
|
|
/**
 * Given a namespace URL, get the prefix it is registered under
 *
 * @param string $url
 *  Canonical URL of the namespace
 *
 * @return string|false
 *  The namespace prefix, or FALSE if no registered namespace has that URL
 */
public function lookupPrefix($url) {
  // array_search() yields the key (the prefix) of the first matching URL.
  $prefix = array_search($url, $this->ns);

  return $prefix;
}
105
|
|
|
|
106
|
|
|
/**
 * Given a namespace prefix, get the URL it is registered to
 *
 * @param string $prefix
 *  Namespace prefix
 *
 * @return string|false
 *  The namespace URL, or FALSE if there is no namespace with that prefix
 */
public function lookupURL($prefix) {
  return isset($this->ns[$prefix]) ? $this->ns[$prefix] : FALSE;
}
123
|
|
|
|
124
|
|
|
/**
 * Run an xpath query and get the matching nodes.
 *
 * @param string $xpath
 *  xpath to be used for query
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Provides context for the xpath query
 *
 * @return DOMList|false
 *  A DOMList wrapping the query result, or FALSE when the context resolves
 *  to FALSE or the query itself fails.
 */
public function xpath($xpath, $context = NULL) {
  // Resolve a string context to a node; an omitted context stays empty.
  $this->createContext($context, 'xpath', FALSE);

  if ($context === FALSE) {
    return FALSE;
  }

  $query_engine = new \DOMXPath($this);

  // Make every registered prefix usable inside the expression.
  foreach ($this->ns as $prefix => $uri) {
    $query_engine->registerNamespace($prefix, $uri);
  }

  $nodes = $context
    ? $query_engine->query($xpath, $context)
    : $query_engine->query($xpath);

  return $nodes ? new DOMList($nodes, $this) : FALSE;
}
165
|
|
|
|
166
|
|
|
|
167
|
|
|
/**
 * Run an xpath query and get only the first matching node
 *
 * @param string $xpath
 *  xpath to be used for query
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Provides context for the xpath query
 *
 * @return mixed
 *  The first node found by the xpath query, or FALSE when the query
 *  failed or matched nothing
 */
public function xpathSingle($xpath, $context = NULL) {
  $nodes = $this->xpath($xpath, $context);

  if (!empty($nodes) && count($nodes)) {
    return $nodes->item(0);
  }

  return FALSE;
}
190
|
|
|
|
191
|
|
|
|
192
|
|
|
/**
 * Given a CSS selector, get a list of nodes.
 *
 * The selector is converted to xpath with CssSelectorConverter and then
 * handed to xpath().
 *
 * @param string $css_selector
 *  CSS Selector to be used for query
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Provides context for the CSS selector
 *
 * @return DOMList|false
 *  Whatever xpath() returns for the converted selector
 */
public function select($css_selector, $context = NULL) {
  $as_xpath = (new CssSelectorConverter())->toXPath($css_selector);

  return $this->xpath($as_xpath, $context);
}
211
|
|
|
|
212
|
|
|
/**
 * Given a CSS selector, get a single node.
 *
 * The selector is converted to xpath with CssSelectorConverter and then
 * handed to xpathSingle().
 *
 * @param string $css_selector
 *  CSS Selector to be used for query
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Provides context for the CSS selector
 *
 * @return mixed
 *  Whatever xpathSingle() returns for the converted selector: the first
 *  matching node, or FALSE when nothing matched
 */
public function selectSingle($css_selector, $context = NULL) {
  $as_xpath = (new CssSelectorConverter())->toXPath($css_selector);

  return $this->xpathSingle($as_xpath, $context);
}
231
|
|
|
|
232
|
|
|
/**
 * Get the document (or an element) as an array
 *
 * Attributes are stored under '@name', a lone text child or a CDATA child
 * under '#text', and each child element under its node name as a list of
 * recursively converted arrays.
 *
 * @param string $raw
 *  Can be either FALSE, 'full', or 'inner'. Defaults to FALSE.
 *  When set to 'full' every node's full XML is also attached to the array
 *  When set to 'inner' every node's inner XML is attached to the array.
 *
 * @param mixed $context
 *  Optional context node. Can pass an DOMElement object or an xpath string.
 *  If passed, only the given node will be used when generating the array
 *
 * @return array|false
 *  The array representation, or FALSE when nothing was collected
 */
public function getArray($raw = FALSE, $context = NULL) {
  // Collect into a real array; writing keys into a FALSE value relies on
  // autovivification, which PHP 8.1 deprecates.
  $array = array();

  $this->createContext($context, 'xpath', FALSE);

  if ($context) {
    if ($raw == 'full') {
      $array['#raw'] = $this->saveXML($context);
    }
    if ($raw == 'inner') {
      $array['#raw'] = $this->innerText($context);
    }
    if ($context->hasAttributes()) {
      foreach ($context->attributes as $attr) {
        $array['@'.$attr->nodeName] = $attr->nodeValue;
      }
    }

    if ($context->hasChildNodes()) {
      if ($context->childNodes->length == 1 && $context->firstChild->nodeType == XML_TEXT_NODE) {
        $array['#text'] = $context->firstChild->nodeValue;
      }
      else {
        foreach ($context->childNodes as $childNode) {
          if ($childNode->nodeType == XML_ELEMENT_NODE) {
            $array[$childNode->nodeName][] = $this->getArray($raw, $childNode);
          }
          elseif ($childNode->nodeType == XML_CDATA_SECTION_NODE) {
            $array['#text'] = $childNode->textContent;
          }
        }
      }
    }
  }
  // Else no node was passed, which means we are processing the entire domDocument
  else {
    foreach ($this->childNodes as $childNode) {
      if ($childNode->nodeType == XML_ELEMENT_NODE) {
        $array[$childNode->nodeName][] = $this->getArray($raw, $childNode);
      }
    }
  }

  // Keep the historical contract: FALSE when nothing was collected.
  return $array ? $array : FALSE;
}
289
|
|
|
|
290
|
|
|
/**
 * Get the inner XML of an element
 *
 * The element is serialized with saveXML() and everything between its
 * opening and closing tag is returned.
 *
 * @param mixed $context
 *  Optional context node. Can pass an DOMElement object or an xpath string.
 *  Omitting it uses the document element.
 *
 * @return string
 *  The markup between the element's tags, or '' when the context could not
 *  be resolved or the serialized element has no separate closing tag
 */
public function innerText($context = NULL) {
  $this->createContext($context, 'xpath');

  // An xpath that matches nothing (or an empty document) leaves no node to read.
  if (!$context) {
    return '';
  }

  $name = preg_quote($context->nodeName, '/');
  $pattern = "/<" . $name . "\b[^>]*>(.*)<\/" . $name . ">/s";
  $matches = array();
  if (preg_match($pattern, $this->saveXML($context), $matches)) {
    return $matches[1];
  }

  return '';
}
308
|
|
|
|
309
|
|
|
/**
 * Create an DOMElement from XML and attach it to the DOMDocument
 *
 * Note that this does not place it anywhere in the dom tree, it merely imports it.
 * Namespaces discovered while parsing the fragment are registered on this document.
 *
 * @param string $xml
 *  XML string to import
 *
 * @return mixed
 *  The imported node, or FALSE (after raising a notice) when the XML could
 *  not be parsed into a root element
 */
public function createElementFromXML($xml) {

  // To make thing easy and make sure namespaces work properly, we add the root namespace declaration if it is not declared
  $namespaces = $this->ns;
  $xml = preg_replace_callback('/<[^\?^!].+?>/s', function($root_match) use ($namespaces) {
    // Capture the tag name together with its offset. Whitespace and "/" end
    // the name so a self-closing root such as <a:b/> is handled too.
    if (!preg_match('/<([^\s<>\/]+)/', $root_match[0], $root_tag, PREG_OFFSET_CAPTURE)) {
      return $root_match[0];
    }
    $tag_name = $root_tag[1][0];
    $tag_end = $root_tag[1][1] + strlen($tag_name);

    $declaration = '';
    if (strpos($tag_name, ':')) {
      $parts = explode(':', $tag_name);
      $prefix = $parts[0];
      if (isset($namespaces[$prefix]) && !strpos($root_match[0], "xmlns:$prefix")) {
        $declaration = " xmlns:$prefix='" . $namespaces[$prefix] . "'";
      }
    }

    // Insert right after the tag name only; a blanket str_replace would also
    // rewrite the same text inside attribute values.
    return substr_replace($root_match[0], $declaration, $tag_end, 0);
  }, $xml, 1);

  $dom = new DOMDoc($xml, $this->auto_ns);
  if (!$dom->documentElement) {
    trigger_error('BetterDomDocument\DOMDoc Error: Invalid XML: ' . $xml);
    // Nothing to import; callers in this class treat a falsy node as failure.
    return FALSE;
  }
  $element = $dom->documentElement;

  // Merge the namespaces
  foreach ($dom->getNamespaces() as $prefix => $url) {
    $this->registerNamespace($prefix, $url);
  }

  return $this->importNode($element, true);
}
349
|
|
|
|
350
|
|
|
/**
 * Append a child to the context node, make it the last child
 *
 * @param mixed $newnode
 *  $newnode can either be an XML string, a DOMDocument, or a DOMElement.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Omitting $context results in using the root document element as the context
 *
 * @return DOMElement|false
 *  The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement
 *  then you should replace your DOMElement with the returned one.
 *  FALSE when either the new node or the context could not be resolved.
 */
public function append($newnode, $context = NULL) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  $appendnode = $newnode;

  // Anything not owned by this document has to be imported first.
  if ($newnode->ownerDocument !== $this) {
    if ($newnode instanceof self) {
      // Carry over the namespaces the other DOMDoc had registered.
      foreach ($newnode->ns as $prefix => $uri) {
        $this->registerNamespace($prefix, $uri);
      }
      $newnode = $newnode->documentElement;
    }
    elseif ($newnode instanceof \DOMDocument) {
      $newnode = $newnode->documentElement;
    }
    $appendnode = $this->importNode($newnode, true);
  }

  return $context->appendChild($appendnode);
}
390
|
|
|
|
391
|
|
|
/**
 * Append a child to the context node, make it the first child
 *
 * @param mixed $newnode
 *  $newnode can either be an XML string, a DOMDocument, or a DOMElement.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Omitting $context results in using the root document element as the context
 *
 * @return DOMElement|false
 *  The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement
 *  then you should replace your DOMElement with the returned one.
 *  FALSE when either the new node or the context could not be resolved.
 */
public function prepend($newnode, $context = NULL) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // Same import handling as append(): nodes owned by another document (or a
  // whole document) must be imported before they can be inserted here.
  if ($newnode->ownerDocument !== $this) {
    if ($newnode instanceof self) {
      foreach ($newnode->ns as $prefix => $uri) {
        $this->registerNamespace($prefix, $uri);
      }
      $newnode = $newnode->documentElement;
    }
    elseif ($newnode instanceof \DOMDocument) {
      $newnode = $newnode->documentElement;
    }
    $newnode = $this->importNode($newnode, true);
  }

  return $context->insertBefore($newnode, $context->firstChild);
}
415
|
|
|
|
416
|
|
|
/**
 * Prepend a sibling to the context node, put it just before the context node
 *
 * @param mixed $newnode
 *  $newnode can either be an XML string, a DOMDocument, or a DOMElement.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Omitting $context results in using the root document element as the context
 *
 * @return DOMElement|false
 *  The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement
 *  then you should replace your DOMElement with the returned one.
 *  FALSE when the new node or the context could not be resolved, or when
 *  the context has no parent to insert into.
 */
public function prependSibling($newnode, $context = NULL) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // A detached context has no parent, so there is nowhere to put a sibling.
  $parent = $context->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Same import handling as append(): nodes owned by another document (or a
  // whole document) must be imported before they can be inserted here.
  if ($newnode->ownerDocument !== $this) {
    if ($newnode instanceof self) {
      foreach ($newnode->ns as $prefix => $uri) {
        $this->registerNamespace($prefix, $uri);
      }
      $newnode = $newnode->documentElement;
    }
    elseif ($newnode instanceof \DOMDocument) {
      $newnode = $newnode->documentElement;
    }
    $newnode = $this->importNode($newnode, true);
  }

  return $parent->insertBefore($newnode, $context);
}
440
|
|
|
|
441
|
|
|
/**
 * Append a sibling to the context node, put it just after the context node
 *
 * @param mixed $newnode
 *  $newnode can either be an XML string, a DOMDocument, or a DOMElement.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement
 *  Passing an empty $context results in using the root document element as the context
 *
 * @return DOMElement|false
 *  The $newnode, properly attached to DOMDocument. If you passed $newnode as a DOMElement
 *  then you should replace your DOMElement with the returned one.
 *  FALSE when the new node or the context could not be resolved, or when
 *  the context has no parent to insert into.
 */
public function appendSibling($newnode, $context) {
  $this->createContext($newnode, 'xml');
  $this->createContext($context, 'xpath');

  if (!$context || !$newnode) {
    return FALSE;
  }

  // A detached context has no parent, so there is nowhere to put a sibling.
  $parent = $context->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Import foreign nodes up front so both insertion positions (before the
  // next sibling, or at the end of the parent) are handled the same way.
  $appendnode = $newnode;
  if ($newnode->ownerDocument !== $this) {
    if ($newnode instanceof self) {
      foreach ($newnode->ns as $prefix => $uri) {
        $this->registerNamespace($prefix, $uri);
      }
      $newnode = $newnode->documentElement;
    }
    elseif ($newnode instanceof \DOMDocument) {
      $newnode = $newnode->documentElement;
    }
    $appendnode = $this->importNode($newnode, true);
  }

  if ($context->nextSibling) {
    // $context has an immediate sibling : insert newnode before this one
    return $parent->insertBefore($appendnode, $context->nextSibling);
  }

  // $context has no sibling next to it : insert newnode as last child of its parent
  return $parent->appendChild($appendnode);
}
487
|
|
|
|
488
|
|
|
/**
 * Given an xpath or DOMElement, return a new DOMDoc.
 *
 * @param mixed $node
 *  $node can either be an xpath string or a DOMElement.
 *
 * @param mixed $auto_register_namespaces
 *  Passed through to the constructor of the new DOMDoc. Defaults to TRUE.
 *
 * @param mixed $error_checking
 *  Passed through to the constructor of the new DOMDoc. Defaults to 'none'.
 *
 * @return DOMDoc
 *  A new DOMDoc created from the xpath or DOMElement, carrying this
 *  document's registered namespaces
 */
public function extract($node, $auto_register_namespaces = TRUE, $error_checking = 'none') {
  // Turn an xpath string into the node it points at.
  $this->createContext($node, 'xpath');

  $extracted = new DOMDoc($node, $auto_register_namespaces, $error_checking);
  $extracted->ns = $this->ns;

  return $extracted;
}
503
|
|
|
|
504
|
|
|
/**
 * Given a pair of nodes, replace the first with the second
 *
 * @param mixed $node
 *  Node to be replaced. Can either be an xpath string or a DOMNode.
 *
 * @param mixed $replace
 *  Replace $node with $replace. Replace can be an XML string, or a DOMNode
 *
 * @return mixed
 *  The node that now sits in place of $node (the imported copy when
 *  $replace came from another document), or FALSE when either argument
 *  could not be resolved or $node has no parent.
 */
public function replace($node, $replace) {
  $this->createContext($node, 'xpath');
  $this->createContext($replace, 'xml');

  if (!$node || !$replace) {
    return FALSE;
  }

  // A detached node has no parent to perform the swap on.
  $parent = $node->parentNode;
  if (!$parent) {
    return FALSE;
  }

  // Identity check on the owning document, as append() does. Comparing the
  // two documentElements breaks when either side has none.
  if ($replace->ownerDocument !== $this) {
    $replace = $this->importNode($replace, true);
  }

  $parent->replaceChild($replace, $node);

  return $replace;
}
531
|
|
|
|
532
|
|
|
/**
 * Given a node(s), remove / delete them
 *
 * @param mixed $node
 *  Can pass a DOMNode, a DOMList, DOMNodeList, an xpath string, or an array of any of these.
 *  Nodes that are not attached to a parent are skipped.
 */
public function remove($node) {
  // We can't use createContext here because we want to use the entire nodeList (not just a single element)
  if (is_string($node)) {
    $node = $this->xpath($node);
  }

  if (!$node) {
    return;
  }

  if (is_array($node) || $node instanceof DOMList) {
    foreach ($node as $item) {
      $this->remove($item);
    }
  }
  elseif ($node instanceof \DOMNodeList) {
    // Wrap the list in a DOMList first, then remove each item through the
    // branch above.
    $this->remove(new DOMList($node, $this));
  }
  elseif ($node instanceof \DOMNode && $node->parentNode) {
    $node->parentNode->removeChild($node);
  }
}
559
|
|
|
|
560
|
|
|
/**
 * Given an XSL string, transform the DOMDoc (or a passed context node)
 *
 * @param string $xsl
 *  XSL Transformation
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement. Omitting it
 *  results in transforming the entire document
 *
 * @return DOMDoc
 *  A new DOMDoc built from the transformation result
 */
public function tranform($xsl, $context = NULL) {
  // Transform this document unless a context narrows it down.
  $doc = $this;

  if ($context) {
    if (is_string($context)) {
      $context = $this->xpathSingle($context);
    }
    // The context becomes the root of a stand-alone document.
    $doc = new DOMDoc($context);
  }

  $stylesheet = new DOMDoc($xsl);
  $processor = new \XSLTProcessor();
  $processor->importStylesheet($stylesheet);

  return new DOMDoc($processor->transformToDoc($doc));
}
589
|
|
|
|
590
|
|
|
/**
 * Given a node, change its namespace to the specified namespace in situ
 *
 * The prefix/URL pair is registered on this document, the node is run
 * through an XSL transformation that re-creates every element under the
 * new namespace, and the result replaces the original node.
 *
 * @param mixed $node
 *  Node to be changed. Can either be an xpath string or a DOMElement.
 *
 * @param mixed $prefix
 *  prefix for the new namespace
 *
 * @param mixed $url
 *  The URL for the new namespace
 *
 * @return mixed
 *  The node with the new namespace. The node will also be changed in-situ in the document as well.
 *  FALSE when the node could not be resolved.
 *
 * @throws \Exception
 *  When the resolved node is not a DOMElement
 */
public function changeNamespace($node, $prefix, $url) {
  $this->createContext($node, 'xpath');

  if (!$node) {
    return FALSE;
  }

  $this->registerNamespace($prefix, $url);

  if (get_class($node) == 'DOMElement') {
    $xsl = '
      <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
        <xsl:template match="*">
          <xsl:element name="' . $prefix . ':{local-name()}" namespace="' . $url . '">
            <xsl:copy-of select="@*"/>
            <xsl:apply-templates/>
          </xsl:element>
        </xsl:template>
      </xsl:stylesheet>';

    $transformed = $this->tranform($xsl, $node);
    return $this->replace($node, $transformed->documentElement);
  }
  else {
    // @@TODO: Report the correct calling file and number
    // Fully qualified: a bare "Exception" would resolve inside this file's
    // namespace, where no such class exists.
    throw new \Exception("Changing the namespace of a " . get_class($node) . " is not supported");
  }
}
633
|
|
|
|
634
|
|
|
/**
 * Get a lossless HTML representation of the XML
 *
 * Transforms the document (or passed context) into a set of HTML spans.
 * The element name becomes the class, all other attributes become HTML5
 * "data-" attributes.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement. Omitting it
 *  results in transforming the entire document
 *
 * @param array $options
 *  Options for transforming the XML into HTML. The following options are supported:
 *  'xlink' => {TRUE or xpath}
 *    Transform xlink links into <a href> elements. If you specify 'xlink' => TRUE then
 *    it will transform all elements with xlink:type = simple into a <a href> element.
 *    Alternatively you may specify your own xpath for selecting which elements get transformed
 *    into <a href> tags.
 *
 * @return string
 *  HTML string
 */
public function asHTML($context = NULL, $options = array()) {
  $xslSimple = '
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
      <xsl:template match="*">
        <span class="{translate(name(.),\':\',\'-\')}">
          <xsl:for-each select="./@*">
            <xsl:attribute name="data-{translate(name(.),\':\',\'-\')}">
              <xsl:value-of select="." />
            </xsl:attribute>
          </xsl:for-each>
          <xsl:apply-templates/>
        </span>
      </xsl:template>
    </xsl:stylesheet>';

  $xslOptions = '
    <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xlink="http://www.w3.org/1999/xlink" ||namespaces||>
      <xsl:template match="*">
        <xsl:choose>
          <xsl:when test="||xlink||">
            <a class="{translate(name(.),\':\',\'-\')}">
              <xsl:for-each select="./@*">
                <xsl:attribute name="data-{translate(name(.),\':\',\'-\')}">
                  <xsl:value-of select="."/>
                </xsl:attribute>
              </xsl:for-each>
              <xsl:attribute name="href">
                <xsl:value-of select="@xlink:href"/>
              </xsl:attribute>
              <xsl:apply-templates/>
            </a>
          </xsl:when>
          <xsl:otherwise>
            <span class="{translate(name(.),\':\',\'-\')}">
              <xsl:for-each select="./@*">
                <xsl:attribute name="data-{translate(name(.),\':\',\'-\')}">
                  <xsl:value-of select="." />
                </xsl:attribute>
              </xsl:for-each>
              <xsl:apply-templates/>
            </span>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:template>
    </xsl:stylesheet>';

  if (empty($options)) {
    $transformed = $this->tranform($xslSimple, $context);
    return $transformed->out();
  }

  // Collect ALL namespace declarations first and substitute the placeholder
  // once afterwards, so it is also cleared when no namespace is registered.
  // xsl and xlink are already declared by the stylesheet itself.
  $namespaces = '';
  foreach ($this->getNamespaces() as $prefix => $url) {
    if ($prefix != 'xsl' && $prefix != 'xlink') {
      // Escape the URL so characters such as & or " cannot break the attribute.
      $namespaces .= 'xmlns:' . $prefix . '="' . htmlspecialchars($url, ENT_QUOTES) . '" ';
    }
  }
  $xslOptions = str_replace("||namespaces||", $namespaces, $xslOptions);

  // Add in xlink options; a missing key behaves like an empty one.
  $xlink = isset($options['xlink']) ? $options['xlink'] : FALSE;
  if ($xlink === TRUE) {
    $xlink = "@xlink:type = 'simple'";
  }
  else if (empty($xlink)) {
    $xlink = "false()";
  }
  $xslOptions = str_replace("||xlink||", $xlink, $xslOptions);

  $transformed = $this->tranform($xslOptions, $context);

  return $transformed->out();
}
726
|
|
|
|
727
|
|
|
/**
 * Output the DOMDoc as an XML string
 *
 * Registered namespace prefixes missing from the context node are added to
 * it as xmlns attributes before serializing, so the node is modified.
 *
 * @param mixed $context
 *  $context can either be an xpath string, or a DOMElement. Omitting it
 *  results in outputting the entire document
 *
 * @return string
 *  XML string, or '' when the context could not be resolved
 */
public function out($context = NULL) {
  $this->createContext($context, 'xpath');
  if (!$context) {
    return '';
  }

  // Copy namespace prefixes
  foreach ($this->ns as $prefix => $namespace) {
    $attribute = 'xmlns:' . $prefix;
    if (!$context->hasAttribute($attribute)) {
      $context->setAttribute($attribute, $namespace);
    }
  }

  // Check to see if it's HTML, if it is we need to fix broken html void elements.
  $root = $this->documentElement;
  $is_html = $root->lookupNamespaceURI(NULL) == 'http://www.w3.org/1999/xhtml' || $root->tagName == 'html';

  $output = $this->saveXML($context, LIBXML_NOEMPTYTAG);
  if (!$is_html) {
    return $output;
  }

  // The types listed are html "void" elements.
  // Find any of these elements that have no child nodes and are therefore candidates for self-closing, replace them with a self-closed version.
  $pattern = '<(area|base|br|col|command|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(\b[^<]*)><\/\1>';

  return preg_replace('/' . $pattern . '/', '<$1$2/>', $output);
}
761
|
|
|
|
762
|
|
|
/**
 * Magic method for casting a DOMDoc as a string
 *
 * @return string
 *  The result of out() for the whole document
 */
public function __toString() {
  $xml = $this->out();

  return $xml;
}
768
|
|
|
|
769
|
24 |
|
/**
 * Set the error checking mode of this document
 *
 * @param mixed $error_checking
 *  Can be 'strict', 'warning', or 'none'. Any value loosely equal to FALSE
 *  is stored as 'none'.
 */
public function setErrorChecking($error_checking) {
  // Check up error-checking
  $this->error_checking = ($error_checking == FALSE) ? 'none' : $error_checking;

  // Keep DOMDocument's own flag in step in both directions, so switching
  // back to 'strict' re-enables strict checking as well.
  $this->strictErrorChecking = ($this->error_checking == 'strict');
}
781
|
|
|
|
782
|
14 |
|
/**
 * Create a DOMDoc from a file or URL
 *
 * @param string $file_or_url
 *  Location of the XML to load
 *
 * @param mixed $auto_register_namespaces
 *  Passed through to the DOMDoc constructor. Defaults to TRUE.
 *
 * @return DOMDoc|false
 *  The loaded document, or FALSE when it could not be loaded
 */
public static function loadFile($file_or_url, $auto_register_namespaces = TRUE) {
  // DOMDocument::load() is an instance method; calling it statically throws
  // an Error on PHP 8, so load into a plain DOMDocument first.
  $dom = new \DOMDocument();

  // Load warnings are silenced on purpose: failure is reported via FALSE.
  if (!@$dom->load($file_or_url, LIBXML_COMPACT)) {
    return FALSE;
  }

  return new DOMDoc($dom, $auto_register_namespaces);
}
790
|
|
|
|
791
|
1 |
|
/**
 * Load HTML from a string, then auto-register namespaces
 *
 * @param string $source
 *  HTML string
 *
 * @param int|null $options
 *  libxml option flags. NULL (the default) means no options.
 *
 * @return bool
 *  TRUE on success, FALSE on failure
 */
public function loadHTML($source, $options = NULL) {
  // The parent expects an int; forwarding NULL is deprecated since PHP 8.1.
  $success = parent::loadHTML($source, $options === NULL ? 0 : $options);
  $this->AutoRegisterNamespace(TRUE);

  return boolval($success);
}
797
|
|
|
|
798
|
14 |
|
/**
 * Load XML from a string, then auto-register namespaces
 *
 * @param string $source
 *  XML string
 *
 * @param int|null $options
 *  libxml option flags. NULL (the default) means no options.
 *
 * @return bool
 *  TRUE on success, FALSE on failure
 */
public function loadXML($source, $options = NULL) {
  // The parent expects an int; forwarding NULL is deprecated since PHP 8.1.
  $success = parent::loadXML($source, $options === NULL ? 0 : $options);
  $this->AutoRegisterNamespace(TRUE);

  return boolval($success);
}
804
|
|
|
|
805
|
24 |
|
/**
 * Register the namespaces found on the document
 *
 * First pass: the nodes returned by the xpath "namespace::*" query.
 * Second pass (only when nothing is registered afterwards and a document
 * element exists): the xmlns / xmlns:* attributes of the document element;
 * the original author's note describes this as the "HTML" document case.
 *
 * @param mixed $auto_register_namespaces
 *  When this is a string it is used as the prefix for the default
 *  namespace; otherwise the document element's tag name is used, unless
 *  that prefix is already registered.
 */
private function AutoRegisterNamespace($auto_register_namespaces) {
  $this->auto_ns = TRUE;

  $prefix_is_given = is_string($auto_register_namespaces);

  // If it's an "XML" document, then get namespaces via xpath
  $xpath = new \DOMXPath($this);
  foreach ($xpath->query('namespace::*') as $namespace) {
    if (empty($namespace->prefix)) {
      // No prefix: this is the default namespace.
      $this->default_ns = $namespace->nodeValue;

      if ($prefix_is_given) {
        $this->registerNamespace($auto_register_namespaces, $namespace->nodeValue);
        continue;
      }

      // Otherwise, automatically set-up the root element tag name as the prefix for the default namespace
      $tagname = $this->documentElement->tagName;
      if (empty($this->ns[$tagname])) {
        $this->registerNamespace($tagname, $this->documentElement->getAttribute('xmlns'));
      }
      continue;
    }

    // The built-in xml namespace is never registered.
    $is_builtin_xml = $namespace->prefix == 'xml' || $namespace->nodeValue == 'http://www.w3.org/XML/1998/namespace';
    if (!$is_builtin_xml) {
      $this->registerNamespace($namespace->prefix, $namespace->nodeValue);
    }
  }

  // If it's an "HTML" document, we get namespaces via attributes
  if (!empty($this->ns) || empty($this->documentElement)) {
    return;
  }

  foreach ($this->documentElement->attributes as $attr) {
    if ($attr->name == 'xmlns') {
      $this->default_ns = $attr->value;

      // If auto_register_namespaces is a prefix string, then we register the default namespace to that string
      if ($prefix_is_given) {
        $this->registerNamespace($auto_register_namespaces, $attr->value);
        continue;
      }

      // Otherwise, automatically set-up the root element tag name as the prefix for the default namespace
      $tagname = $this->documentElement->tagName;
      if (empty($this->ns[$tagname])) {
        $this->registerNamespace($tagname, $attr->value);
      }
    }
    elseif (strncmp($attr->name, 'xmlns:', 6) === 0) {
      $this->registerNamespace(substr($attr->name, 6), $attr->value);
    }
  }
}
855
|
|
|
|
856
|
22 |
|
/**
 * Normalize a caller-supplied context in place
 *
 * $context is taken by reference. Empty values and strings are replaced as
 * described below; objects are left untouched in the referenced variable.
 *
 * @param mixed $context
 *  Empty value, string, or object.
 *  - Empty: replaced by the document element when $createDocument is TRUE,
 *    otherwise left as it is.
 *  - String: replaced by the result of xpathSingle() for type 'xpath', or
 *    createElementFromXML() for type 'xml'.
 *
 * @param string $type
 *  How a string $context is interpreted: 'xpath' or 'xml'
 *
 * @param bool $createDocument
 *  Whether an empty $context falls back to the document element
 *
 * @return mixed
 *  FALSE for an empty context without fallback; the element (or a
 *  DOMDocument's document element) when an object was passed; nothing
 *  otherwise. The callers visible in this class only use the by-reference
 *  result, so a DOMDocument context is NOT converted for them.
 */
private function createContext(&$context, $type = 'xpath', $createDocument = TRUE) {
  if (!$context) {
    if ($createDocument) {
      $context = $this->documentElement;
      return;
    }
    return FALSE;
  }

  if (is_string($context)) {
    switch ($type) {
      case 'xpath':
        $context = $this->xpathSingle($context);
        return;

      case 'xml':
        $context = $this->createElementFromXML($context);
        return;
    }
  }

  if (is_object($context)) {
    if ($context instanceof \DOMElement) {
      return $context;
    }
    if ($context instanceof \DOMDocument) {
      return $context->documentElement;
    }
  }
}
886
|
|
|
} |
887
|
|
|
|
888
|
|
|
|
889
|
|
|
|
890
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.