1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* @method static XmlParser file_get_xml($xml, $libXMLExtraOptions = null) |
9
|
|
|
* <p>Load XML from file.</p> |
10
|
|
|
* @method static XmlParser str_get_xml($xml, $libXMLExtraOptions = null) |
11
|
|
|
* <p>Load XML from string.</p> |
12
|
|
|
*/ |
13
|
|
|
class XmlParser extends HtmlDomParser |
14
|
|
|
{ |
15
|
|
|
/**
 * Static entry point for the "str_get_xml" and "file_get_xml" helpers.
 *
 * "str_get_xml" forwards to loadXml() and "file_get_xml" forwards to
 * loadXmlFile(), each on a freshly created parser instance. Any other
 * method name is rejected.
 *
 * @param string $name      name of the static method that was called
 * @param array  $arguments [0] => XML string or file path (defaults to ''),
 *                          [1] => extra libxml options (defaults to null)
 *
 * @throws \BadMethodCallException if $name is not one of the two supported helpers
 * @throws \RuntimeException       propagated from loadXmlFile()
 *
 * @return XmlParser
 */
public static function __callStatic($name, $arguments)
{
    $source = $arguments[0] ?? '';
    $extraOptions = $arguments[1] ?? null;

    $isStringLoader = $name === 'str_get_xml';
    $isFileLoader = $name === 'file_get_xml';

    // reject unknown helpers before a parser instance is created
    if (!$isStringLoader && !$isFileLoader) {
        throw new \BadMethodCallException('Method does not exist');
    }

    $parser = new static();

    return $isStringLoader
        ? $parser->loadXml($source, $extraOptions)
        : $parser->loadXmlFile($source, $extraOptions);
}
44
|
|
|
|
45
|
|
|
/**
 * String representation of the parser.
 *
 * Delegates to xml() with the fixed arguments (false, false, true, 0);
 * xml() is not declared in this class, so the meaning of those arguments
 * is defined wherever it is implemented.
 *
 * @return string
 */
public function __toString()
{
    $serialized = $this->xml(false, false, true, 0);

    return $serialized;
}
52
|
|
|
|
53
|
|
|
/**
 * Create DOMDocument from XML.
 *
 * The XML is first parsed via SimpleXML; if that succeeds without any
 * libxml errors, the owner document of the imported element becomes
 * $this->document. Otherwise the XML is loaded directly into the existing
 * $this->document, with a temporary encoding prolog prepended when the
 * input does not already start with "<?xml".
 *
 * The libxml "internal errors" and "entity loader" settings are switched on
 * for the duration of the call and restored to their previous values
 * before returning.
 *
 * @param string   $xml
 * @param int|null $libXMLExtraOptions additional LIBXML_* flags, OR-ed into the default options
 *
 * @return \DOMDocument
 */
protected function createDOMDocument(string $xml, $libXMLExtraOptions = null): \DOMDocument
{
    // set error level (previous values are remembered so they can be restored below)
    $internalErrors = \libxml_use_internal_errors(true);
    $disableEntityLoader = \libxml_disable_entity_loader(true);
    \libxml_clear_errors();

    $optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET;

    // these constants only exist with newer libxml versions
    if (\defined('LIBXML_BIGLINES')) {
        $optionsXml |= \LIBXML_BIGLINES;
    }

    if (\defined('LIBXML_COMPACT')) {
        $optionsXml |= \LIBXML_COMPACT;
    }

    if ($libXMLExtraOptions !== null) {
        $optionsXml |= $libXMLExtraOptions;
    }

    $xml = self::replaceToPreserveHtmlEntities($xml);

    // first attempt: SimpleXML, accepted only if libxml reported no errors at all
    $documentFound = false;
    $sxe = \simplexml_load_string($xml, \SimpleXMLElement::class, $optionsXml);
    if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
        $domElementTmp = \dom_import_simplexml($sxe);
        if ($domElementTmp) {
            $documentFound = true;
            $this->document = $domElementTmp->ownerDocument;
        }
    }

    // fallback: load the XML directly into the current document
    if ($documentFound === false) {

        // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
        $xmlHackUsed = false;
        // stripos() takes the haystack first: only prepend the encoding prolog
        // when the input does not already begin with an "<?xml" declaration
        if (\stripos($xml, '<?xml') !== 0) {
            $xmlHackUsed = true;
            $xml = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $xml;
        }

        $this->document->loadXML($xml, $optionsXml);

        // remove the "xml-encoding" hack
        if ($xmlHackUsed) {
            foreach ($this->document->childNodes as $child) {
                if ($child->nodeType === \XML_PI_NODE) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $this->document->removeChild($child);

                    // only the first processing instruction is removed
                    break;
                }
            }
        }
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    \libxml_clear_errors();
    \libxml_use_internal_errors($internalErrors);
    \libxml_disable_entity_loader($disableEntityLoader);

    return $this->document;
}
128
|
|
|
|
129
|
|
|
/**
 * Load XML from string.
 *
 * Builds a DOM document from the given XML via createDOMDocument() and
 * stores it as the parser's current document.
 *
 * @param string   $xml
 * @param int|null $libXMLExtraOptions passed through to createDOMDocument()
 *
 * @return XmlParser this instance, for chaining
 */
public function loadXml(string $xml, $libXMLExtraOptions = null): self
{
    $dom = $this->createDOMDocument($xml, $libXMLExtraOptions);
    $this->document = $dom;

    return $this;
}
143
|
|
|
|
144
|
|
|
/**
 * Load XML from file.
 *
 * Paths starting with "http://" or "https://" skip the local existence
 * check; every other path must exist on disk. The content is read via
 * UTF8::file_get_contents() when that class is available, otherwise via
 * the native file_get_contents(), and then handed to loadXml().
 *
 * @param string   $filePath           local path or http(s) URL
 * @param int|null $libXMLExtraOptions passed through to loadXml()
 *
 * @throws \RuntimeException if the file does not exist or could not be read
 *
 * @return XmlParser
 */
public function loadXmlFile(string $filePath, $libXMLExtraOptions = null): self
{
    if (
        !\preg_match("/^https?:\/\//i", $filePath)
        &&
        !\file_exists($filePath)
    ) {
        throw new \RuntimeException("File {$filePath} not found");
    }

    try {
        if (\class_exists('\voku\helper\UTF8')) {
            /** @noinspection PhpUndefinedClassInspection */
            $xml = UTF8::file_get_contents($filePath);
        } else {
            $xml = \file_get_contents($filePath);
        }
    } catch (\Exception $e) {
        // keep the original failure attached so the root cause is not lost
        throw new \RuntimeException("Could not load file {$filePath}", 0, $e);
    }

    if ($xml === false) {
        throw new \RuntimeException("Could not load file {$filePath}");
    }

    return $this->loadXml($xml, $libXMLExtraOptions);
}
181
|
|
|
|
182
|
|
|
/**
 * Recursively replace the content of text nodes with the result of a callback.
 *
 * For every text node below $domNode, the callback receives the node's
 * text (after putReplacedBackToPreserveHtmlEntities()) and its return value
 * (after replaceToPreserveHtmlEntities()) becomes the new text node.
 * Non-text children are processed recursively.
 *
 * @param callable      $callback invoked with the old text, returns the new text
 * @param \DOMNode|null $domNode  node to start from; the parser's document is used when null
 */
public function replaceTextWithCallback($callback, \DOMNode $domNode = null)
{
    $domNode = $domNode ?? $this->document;

    if (!$domNode->hasChildNodes()) {
        return;
    }

    // take a snapshot first: the child list is modified while it is processed
    $snapshot = [];
    foreach ($domNode->childNodes as $node) {
        $snapshot[] = $node;
    }

    foreach ($snapshot as $node) {
        if ($node->nodeType !== \XML_TEXT_NODE) {
            $this->replaceTextWithCallback($callback, $node);

            continue;
        }

        $originalText = self::putReplacedBackToPreserveHtmlEntities($node->wholeText);
        $replacementText = $callback($originalText);

        // the replacement node is created via the owner document, so nothing is replaced without one
        $owner = $domNode->ownerDocument;
        if ($owner) {
            $replacementNode = $owner->createTextNode(self::replaceToPreserveHtmlEntities($replacementText));
            $domNode->replaceChild($replacementNode, $node);
        }
    }
}
214
|
|
|
} |
215
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.