1 | <?php |
||
2 | |||
3 | /* |
||
4 | * This file is part of the Koded package. |
||
5 | * |
||
6 | * (c) Mihail Binev <[email protected]> |
||
7 | * |
||
8 | * Please view the LICENSE distributed with this source code |
||
9 | * for the full copyright and license information. |
||
10 | */ |
||
11 | |||
12 | namespace Koded\Stdlib\Serializer; |
||
13 | |||
14 | use DateTimeImmutable; |
||
15 | use DateTimeInterface; |
||
16 | use DOMDocument; |
||
17 | use DOMNode; |
||
18 | use InvalidArgumentException; |
||
19 | use Koded\Stdlib\Serializer; |
||
20 | use Throwable; |
||
21 | use function array_is_list; |
||
22 | use function count; |
||
23 | use function current; |
||
24 | use function end; |
||
25 | use function filter_var; |
||
26 | use function is_array; |
||
27 | use function is_bool; |
||
28 | use function is_float; |
||
29 | use function is_int; |
||
30 | use function is_iterable; |
||
31 | use function is_numeric; |
||
32 | use function is_object; |
||
33 | use function Koded\Stdlib\error_log; |
||
34 | use function Koded\Stdlib\json_serialize; |
||
35 | use function Koded\Stdlib\json_unserialize; |
||
36 | use function key; |
||
37 | use function preg_match; |
||
38 | use function str_contains; |
||
39 | use function str_replace; |
||
40 | use function str_starts_with; |
||
41 | use function substr; |
||
42 | use function trim; |
||
43 | use function xml_parse_into_struct; |
||
44 | use function xml_parser_create; |
||
45 | |||
46 | /** |
||
 * Class XmlSerializer is a heavily modified version of the Symfony encoder (XmlEncoder).
||
48 | * |
||
49 | * @see https://www.w3.org/TR/xmlschema-2/#built-in-datatypes |
||
50 | */ |
||
51 | class XmlSerializer implements Serializer |
||
52 | { |
||
53 | /** @var string The key name for the node value */ |
||
54 | private string $val = '#'; |
||
55 | private string|null $root; |
||
56 | |||
/**
 * @param string|null $root    Name of the XML root element, stored as given
 * @param string      $nodeKey Key that holds a node's own value in the array
 *                             representation; surrounding whitespace is trimmed
 *
 * @throws InvalidArgumentException if the trimmed node key is empty or is "@"
 *                                  (keys starting with "@" denote attributes)
 */
public function __construct(?string $root, string $nodeKey = '#')
{
    $identifier = trim($nodeKey);
    // empty() is intentional: it rejects "" and also the string "0"
    if (empty($identifier) || '@' === $identifier) {
        throw new InvalidArgumentException('Invalid node key identifier', self::E_INVALID_SERIALIZER);
    }
    $this->root = $root;
    $this->val = $identifier;
}
||
66 | |||
/**
 * @return string The identifier of this serializer (the Serializer::XML constant)
 */
public function type(): string
{
    // XML is inherited from the implemented Serializer interface
    return self::XML;
}
||
71 | |||
/**
 * @return string The key name that holds a node's own value
 */
final public function val(): string
{
    return $this->val;
}
||
76 | |||
/**
 * Serializes the value into an XML document string.
 *
 * An iterable value is rendered inside a root element named after the
 * configured root; any other value becomes a single element with that name.
 *
 * @param iterable|mixed $value
 * @return string|null XML
 * @throws InvalidArgumentException if the serializer was created without a root element name
 */
public function serialize(mixed $value): string|null
{
    // The root is nullable, but both code paths below need a real element name.
    // Fail early with one explicit error instead of a TypeError/DOMException.
    if (null === $this->root) {
        throw new InvalidArgumentException(
            'The root element name is required for XML serialization',
            self::E_INVALID_SERIALIZER
        );
    }
    $document = new DOMDocument('1.0', 'UTF-8');
    $document->formatOutput = false;
    if (is_iterable($value)) {
        $root = $document->createElement($this->root);
        $document->appendChild($root);
        // NOTE(review): the created attribute node is discarded; presumably this is
        // called for its side effect of declaring the "xsi" namespace on the
        // document element (unserialize() looks for "xmlns:xsi") - confirm
        $document->createAttributeNS('http://www.w3.org/2001/XMLSchema-instance', 'xsi:' . $this->root);
        $this->buildXml($document, $root, $value);
    } else {
        $this->appendNode($document, $document, $value, $this->root, null);
    }
    return trim($document->saveXML());
}
||
95 | |||
/**
 * Unserialize a proper XML document into array, scalar value or NULL.
 *
 * NULL is returned for an empty input and whenever anything is thrown
 * while loading or parsing the document (the failure is logged first).
 *
 * @param string $xml XML
 * @return mixed scalar|array|null
 */
public function unserialize(string $xml): mixed
{
    $xml = trim($xml);
    if (empty($xml)) {
        return null;
    }
    try {
        $document = new DOMDocument('1.0', 'UTF-8');
        $document->preserveWhiteSpace = false;
        $document->loadXML($xml);
        $root = $document->documentElement;
        // A childless root that carries the "xmlns:xsi" declaration yields an empty array
        if ($root->hasChildNodes() || !$root->getAttributeNode('xmlns:xsi')) {
            return $this->parseXml($root);
        }
        return [];
    } catch (Throwable $e) {
        $this->logUnserializeError(__METHOD__, $e->getMessage(), $xml);
        return null;
    }
}
||
123 | |||
/**
 * Renders every key/value pair of the data into the parent node.
 *
 *  - "@name" keys become attributes of the parent
 *  - the node-value key sets the parent's own value
 *  - a non-numeric key with an array value becomes one child per list
 *    element, or a single child for a non-list array
 *  - numeric keys and keys that are not valid tag names become
 *    <item key="$key">$value</item> children
 *  - everything else becomes a child element named after the key
 *
 * @param DOMDocument $document
 * @param DOMNode     $parent
 * @param iterable    $data
 */
private function buildXml(DOMDocument $document, DOMNode $parent, iterable $data): void
{
    foreach ($data as $key => $val) {
        if (str_starts_with($key, '@') && ($attribute = substr($key, 1))) {
            // node attribute
            $parent->setAttribute($attribute, $val);
            continue;
        }
        if ($this->val === $key) {
            // node value
            $parent->nodeValue = $val;
            continue;
        }
        $numericKey = is_numeric($key);
        if (false === $numericKey && is_array($val)) {
            // A list repeats the element once per entry (an empty list adds nothing);
            // any other array is nested inside a single element
            $entries = array_is_list($val) ? $val : [$val];
            foreach ($entries as $entry) {
                $this->appendNode($document, $parent, $entry, $key, null);
            }
            continue;
        }
        if ($numericKey || false === $this->hasValidName($key)) {
            // The key cannot be used as a tag name; keep it in the "key" attribute
            $this->appendNode($document, $parent, $val, 'item', $key);
            continue;
        }
        $this->appendNode($document, $parent, $val, $key, null);
    }
}
||
163 | |||
/**
 * Converts a node into its PHP value.
 *
 * Without attributes the parsed node value is returned as is. With
 * attributes, a non-array value is stored under the node-value key and
 * resolved by its type; an array value is merged over the attributes.
 *
 * @param DOMNode $node
 * @return mixed
 */
private function parseXml(DOMNode $node): mixed
{
    $attributes = $this->parseXmlAttributes($node);
    $content = $this->parseXmlValue($node);
    if ([] === $attributes) {
        return $content;
    }
    if (is_array($content)) {
        // Child values are added after the attributes (same keys are overwritten)
        foreach ($content as $name => $child) {
            $attributes[$name] = $child;
        }
        return $attributes;
    }
    $attributes[$this->val] = $content;
    return $this->getValueByType($attributes);
}
||
183 | |||
/**
 * Collects the attributes of the node, with every name prefixed by "@".
 *
 * @param DOMNode $node
 * @return array Attribute values keyed by "@" + attribute name;
 *               empty if the node has no attributes
 */
private function parseXmlAttributes(DOMNode $node): array
{
    $collected = [];
    if ($node->hasAttributes()) {
        /** @var \DOMAttr $attribute */
        foreach ($node->attributes as $attribute) {
            $collected['@' . $attribute->nodeName] = $attribute->nodeValue;
        }
    }
    return $collected;
}
||
196 | |||
/**
 * Extracts the value of the node.
 *
 * If the first child is a text or CDATA node, its text is the value.
 * Otherwise the child nodes are converted into an array, where every
 * entry that is an array with exactly one element is replaced by that
 * element. A node without (usable) children yields an empty string.
 *
 * @param DOMNode $node
 * @return array|string|null
 * @throws \Exception
 */
private function parseXmlValue(DOMNode $node): mixed
{
    if (false === $node->hasChildNodes()) {
        return '';
    }
    /** @var DOMNode $first */
    $first = $node->firstChild;
    if (XML_TEXT_NODE === $first->nodeType) {
        return $first->nodeValue;
    }
    if (XML_CDATA_SECTION_NODE === $first->nodeType) {
        return $first->wholeText;
    }
    $values = [];
    $this->extractValuesFromChildNodes($node, $values);
    foreach ($values as $name => $entry) {
        // NOTE(review): this also unwraps a keyed <item> whose own value is a
        // one-element array - confirm that this is intended
        if (is_array($entry) && 1 === count($entry)) {
            $values[$name] = current($entry);
        }
    }
    return $values ?: '';
}
||
223 | |||
/**
 * Creates an XML node in the document from the provided value
 * according to the PHP type of the value.
 *
 *  - iterable: rendered recursively into the new element
 *  - NULL: empty element with xsi:nil="true"
 *  - bool/float/int/DateTimeInterface/object: element with a "type"
 *    attribute (xsd:boolean, xsd:float, xsd:integer, xsd:dateTime,
 *    xsd:object) and the value as content
 *  - anything else: plain text, or a CDATA section when the value
 *    contains one of the characters < > & ' "
 *
 * @param DOMDocument $document
 * @param DOMNode     $parent
 * @param mixed       $data
 * @param string      $name Tag name of the new element
 * @param string|null $key  If not NULL, it is set as the "key" attribute
 */
private function appendNode(DOMDocument $document,
                            DOMNode $parent,
                            mixed $data,
                            string $name,
                            ?string $key): void
{
    $element = $document->createElement($name);
    if (null !== $key) {
        $element->setAttribute('key', $key);
    }
    if (is_iterable($data)) {
        $this->buildXml($document, $element, $data);
    } elseif (null === $data) {
        $element->setAttribute('xsi:nil', 'true');
    } else {
        [$type, $content] = match (true) {
            // FALSE would otherwise be written as an empty string, which is
            // not a valid xsd:boolean lexical value; "1" and "0" both are
            is_bool($data) => ['xsd:boolean', $document->createTextNode($data ? '1' : '0')],
            is_float($data) => ['xsd:float', $document->createTextNode((string)$data)],
            is_int($data) => ['xsd:integer', $document->createTextNode((string)$data)],
            $data instanceof DateTimeInterface => [
                'xsd:dateTime',
                $document->createTextNode($data->format(DateTimeInterface::RFC3339)),
            ],
            // Any other object is stored as its JSON representation
            is_object($data) => ['xsd:object', $document->createCDATASection(json_serialize($data))],
            preg_match('/[<>&\'"]/', $data) > 0 => [null, $document->createCDATASection($data)],
            default => [null, $document->createTextNode($data)],
        };
        if (null !== $type) {
            $element->setAttribute('type', $type);
        }
        $element->appendChild($content);
    }
    $parent->appendChild($element);
}
||
270 | |||
/**
 * Deserialize the XML document elements into strict PHP values
 * in regard to the XSD type defined in the XML element (if any).
 *
 * [IMPORTANT]: When deserializing an XML document into values,
 * if the XmlSerializer encounters an XML element that specifies xsi:nil="true",
 * it assigns a NULL to the corresponding element and ignores any other attributes
 *
 * Supported types are xsd:integer, xsd:boolean, xsd:float, xsd:dateTime
 * and xsd:object. A value with any other type (including other "xsd:"
 * types) is returned untouched, with its "@type" attribute preserved.
 *
 * @param array|string $value
 * @return mixed The value itself if it is not an array or has no supported type;
 *               NULL for xsi:nil="true"; the typed value if no attributes are
 *               left besides the type; otherwise the array with the typed node value
 * @throws \Exception
 */
private function getValueByType(mixed $value): mixed
{
    if (false === is_array($value)) {
        return $value;
    }
    /*
     * [NOTE] if "xsi:nil" is NOT 'true', ignore the xsi:nil
     * and process the rest of the attributes for this element
     */
    if ('true' === ($value['@xsi:nil'] ?? null)) {
        return null;
    }
    if (false === isset($value['@type']) || false === str_starts_with($value['@type'], 'xsd:')) {
        return $value;
    }
    // A switch (instead of match) so that an unsupported type can return early,
    // without throwing UnhandledMatchError and discarding the whole document
    switch ($value['@type']) {
        case 'xsd:integer':
            $typed = (int)$value[$this->val];
            break;
        case 'xsd:boolean':
            $typed = filter_var($value[$this->val], FILTER_VALIDATE_BOOL);
            break;
        case 'xsd:float':
            $typed = (float)$value[$this->val];
            break;
        case 'xsd:dateTime':
            $typed = new DateTimeImmutable($value[$this->val]);
            break;
        case 'xsd:object':
            $typed = json_unserialize($value[$this->val]);
            break;
        default:
            return $value;
    }
    $value[$this->val] = $typed;
    unset($value['@type']);
    // Keep the array form if other attributes are present besides the node value
    if (count($value) > 1) {
        return $value;
    }
    return $value[$this->val];
}
||
312 | |||
/**
 * Checks if the key can be used as an XML tag name: it must start with
 * a letter or underscore, followed by letters, ASCII digits, ".", "_", ":" or "-".
 *
 * The "D" modifier anchors "$" at the very end of the subject, so a name
 * with a trailing newline is rejected. Empty, numeric and whitespace
 * containing keys never match the pattern.
 *
 * @param int|string $key
 * @return bool
 */
private function hasValidName(int|string $key): bool
{
    return 1 === preg_match('~^[\pL_][\pL0-9._:-]*$~uiD', (string)$key);
}
||
319 | |||
/**
 * Parses the child nodes of the node (comments are skipped) into $value.
 *
 * An <item> child with a "key" attribute is stored under that key, with
 * the "@key" entry removed. Any other child is appended to the list that
 * is stored under its node name.
 *
 * @param DOMNode $node
 * @param array   $value [out] Receives the parsed values
 */
private function extractValuesFromChildNodes(DOMNode $node, array &$value): void
{
    foreach ($node->childNodes as $child) {
        if (XML_COMMENT_NODE === $child->nodeType) {
            continue;
        }
        $parsed = $this->parseXml($child);
        $isKeyedItem = 'item' === $child->nodeName && isset($parsed['@key']);
        if (false === $isKeyedItem) {
            $value[$child->nodeName][] = $this->getValueByType($parsed);
            continue;
        }
        $itemKey = $parsed['@key'];
        $value[$itemKey] = $this->getValueByType($parsed);
        unset($value[$itemKey]['@key']);
    }
}
||
336 | |||
/**
 * Logs a failed unserialize attempt together with a hint about the input.
 *
 * The XML is parsed again into a flat structure and the last parsed entry
 * (without its "type" and "level") is used as the hint. If there is no
 * such entry, the XML itself is logged instead.
 *
 * @param string $method  Name of the method that failed
 * @param string $message The error message; the "DOMDocument::loadXML(): " prefix is removed
 * @param string $xml     The XML that could not be unserialized
 */
public function logUnserializeError(string $method,
                                    string $message,
                                    string $xml): void
{
    $parser = xml_parser_create();
    xml_parse_into_struct($parser, $xml, $values);
    $last = end($values);
    if (is_array($last)) {
        unset($last['type'], $last['level']);
    } else {
        // end() returns FALSE for an empty structure; unsetting offsets
        // on FALSE is deprecated since PHP 8.1, so fall back to no hint
        $last = [];
    }
    error_log($method,
        str_replace('DOMDocument::loadXML(): ', '', $message),
        'hint: ' . json_serialize($last ?: ['<XML>' => $xml])
    );
}
||
350 | } |
||
351 |