kodedphp /
stdlib
| 1 | <?php |
||
| 2 | |||
| 3 | /* |
||
| 4 | * This file is part of the Koded package. |
||
| 5 | * |
||
| 6 | * (c) Mihail Binev <[email protected]> |
||
| 7 | * |
||
| 8 | * Please view the LICENSE distributed with this source code |
||
| 9 | * for the full copyright and license information. |
||
| 10 | */ |
||
| 11 | |||
| 12 | namespace Koded\Stdlib\Serializer; |
||
| 13 | |||
| 14 | use DateTimeImmutable; |
||
| 15 | use DateTimeInterface; |
||
| 16 | use DOMDocument; |
||
| 17 | use DOMNode; |
||
| 18 | use InvalidArgumentException; |
||
| 19 | use Koded\Stdlib\Serializer; |
||
| 20 | use Throwable; |
||
| 21 | use function array_is_list; |
||
| 22 | use function count; |
||
| 23 | use function current; |
||
| 24 | use function end; |
||
| 25 | use function filter_var; |
||
| 26 | use function is_array; |
||
| 27 | use function is_bool; |
||
| 28 | use function is_float; |
||
| 29 | use function is_int; |
||
| 30 | use function is_iterable; |
||
| 31 | use function is_numeric; |
||
| 32 | use function is_object; |
||
| 33 | use function Koded\Stdlib\error_log; |
||
| 34 | use function Koded\Stdlib\json_serialize; |
||
| 35 | use function Koded\Stdlib\json_unserialize; |
||
| 36 | use function key; |
||
| 37 | use function preg_match; |
||
| 38 | use function str_contains; |
||
| 39 | use function str_replace; |
||
| 40 | use function str_starts_with; |
||
| 41 | use function substr; |
||
| 42 | use function trim; |
||
| 43 | use function xml_parse_into_struct; |
||
| 44 | use function xml_parser_create; |
||
| 45 | |||
/**
 * Class XmlSerializer is heavily modified Symfony encoder (XmlEncoder).
 *
 * Serializes PHP values into an XML document and unserializes an XML
 * document back into PHP values. Conventions used in the array form:
 *
 *  - keys prefixed with "@" are element attributes;
 *  - the node-value key ("#" by default) holds the text value of an
 *    element that also carries attributes;
 *  - numeric keys and keys that are not valid XML names are written as
 *    <item key="$key">$value</item>;
 *  - bool, float, int, date-time and object values are tagged with a
 *    type="xsd:*" attribute; NULL is written as xsi:nil="true".
 *
 * @see https://www.w3.org/TR/xmlschema-2/#built-in-datatypes
 */
class XmlSerializer implements Serializer
{
    /** @var string The key name for the node value */
    private string $val = '#';

    /** @var string|null The name of the root element used by serialize() */
    private string|null $root;

    /**
     * @param string|null $root    The root element name for serialized documents
     * @param string      $nodeKey The array key that represents the node value.
     *                             It is trimmed; it cannot be "@" (the attribute
     *                             prefix) or a value that PHP treats as empty
     *
     * @throws InvalidArgumentException if the node key is not acceptable
     */
    public function __construct(?string $root, string $nodeKey = '#')
    {
        $this->root = $root;
        $nodeKey = trim($nodeKey);
        if ('@' === $nodeKey || empty($nodeKey)) {
            throw new InvalidArgumentException('Invalid node key identifier', self::E_INVALID_SERIALIZER);
        }
        $this->val = $nodeKey;
    }

    /**
     * @return string The serializer type identifier (Serializer::XML)
     */
    public function type(): string
    {
        return Serializer::XML;
    }

    /**
     * @return string The key name used for the node value
     */
    final public function val(): string
    {
        return $this->val;
    }

    /**
     * Serializes the value into an XML document.
     *
     * An iterable value becomes the content of the root element; any other
     * value becomes the root element itself (typed by appendNode()).
     *
     * NOTE(review): the root name is used as-is; a serializer constructed
     * with a NULL root is not guarded against here - confirm with callers.
     *
     * @param mixed $value iterable, scalar, object or NULL
     * @return string|null XML
     */
    public function serialize(mixed $value): string|null
    {
        $document = new DOMDocument('1.0', 'UTF-8');
        $document->formatOutput = false;
        if (is_iterable($value)) {
            $root = $document->createElement($this->root);
            $document->appendChild($root);
            // NOTE(review): the created attribute is never attached. The call
            // appears to be kept for its effect of registering the "xsi"
            // namespace on the root element (unserialize() looks for
            // "xmlns:xsi") - confirm before removing it.
            $document->createAttributeNS('http://www.w3.org/2001/XMLSchema-instance', 'xsi:' . $this->root);
            $this->buildXml($document, $root, $value);
        } else {
            $this->appendNode($document, $document, $value, $this->root, null);
        }
        return trim($document->saveXML());
    }

    /**
     * Unserialize a proper XML document into array, scalar value or NULL.
     *
     * Returns NULL for an empty string. Any error raised while loading or
     * parsing the document is logged through logUnserializeError() and
     * NULL is returned.
     *
     * @param string $xml XML
     * @return mixed scalar|array|null
     */
    public function unserialize(string $xml): mixed
    {
        if (empty($xml = trim($xml))) {
            return null;
        }
        try {
            $document = new DOMDocument('1.0', 'UTF-8');
            $document->preserveWhiteSpace = false;
            $document->loadXML($xml);
            if ($document->documentElement->hasChildNodes()) {
                return $this->parseXml($document->documentElement);
            }
            // A childless root that carries "xmlns:xsi" is treated as
            // an empty array; otherwise it is parsed as a single value
            return !$document->documentElement->getAttributeNode('xmlns:xsi')
                ? $this->parseXml($document->documentElement)
                : [];

        } catch (Throwable $e) {
            $this->logUnserializeError(__METHOD__, $e->getMessage(), $xml);
            return null;
        }
    }

    /**
     * Appends the iterable data to the parent element as attributes,
     * node value and child elements, depending on the key of each entry.
     *
     * @param DOMDocument $document The document that creates the nodes
     * @param \DOMElement $parent   The element that receives the data
     * @param iterable    $data     The data to append
     */
    private function buildXml(DOMDocument $document,
                              \DOMElement $parent,
                              iterable $data): void
    {
        foreach ($data as $key => $val) {
            $isKeyNumeric = is_numeric($key);
            if (str_starts_with($key, '@') && $name = substr($key, 1)) {
                // node attribute
                $parent->setAttribute($name, $val);
            } elseif ($this->val === $key) {
                // node value
                $parent->nodeValue = $val;
            } elseif (false === $isKeyNumeric && is_array($val)) {
                /*
                 * A list under a non-numeric key is written as repeated
                 * elements named after the key:
                 *    <key>$val0</key>
                 *    <key>$val1</key>
                 * Any other array becomes a single <key> element
                 * that holds the nested structure.
                 */
                if (array_is_list($val)) {
                    foreach ($val as $d) {
                        $this->appendNode($document, $parent, $d, $key, null);
                    }
                } else {
                    $this->appendNode($document, $parent, $val, $key, null);
                }
            } elseif ($isKeyNumeric || false === $this->hasValidName($key)) {
                /* If the key is not a valid XML tag name,
                 * transform the key to "item" node:
                 *    <item key="$key">$value</item>
                 * by appending it to the parent node (if any)
                 */
                $this->appendNode($document, $parent, $val, 'item', $key);
            } else {
                $this->appendNode($document, $parent, $val, $key, null);
            }
        }
    }

    /**
     * Parses the node into a PHP value.
     *
     * Without attributes the node value is returned as-is. With attributes
     * the result is an array of "@name" entries merged with the node value
     * (stored under the node-value key when it is not an array), where a
     * scalar node value is additionally converted by getValueByType().
     *
     * @param DOMNode $node
     * @return mixed array|string|null and the types created by getValueByType()
     * @throws \Exception
     */
    private function parseXml(DOMNode $node): mixed
    {
        $attrs = $this->parseXmlAttributes($node);
        $value = $this->parseXmlValue($node);
        if (0 === count($attrs)) {
            return $value;
        }
        if (false === is_array($value)) {
            $attrs[$this->val] = $value;
            return $this->getValueByType($attrs);
        }
        // Child values are merged over the attributes
        foreach ($value as $k => $v) {
            $attrs[$k] = $v;
        }
        return $attrs;
    }

    /**
     * Collects the node attributes into an array
     * where every attribute name is prefixed with "@".
     *
     * @param DOMNode $node
     * @return array
     */
    private function parseXmlAttributes(DOMNode $node): array
    {
        if (!$node->hasAttributes()) {
            return [];
        }
        $attrs = [];
        foreach ($node->attributes as $attr) {
            /** @var \DOMAttr $attr */
            $attrs['@' . $attr->nodeName] = $attr->nodeValue;
        }
        return $attrs;
    }

    /**
     * Extracts the value of the node.
     *
     * Only the first child decides the kind of the value: a text or CDATA
     * first child yields a string, anything else yields the array of the
     * parsed child nodes. A node without a value yields an empty string.
     *
     * @param DOMNode $node
     * @return array|string|null
     * @throws \Exception
     */
    private function parseXmlValue(DOMNode $node): mixed
    {
        $value = [];
        if ($node->hasChildNodes()) {
            /** @var DOMNode $child */
            $child = $node->firstChild;
            if ($child->nodeType === XML_TEXT_NODE) {
                return $child->nodeValue;
            }
            if ($child->nodeType === XML_CDATA_SECTION_NODE) {
                return $child->wholeText;
            }
            $this->extractValuesFromChildNodes($node, $value);
        }
        // Entries that hold a single-element array are unwrapped
        foreach ($value as $k => $v) {
            if (is_array($v) && 1 === count($v)) {
                $value[$k] = current($v);
            }
        }
        return $value ?: '';
    }

    /**
     * Creates an XML node in the document from the provided value
     * according to the PHP type of the value.
     *
     * @param DOMDocument $document
     * @param DOMNode     $parent The node that receives the new element
     * @param mixed       $data   The value of the new element
     * @param string      $name   The name of the new element
     * @param string|null $key    If not NULL, it is set as "key" attribute
     */
    private function appendNode(DOMDocument $document,
                                DOMNode $parent,
                                mixed $data,
                                string $name,
                                ?string $key): void
    {
        $element = $document->createElement($name);
        if (null !== $key) {
            $element->setAttribute('key', $key);
        }
        if (is_iterable($data)) {
            $this->buildXml($document, $element, $data);
        } elseif (is_bool($data)) {
            $element->setAttribute('type', 'xsd:boolean');
            // PHP string conversion: TRUE is written as "1", FALSE as ""
            $element->appendChild($document->createTextNode((string)$data));
        } elseif (is_float($data)) {
            $element->setAttribute('type', 'xsd:float');
            $element->appendChild($document->createTextNode((string)$data));
        } elseif (is_int($data)) {
            $element->setAttribute('type', 'xsd:integer');
            $element->appendChild($document->createTextNode((string)$data));
        } elseif (null === $data) {
            $element->setAttribute('xsi:nil', 'true');
        } elseif ($data instanceof DateTimeInterface) {
            $element->setAttribute('type', 'xsd:dateTime');
            $element->appendChild($document->createTextNode($data->format(DateTimeInterface::RFC3339)));
        } elseif (is_object($data)) {
            $element->setAttribute('type', 'xsd:object');
            $element->appendChild($document->createCDATASection(json_serialize($data)));
        } elseif (preg_match('/[<>&\'"]/', $data) > 0) {
            // Text with markup characters is wrapped in a CDATA section
            $element->appendChild($document->createCDATASection($data));
        } else {
            $element->appendChild($document->createTextNode($data));
        }
        $parent->appendChild($element);
    }

    /**
     * Deserialize the XML document elements into strict PHP values
     * in regard to the XSD type defined in the XML element (if any).
     *
     * [IMPORTANT]: When deserializing an XML document into values,
     * if the XmlSerializer encounters an XML element that specifies xsi:nil="true",
     * it assigns a NULL to the corresponding element and ignores any other attributes
     *
     * Converted types: xsd:integer, xsd:boolean, xsd:float, xsd:dateTime
     * and xsd:object. Any other "xsd:" type keeps the node value as-is
     * (the "type" attribute is still removed from the result).
     *
     * @param array|string $value
     * @return mixed array|string|null
     * @throws \Exception
     */
    private function getValueByType(mixed $value): mixed
    {
        if (false === is_array($value)) {
            return $value;
        }
        /*
         * [NOTE] if "xsi:nil" is NOT 'true', ignore the xsi:nil
         * and process the rest of the attributes for this element
         */
        if (isset($value['@xsi:nil']) && 'true' === $value['@xsi:nil']) {
            return null;
        }
        if (!isset($value['@type']) || !str_starts_with($value['@type'], 'xsd:')) {
            return $value;
        }
        if (!isset($value[$this->val])) {
            // A typed element without a node value (i.e. it holds
            // child elements instead); there is nothing to convert
            return $value;
        }
        $value[$this->val] = match ($value['@type']) {
            'xsd:integer' => (int)$value[$this->val],
            'xsd:boolean' => filter_var($value[$this->val], FILTER_VALIDATE_BOOL),
            'xsd:float' => (float)$value[$this->val],
            'xsd:dateTime' => new DateTimeImmutable($value[$this->val]),
            'xsd:object' => json_unserialize($value[$this->val]),
            // Unsupported XSD types keep the text value, instead of
            // failing the whole document with UnhandledMatchError
            default => $value[$this->val],
        };
        unset($value['@type']);
        if (count($value) > 1) {
            // Other attributes are present, keep the array form
            return $value;
        }
        return $value[$this->val];
    }

    /**
     * Checks if the key can be used as XML element name.
     *
     * @param int|string $key
     * @return bool
     */
    private function hasValidName(int|string $key): bool
    {
        return $key &&
            !str_contains($key, ' ') &&
            1 === preg_match('~^[\pL_][\pL0-9._:-]*$~ui', $key);
    }

    /**
     * Parses the child nodes (comments are skipped) into the value array.
     *
     * An <item key="$key"> child is stored under its key, any other child
     * is collected in a list under its node name.
     *
     * @param DOMNode $node
     * @param array   $value [by reference] Receives the parsed values
     * @throws \Exception
     */
    private function extractValuesFromChildNodes(DOMNode $node, array &$value): void
    {
        foreach ($node->childNodes as $child) {
            if ($child->nodeType === XML_COMMENT_NODE) {
                continue;
            }
            $v = $this->parseXml($child);
            if ('item' === $child->nodeName && isset($v['@key'])) {
                $k = $v['@key'];
                $value[$k] = $this->getValueByType($v);
                unset($value[$k]['@key']);
            } else {
                $value[$child->nodeName][] = $this->getValueByType($v);
            }
        }
    }

    /**
     * Logs the unserialize error with a hint: the last structure
     * that the XML parser produced, or the whole XML if there is none.
     *
     * @param string $method  The name of the method that failed
     * @param string $message The error message
     * @param string $xml     The XML that failed to unserialize
     */
    public function logUnserializeError(string $method,
                                        string $message,
                                        string $xml): void
    {
        $parser = xml_parser_create();
        xml_parse_into_struct($parser, $xml, $values);
        $last = end($values);
        if (is_array($last)) {
            // end() yields FALSE when nothing was parsed
            unset($last['type'], $last['level']);
        }
        error_log($method,
            str_replace('DOMDocument::loadXML(): ', '', $message),
            'hint: ' . json_serialize($last ?: ['<XML>' => $xml])
        );
    }
}
||
| 351 |