simplesamlphp /
xml-common
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace SimpleSAML\XML; |
||
| 6 | |||
| 7 | use DOMDocument; |
||
| 8 | use DOMElement; |
||
| 9 | use SimpleSAML\XML\Assert\Assert; |
||
| 10 | use SimpleSAML\XML\Exception\IOException; |
||
| 11 | use SimpleSAML\XML\Exception\RuntimeException; |
||
| 12 | use SimpleSAML\XML\Exception\UnparseableXMLException; |
||
| 13 | use SimpleSAML\XPath\XPath; |
||
| 14 | |||
| 15 | use function file_get_contents; |
||
| 16 | use function func_num_args; |
||
| 17 | use function libxml_clear_errors; |
||
| 18 | use function libxml_set_external_entity_loader; |
||
| 19 | use function libxml_use_internal_errors; |
||
| 20 | use function sprintf; |
||
| 21 | use function strpos; |
||
| 22 | |||
/**
 * Factory for creating DOMDocument instances and for loading them from
 * strings or files while refusing documents that contain a DOCTYPE.
 *
 * @package simplesamlphp/xml-common
 */
final class DOMDocumentFactory
{
    /**
     * libxml option bitmask used when the caller does not pass one explicitly.
     *
     * @var non-negative-int
     * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
     */
    public const int DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NONET | \LIBXML_NSCLEAN;


    /**
     * Parse an XML string into a DOMDocument.
     *
     * The input is rejected when it is whitespace-only or contains a DOCTYPE
     * declaration. When only $xml is passed and the LIBXML_NO_XXE constant
     * exists, that flag is added to the default options; an explicitly passed
     * $options value is used as-is.
     *
     * @param string $xml
     * @param non-negative-int $options
     *
     * @throws \SimpleSAML\XML\Exception\RuntimeException if a DOCTYPE is detected
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException if libxml fails to parse the input
     */
    public static function fromString(
        string $xml,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Unset any user-registered external entity loader before parsing
        libxml_set_external_entity_loader(null);
        Assert::notWhitespaceOnly($xml);
        Assert::notRegex(
            $xml,
            '/<(\s*)!(\s*)DOCTYPE/',
            'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
            RuntimeException::class,
        );

        // If LIBXML_NO_XXE is available and option not set
        if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
            $options |= \LIBXML_NO_XXE;
        }

        $internalErrors = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $domDocument = self::create();
            $loaded = $domDocument->loadXML($xml, $options);

            // Capture the failure reason while internal error handling is still enabled
            $error = $loaded ? null : libxml_get_last_error();
        } finally {
            // Always leave libxml in the state we found it, even if loadXML() throws
            libxml_clear_errors();
            libxml_use_internal_errors($internalErrors);
        }

        if (!$loaded) {
            throw new UnparseableXMLException($error);
        }

        // Second line of defence: inspect the parsed tree for DOCTYPE nodes
        foreach ($domDocument->childNodes as $child) {
            Assert::false(
                $child->nodeType === \XML_DOCUMENT_TYPE_NODE,
                'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
                RuntimeException::class,
            );
        }

        return $domDocument;
    }


    /**
     * Read a file and parse its contents via fromString().
     *
     * When $options is omitted, fromString() is called with a single argument
     * so that it can apply its own defaults.
     *
     * @param string $file
     * @param non-negative-int $options
     *
     * @throws \SimpleSAML\XML\Exception\IOException if the file cannot be read
     * @throws \SimpleSAML\XML\Exception\RuntimeException if the file is whitespace-only
     */
    public static function fromFile(
        string $file,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Reset the last error so error_get_last() below reports this read only
        error_clear_last();
        $xml = @file_get_contents($file);
        if ($xml === false) {
            $e = error_get_last();
            $error = $e['message'] ?? "Check that the file exists and can be read.";

            throw new IOException("File '$file' was not loaded; $error");
        }

        Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);

        return (func_num_args() < 2) ? self::fromString($xml) : self::fromString($xml, $options);
    }


    /**
     * Create a new, empty DOMDocument.
     *
     * @param string $version
     * @param string $encoding
     */
    public static function create(string $version = '1.0', string $encoding = 'UTF-8'): DOMDocument
    {
        return new DOMDocument($version, $encoding);
    }


    /**
     * Collect the namespace declarations found anywhere in the document, set
     * them on the root element and return the re-parsed result.
     *
     * A document without a root element, or without any namespace declaration
     * besides xmlns:xml, is returned unchanged (the same instance).
     *
     * NOTE: declarations are keyed by node name, so when the same prefix is
     * bound more than once, a later entry in the query result overwrites an
     * earlier one.
     *
     * @param \DOMDocument $doc
     */
    public static function normalizeDocument(DOMDocument $doc): DOMDocument
    {
        $root = $doc->documentElement;
        if ($root === null) {
            // An empty document has no element to carry declarations
            return $doc;
        }

        $xpath = XPath::getXPath($doc);

        // Collect every namespace declaration, skipping xmlns:xml
        $xmlnsAttributes = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            if ($node->nodeName !== 'xmlns:xml') {
                $xmlnsAttributes[$node->nodeName] = $node->nodeValue;
            }
        }

        if ($xmlnsAttributes === []) {
            return $doc;
        }

        // Remove xmlns attributes from all elements
        foreach ($xpath->query('//*[namespace::*]') as $node) {
            if (!($node instanceof DOMElement)) {
                continue;
            }

            // Collect first, remove afterwards, so the attribute map is not modified while iterating
            $attributesToRemove = [];
            foreach ($node->attributes as $attr) {
                if (strpos($attr->nodeName, 'xmlns') === 0) {
                    $attributesToRemove[] = $attr->nodeName;
                }
            }

            foreach ($attributesToRemove as $attrName) {
                $node->removeAttribute($attrName);
            }
        }

        // Add all collected xmlns attributes to the root element
        foreach ($xmlnsAttributes as $name => $value) {
            $root->setAttribute($name, $value);
        }

        // Serialize and parse again to obtain the normalized document
        return self::fromString($doc->saveXML());
    }


    /**
     * Look up the namespace URI bound to a prefix.
     *
     * The search covers the namespace nodes of the element's whole owner
     * document, not only those in scope for $elt; when a name occurs more than
     * once, the last one in the query result wins.
     *
     * @param \DOMElement $elt
     * @param string $prefix
     *
     * @return string|null The namespace URI, or null when the prefix is not found
     */
    public static function lookupNamespaceURI(DOMElement $elt, string $prefix): ?string
    {
        $xpath = XPath::getXPath($elt->ownerDocument);

        // Map each namespace node's local name to its URI
        $xmlnsAttributes = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            $xmlnsAttributes[$node->localName] = $node->nodeValue;
        }

        return $xmlnsAttributes[$prefix] ?? null;
    }
}
||
| 191 |