simplesamlphp /
xml-common
| 1 | <?php |
||||
| 2 | |||||
| 3 | declare(strict_types=1); |
||||
| 4 | |||||
| 5 | namespace SimpleSAML\XML; |
||||
| 6 | |||||
| 7 | use DOMDocument; |
||||
| 8 | use DOMElement; |
||||
| 9 | use SimpleSAML\XML\Assert\Assert; |
||||
| 10 | use SimpleSAML\XML\Exception\IOException; |
||||
| 11 | use SimpleSAML\XML\Exception\RuntimeException; |
||||
| 12 | use SimpleSAML\XML\Exception\UnparseableXMLException; |
||||
| 13 | use SimpleSAML\XPath\XPath; |
||||
| 14 | |||||
| 15 | use function file_get_contents; |
||||
| 16 | use function func_num_args; |
||||
| 17 | use function libxml_clear_errors; |
||||
| 18 | use function libxml_set_external_entity_loader; |
||||
| 19 | use function libxml_use_internal_errors; |
||||
| 20 | use function sprintf; |
||||
| 21 | use function strpos; |
||||
| 22 | |||||
/**
 * Static helpers for building DOMDocument instances from strings or files,
 * and for inspecting / consolidating the namespace declarations of a document.
 *
 * @package simplesamlphp/xml-common
 */
final class DOMDocumentFactory
{
    /**
     * Parser options used when the caller does not supply any.
     *
     * @var non-negative-int
     * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
     */
    public const DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NONET | \LIBXML_NSCLEAN;


    /**
     * Parse an XML string into a DOMDocument, rejecting documents that carry a DOCTYPE.
     *
     * The DOCTYPE check is performed twice: once on the raw text before parsing and once
     * on the top-level nodes of the parsed document.
     *
     * @param string $xml The XML to parse; must not be empty or whitespace-only
     * @param non-negative-int $options libxml parser options. When omitted, DEFAULT_OPTIONS is used
     *   and LIBXML_NO_XXE is added if that constant is defined.
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException When libxml fails to load the XML
     * @throws \SimpleSAML\XML\Exception\RuntimeException When a DOCTYPE is detected
     */
    public static function fromString(
        string $xml,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Drop any custom external entity loader before touching the input
        libxml_set_external_entity_loader(null);
        Assert::notWhitespaceOnly($xml);
        Assert::notRegex(
            $xml,
            '/<(\s*)!(\s*)DOCTYPE/',
            'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
            RuntimeException::class,
        );

        // Only add LIBXML_NO_XXE when the caller relied on the default options
        // (an explicitly passed $options value is used untouched) and the constant exists
        if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
            $options |= \LIBXML_NO_XXE;
        }

        $domDocument = self::create();

        // Collect parser errors internally instead of emitting PHP warnings
        $internalErrors = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $loaded = $domDocument->loadXML($xml, $options);
        } finally {
            // Always put the process-wide error-handling flag back, even if loadXML() throws
            libxml_use_internal_errors($internalErrors);
        }

        if (!$loaded) {
            $error = libxml_get_last_error();
            libxml_clear_errors();

            throw new UnparseableXMLException($error);
        }

        libxml_clear_errors();

        // Second line of defence: no top-level node of the parsed document may be a DOCTYPE
        foreach ($domDocument->childNodes as $child) {
            Assert::false(
                $child->nodeType === \XML_DOCUMENT_TYPE_NODE,
                'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
                RuntimeException::class,
            );
        }

        return $domDocument;
    }


    /**
     * Read a file and parse its contents with fromString().
     *
     * @param string $file Path (or stream URL) handed to file_get_contents()
     * @param non-negative-int $options libxml parser options; only forwarded when explicitly passed
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\IOException When the file cannot be read
     * @throws \SimpleSAML\XML\Exception\RuntimeException When the file is empty or whitespace-only
     */
    public static function fromFile(
        string $file,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Clear the last error first so that error_get_last() below can only report this read
        error_clear_last();
        $xml = @file_get_contents($file);
        if ($xml === false) {
            $e = error_get_last();
            $error = $e['message'] ?? "Check that the file exists and can be read.";

            throw new IOException("File '$file' was not loaded; $error");
        }

        Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);

        // fromString() treats "options omitted" differently from "options passed",
        // so the argument is only forwarded when this method received it explicitly
        return (func_num_args() < 2) ? self::fromString($xml) : self::fromString($xml, $options);
    }


    /**
     * Create a new, empty DOMDocument.
     *
     * @param string $version XML version passed to the DOMDocument constructor
     * @param string $encoding Encoding passed to the DOMDocument constructor
     * @return \DOMDocument
     */
    public static function create(string $version = '1.0', string $encoding = 'UTF-8'): DOMDocument
    {
        return new DOMDocument($version, $encoding);
    }


    /**
     * Move every namespace declaration found in the document to the root element.
     *
     * All namespace nodes are collected by their qualified name (`xmlns` or `xmlns:prefix`),
     * the declarations are removed from the elements carrying them, re-added on the root
     * element, and the serialized result is parsed again through fromString().
     * If the same name is declared more than once, the value seen last by the query is kept.
     *
     * When the document has no root element or no namespace declarations, the document
     * that was passed in is returned unchanged.
     *
     * @param \DOMDocument $doc
     * @return \DOMDocument
     */
    public static function normalizeDocument(DOMDocument $doc): DOMDocument
    {
        $root = $doc->documentElement;
        if ($root === null) {
            // A document without a root element has nothing to normalize
            return $doc;
        }

        $xpath = XPath::getXPath($doc);

        // Collect all namespace declarations, keyed by their qualified name
        $xmlnsAttributes = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            // The binding for the `xml` prefix is never re-declared
            if ($node->nodeName !== 'xmlns:xml') {
                $xmlnsAttributes[$node->nodeName] = $node->nodeValue;
            }
        }

        if ($xmlnsAttributes === []) {
            return $doc;
        }

        // Strip the namespace declarations from every element that has namespace nodes
        foreach ($xpath->query('//*[namespace::*]') as $node) {
            if (!($node instanceof DOMElement)) {
                continue;
            }

            // Gather names first; removing while iterating the live attribute map is unsafe
            $attributesToRemove = [];
            foreach ($node->attributes as $attr) {
                // Match declarations only (`xmlns` / `xmlns:*`), not attributes that merely start with "xmlns"
                if ($attr->nodeName === 'xmlns' || str_starts_with($attr->nodeName, 'xmlns:')) {
                    $attributesToRemove[] = $attr->nodeName;
                }
            }

            foreach ($attributesToRemove as $attrName) {
                $node->removeAttribute($attrName);
            }
        }

        // Re-declare everything on the root element
        foreach ($xmlnsAttributes as $name => $value) {
            $root->setAttribute($name, $value);
        }

        // Round-trip through the parser so the returned document reflects the new declarations
        return self::fromString($doc->saveXML());
    }


    /**
     * Look up the namespace URI declared for a prefix anywhere in the element's document.
     *
     * The lookup is document-wide, not scoped to $elt: every namespace node in the owner
     * document is indexed by its local name, and a later node with the same local name
     * replaces an earlier one.
     *
     * @param \DOMElement $elt Element whose owner document is searched
     * @param string $prefix Prefix to resolve
     * @return string|null The URI, or null when the prefix is not declared or the element
     *   does not belong to a document
     */
    public static function lookupNamespaceURI(DOMElement $elt, string $prefix): ?string
    {
        $doc = $elt->ownerDocument;
        if ($doc === null) {
            // Detached element: there is no document to search
            return null;
        }

        $xpath = XPath::getXPath($doc);

        $xmlnsAttributes = [];
        foreach ($xpath->query('//namespace::*') as $node) {
            $xmlnsAttributes[$node->localName] = $node->nodeValue;
        }

        return $xmlnsAttributes[$prefix] ?? null;
    }
}
||||
| 201 |