simplesamlphp /
xml-common
| 1 | <?php |
||||
| 2 | |||||
| 3 | declare(strict_types=1); |
||||
| 4 | |||||
| 5 | namespace SimpleSAML\XPath; |
||||
| 6 | |||||
| 7 | use DOMDocument; |
||||
| 8 | use DOMElement; |
||||
| 9 | use DOMNode; |
||||
| 10 | use DOMXPath; |
||||
| 11 | use RuntimeException; |
||||
| 12 | use SimpleSAML\XML\Assert\Assert; |
||||
| 13 | use SimpleSAML\XML\Constants as C_XML; |
||||
| 14 | use SimpleSAML\XMLSchema\Constants as C_XS; |
||||
| 15 | |||||
/**
 * XPath helper functions for the XML library.
 *
 * @package simplesamlphp/xml-common
 */
class XPath
{
    /**
     * Search for an element with a certain name among the children of a reference element.
     *
     * @param \DOMNode $ref The DOMDocument or DOMElement whose children are searched.
     * @param string $name The name (possibly prefixed) of the element we are looking for.
     *
     * @return \DOMElement|false The first match if it is an element, or false when nothing (or a
     *   non-element node) was matched.
     *
     * @throws \RuntimeException If no DOM document is available, or if the XPath expression built
     *   from $name could not be evaluated.
     */
    public static function findElement(DOMNode $ref, string $name): DOMElement|false
    {
        $doc = $ref instanceof DOMDocument ? $ref : $ref->ownerDocument;
        if ($doc === null) {
            throw new RuntimeException('Cannot search, no DOMDocument available');
        }

        $nodeset = self::getXPath($doc)->query('./' . $name, $ref);
        if ($nodeset === false) {
            // query() reports failure by returning false; calling item() on it would be a fatal error.
            throw new RuntimeException('Cannot search, malformed XPath expression or invalid context node');
        }

        // item(0) is null for an empty list and may be a non-element node for expressions such as
        // 'text()'; both cases map to "not found" so the declared return type always holds.
        $first = $nodeset->item(0);

        return $first instanceof DOMElement ? $first : false;
    }


    /**
     * Get an instance of DOMXPath associated with a DOMNode
     *
     * - Reuses the DOMXPath kept from the previous call when it belongs to the same document;
     *   otherwise a new instance replaces it (a single instance is held at a time).
     * - Registers core XML-related namespaces: 'xml' and 'xs'.
     * - Enriches the XPath with all prefixed xmlns declarations found on the
     *   current node and its ancestors (up to the document element), so
     *   custom prefixes declared anywhere up the tree can be used in queries.
     * - Registrations are made on the shared instance, so prefixes bound by an earlier call
     *   for the same document remain bound unless a later call rebinds them.
     *
     * An assertion fails when $node is not a DOMDocument and has no owner document.
     *
     * @param \DOMNode $node The associated node
     * @param bool $autoregister Whether to additionally scan the subtree starting at $node and
     *   register the prefixes used by its elements and attributes
     * @return \DOMXPath
     */
    public static function getXPath(DOMNode $node, bool $autoregister = false): DOMXPath
    {
        static $xpCache = null;

        if ($node instanceof DOMDocument) {
            $doc = $node;
        } else {
            $doc = $node->ownerDocument;
            Assert::notNull($doc);
        }

        if ($xpCache === null || !$xpCache->document->isSameNode($doc)) {
            $xpCache = new DOMXPath($doc);
        }

        $xpCache->registerNamespace('xml', C_XML::NS_XML);
        $xpCache->registerNamespace('xs', C_XS::NS_XS);

        // Enrich with ancestor-declared prefixes for this document context.
        $prefixToUri = self::registerAncestorNamespaces($xpCache, $node);

        if ($autoregister) {
            // Single, depth-capped subtree scan to pick up descendant-only declarations.
            self::registerSubtreePrefixes($xpCache, $node, $prefixToUri);
        }

        return $xpCache;
    }


    /**
     * Resolve the element at which namespace discovery starts for a given node.
     *
     * A document maps to its document element, an element maps to itself, and any other node
     * maps to its parent. Null is returned when the outcome is not an element.
     *
     * @param \DOMNode $node
     * @return \DOMElement|null
     */
    private static function contextElement(DOMNode $node): ?DOMElement
    {
        $candidate = match (true) {
            $node instanceof DOMDocument => $node->documentElement,
            $node instanceof DOMElement => $node,
            default => $node->parentNode,
        };

        return $candidate instanceof DOMElement ? $candidate : null;
    }


    /**
     * Walk from the given node up to the document element, registering all prefixed xmlns declarations.
     *
     * Safety:
     *  - Only attributes in the XMLNS namespace (http://www.w3.org/2000/xmlns/).
     *  - Skip default xmlns (localName === 'xmlns') because XPath requires prefixes.
     *  - Skip empty URIs.
     *  - Do not override core 'xml' and 'xs' prefixes (already bound).
     *  - Nearest binding wins during this pass (prefixes are added once).
     *
     * @param \DOMXPath $xp
     * @param \DOMNode $node
     * @return array<string,string> Map of prefix => namespace URI that are bound after this pass
     *
     * @throws \RuntimeException If more than UNBOUNDED_LIMIT ancestors are traversed.
     */
    private static function registerAncestorNamespaces(DOMXPath $xp, DOMNode $node): array
    {
        // Track prefix => uri to feed into subtree scan. Seed with core bindings.
        $prefixToUri = [
            'xml' => C_XML::NS_XML,
            'xs' => C_XS::NS_XS,
        ];

        // Start from the nearest element (or documentElement if a DOMDocument is passed).
        $current = self::contextElement($node);
        $steps = 0;

        while ($current instanceof DOMElement) {
            if (++$steps > C_XML::UNBOUNDED_LIMIT) {
                throw new RuntimeException(__METHOD__ . ': exceeded ancestor traversal limit');
            }

            // NOTE(review): this relies on xmlns declarations being exposed through
            // DOMElement::attributes; confirm that holds for the ext/dom versions in use.
            if ($current->hasAttributes()) {
                foreach ($current->attributes as $attr) {
                    if ($attr->namespaceURI !== C_XML::NS_XMLNS) {
                        continue;
                    }

                    $prefix = $attr->localName;
                    $uri = (string) $attr->nodeValue;

                    if (
                        $prefix === null || $prefix === '' ||
                        $prefix === 'xmlns' || $uri === '' ||
                        isset($prefixToUri[$prefix])
                    ) {
                        continue;
                    }

                    $xp->registerNamespace($prefix, $uri);
                    $prefixToUri[$prefix] = $uri;
                }
            }

            $current = $current->parentNode;
        }

        return $prefixToUri;
    }


    /**
     * Breadth-first subtree scan from the context element to bind prefixes used only on descendants.
     *  - Never rebind an already-registered prefix (collision-safe, first one encountered wins).
     *  - Skips 'xmlns', empty prefixes and empty URIs.
     *  - Depth is capped at UNBOUNDED_LIMIT levels (root = depth 0). Deeper levels are silently
     *    left unscanned rather than raising an error, so prefixes that only occur below the cap
     *    stay unregistered and queries using them may fail.
     *
     * @param \DOMXPath $xp
     * @param \DOMNode $node
     * @param array<string,string> $prefixToUri Prefixes that are already bound; used to avoid rebinding
     */
    private static function registerSubtreePrefixes(DOMXPath $xp, DOMNode $node, array $prefixToUri): void
    {
        $root = self::contextElement($node);
        if ($root === null) {
            return;
        }

        // Walk one level at a time. This visits elements in the same order as a FIFO queue
        // without the per-dequeue re-indexing cost of array_shift().
        /** @var list<\DOMElement> $level */
        $level = [$root];

        for ($depth = 0; $level !== [] && $depth < C_XML::UNBOUNDED_LIMIT; $depth++) {
            $nextLevel = [];

            foreach ($level as $el) {
                // The element's own prefix comes first, followed by the prefixes of its attributes.
                $candidates = [[$el->prefix, $el->namespaceURI]];
                foreach ($el->attributes as $attr) {
                    $candidates[] = [$attr->prefix, $attr->namespaceURI];
                }

                foreach ($candidates as [$prefix, $uri]) {
                    if (
                        !is_string($prefix) || $prefix === '' ||
                        $prefix === 'xmlns' ||
                        isset($prefixToUri[$prefix]) ||
                        !is_string($uri) || $uri === ''
                    ) {
                        continue;
                    }

                    $xp->registerNamespace($prefix, $uri);
                    $prefixToUri[$prefix] = $uri;
                }

                // Only elements can carry prefixes of interest; other child nodes are ignored.
                foreach ($el->childNodes as $child) {
                    if ($child instanceof DOMElement) {
                        $nextLevel[] = $child;
                    }
                }
            }

            $level = $nextLevel;
        }
    }


    /**
     * Do an XPath query on an XML node.
     *
     * An assertion fails when the query cannot be evaluated (DOMXPath::query() returned false).
     *
     * @param \DOMNode $node The XML node used as context for the query.
     * @param string $query The query.
     * @param \DOMXPath $xpCache The DOMXPath object
     * @return array<\DOMNode> Array with matching DOM nodes, indexed from 0 in result order.
     */
    public static function xpQuery(DOMNode $node, string $query, DOMXPath $xpCache): array
    {
        $results = $xpCache->query($query, $node);
        Assert::notFalse($results, 'Malformed XPath query or invalid contextNode provided.');

        // Discard the iterator keys so the result is a plain 0-based list.
        return iterator_to_array($results, false);
    }
}
||||
| 263 |