|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace SimpleSAML\XPath; |
|
6
|
|
|
|
|
7
|
|
|
use DOMDocument; |
|
8
|
|
|
use DOMElement; |
|
9
|
|
|
use DOMNode; |
|
10
|
|
|
use DOMXPath; |
|
11
|
|
|
use RuntimeException; |
|
12
|
|
|
use SimpleSAML\XML\Assert\Assert; |
|
13
|
|
|
use SimpleSAML\XML\Constants as C_XML; |
|
14
|
|
|
use SimpleSAML\XMLSchema\Constants as C_XS; |
|
15
|
|
|
|
|
16
|
|
|
/**
 * XPath helper functions for the XML library.
 *
 * @package simplesamlphp/xml-common
 */
class XPath
{
    /**
     * Search for an element with a certain name among the children of a reference node.
     *
     * The relative expression "./<name>" is evaluated with $ref as the context node and the
     * first match is returned.
     *
     * @param \DOMNode $ref The DOMDocument or DOMElement whose children are searched.
     * @param string $name The name (possibly prefixed) of the element we are looking for.
     *
     * @return \DOMElement|false The element we are looking for, or false when not found
     *   (including when the first match is not an element).
     *
     * @throws \RuntimeException If no DOM document is available, or if the query could not be
     *   evaluated (malformed expression or invalid context node).
     */
    public static function findElement(DOMNode $ref, string $name): DOMElement|false
    {
        $doc = $ref instanceof DOMDocument ? $ref : $ref->ownerDocument;
        if ($doc === null) {
            throw new RuntimeException('Cannot search, no DOMDocument available');
        }

        $nodeset = self::getXPath($doc)->query('./' . $name, $ref);
        if ($nodeset === false) {
            // DOMXPath::query() signals failure with false; calling item() on it would raise an \Error.
            throw new RuntimeException('Cannot search, malformed XPath query or invalid context node');
        }

        // The first match may be absent (null) or a non-element node (e.g. when $name selects an
        // attribute); both are reported as "not found" to honour the declared return type.
        $first = $nodeset->item(0);

        return $first instanceof DOMElement ? $first : false;
    }


    /**
     * Get an instance of DOMXPath associated with a DOMNode
     *
     *   - Reuses a cached DOMXPath as long as consecutive calls target the same document.
     *   - Registers core XML-related namespaces: 'xml' and 'xs'.
     *   - Enriches the XPath with all prefixed xmlns declarations found on the
     *     current node and its ancestors (up to the document element), so
     *     custom prefixes declared anywhere up the tree can be used in queries.
     *
     * @param \DOMNode $node The associated node
     * @param bool $autoregister Whether to additionally register the prefixes used by elements and
     *   attributes in the subtree below the given node
     * @return \DOMXPath
     *
     * @throws \RuntimeException If the ancestor traversal limit is exceeded.
     */
    public static function getXPath(DOMNode $node, bool $autoregister = false): DOMXPath
    {
        // Single-slot cache: holds the DOMXPath of the most recently used document only.
        static $xpCache = null;

        if ($node instanceof DOMDocument) {
            $doc = $node;
        } else {
            $doc = $node->ownerDocument;
            Assert::notNull($doc);
        }

        if ($xpCache === null || !$xpCache->document->isSameNode($doc)) {
            $xpCache = new DOMXPath($doc);
        }

        // Re-applied on every call so the core prefixes are bound even on a reused instance.
        $xpCache->registerNamespace('xml', C_XML::NS_XML);
        $xpCache->registerNamespace('xs', C_XS::NS_XS);

        // Enrich with ancestor-declared prefixes for this document context.
        $prefixToUri = self::registerAncestorNamespaces($xpCache, $node);

        if ($autoregister) {
            // Single subtree scan to pick up descendant-only prefixes.
            self::registerSubtreePrefixes($xpCache, $node, $prefixToUri);
        }

        return $xpCache;
    }


    /**
     * Walk from the given node up to the document element, registering all prefixed xmlns declarations.
     *
     * Safety:
     *  - Only attributes in the XMLNS namespace (http://www.w3.org/2000/xmlns/).
     *  - Skip default xmlns (localName === 'xmlns') because XPath requires prefixes.
     *  - Skip empty URIs.
     *  - Do not override core 'xml' and 'xs' prefixes (already bound).
     *  - Nearest binding wins during this pass (prefixes are added once).
     *
     * @param \DOMXPath $xp
     * @param \DOMNode $node
     * @return array<string,string> Map of prefix => namespace URI that are bound after this pass
     *
     * @throws \RuntimeException If more than C_XML::UNBOUNDED_LIMIT ancestors are visited.
     */
    private static function registerAncestorNamespaces(DOMXPath $xp, DOMNode $node): array
    {
        // Track prefix => uri to feed into subtree scan. Seed with core bindings.
        $prefixToUri = [
            'xml' => C_XML::NS_XML,
            'xs' => C_XS::NS_XS,
        ];

        // Start from the nearest element (or documentElement if a DOMDocument is passed).
        $current = $node instanceof DOMDocument
            ? $node->documentElement
            : ($node instanceof DOMElement ? $node : $node->parentNode);

        $steps = 0;

        // The loop ends once parentNode is no longer an element (i.e. the document or null).
        while ($current instanceof DOMElement) {
            if (++$steps > C_XML::UNBOUNDED_LIMIT) {
                throw new RuntimeException(__METHOD__ . ': exceeded ancestor traversal limit');
            }

            if ($current->hasAttributes()) {
                foreach ($current->attributes as $attr) {
                    if ($attr->namespaceURI !== C_XML::NS_XMLNS) {
                        continue;
                    }

                    $prefix = $attr->localName;
                    $uri = (string) $attr->nodeValue;

                    // Skip unusable declarations and prefixes already bound closer to the start node.
                    if (
                        $prefix === null
                        || $prefix === ''
                        || $prefix === 'xmlns'
                        || $uri === ''
                        || isset($prefixToUri[$prefix])
                    ) {
                        continue;
                    }

                    $xp->registerNamespace($prefix, $uri);
                    $prefixToUri[$prefix] = $uri;
                }
            }

            $current = $current->parentNode;
        }

        return $prefixToUri;
    }


    /**
     * Single-pass, breadth-first subtree scan from the context element to bind prefixes used only on descendants.
     *  - Never rebind an already-registered prefix (collision-safe; the first binding seen wins).
     *  - Skips 'xmlns' and empty URIs.
     *  - Descent depth is capped at UNBOUNDED_LIMIT (the number of visited elements is not capped).
     *
     * @param \DOMXPath $xp
     * @param \DOMNode $node
     * @param array<string,string> $prefixToUri Prefixes that are already bound; used to avoid rebinding
     */
    private static function registerSubtreePrefixes(DOMXPath $xp, DOMNode $node, array $prefixToUri): void
    {
        $root = $node instanceof DOMDocument
            ? $node->documentElement
            : ($node instanceof DOMElement ? $node : $node->parentNode);

        if (!$root instanceof DOMElement) {
            return;
        }

        /** @var list<array{0:\DOMElement,1:int}> $queue */
        $queue = [[$root, 0]];

        // The queue is consumed through a read cursor instead of array_shift(): entries are only
        // ever appended, so visiting an element never forces the array to be re-indexed.
        for ($head = 0; isset($queue[$head]); $head++) {
            [$el, $depth] = $queue[$head];

            // Depth guard: cap traversal at UNBOUNDED_LIMIT (root = depth 0).
            // Entries are queued in non-decreasing depth order, so everything still pending is at
            // least this deep and the scan can stop here.
            // Trade-off: deeper descendant-only prefixes may remain unregistered, so some
            // prefixed XPath queries might fail; overall processing continues gracefully.
            if ($depth >= C_XML::UNBOUNDED_LIMIT) {
                break;
            }

            // Element prefix
            $elementPrefix = $el->prefix;
            if ($elementPrefix && !isset($prefixToUri[$elementPrefix])) {
                $uri = $el->namespaceURI;
                if (is_string($uri) && $uri !== '') {
                    $xp->registerNamespace($elementPrefix, $uri);
                    $prefixToUri[$elementPrefix] = $uri;
                }
            }

            // Attribute prefixes (excluding xmlns)
            if ($el->hasAttributes()) {
                foreach ($el->attributes as $attr) {
                    $attrPrefix = $attr->prefix;
                    if (!$attrPrefix || $attrPrefix === 'xmlns' || isset($prefixToUri[$attrPrefix])) {
                        // Unprefixed, a namespace declaration, or already bound: never rebind.
                        continue;
                    }

                    $uri = $attr->namespaceURI;
                    if (is_string($uri) && $uri !== '') {
                        $xp->registerNamespace($attrPrefix, $uri);
                        $prefixToUri[$attrPrefix] = $uri;
                    }
                }
            }

            // Enqueue children (only DOMElement to keep types precise)
            foreach ($el->childNodes as $child) {
                if ($child instanceof DOMElement) {
                    $queue[] = [$child, $depth + 1];
                }
            }
        }
    }


    /**
     * Do an XPath query on an XML node.
     *
     * @param \DOMNode $node The XML node used as context for the query.
     * @param string $query The query.
     * @param \DOMXPath $xpCache The DOMXPath object
     * @return array<\DOMNode> Array with matching DOM nodes, indexed from 0 in result order.
     */
    public static function xpQuery(DOMNode $node, string $query, DOMXPath $xpCache): array
    {
        $results = $xpCache->query($query, $node);
        Assert::notFalse($results, 'Malformed XPath query or invalid contextNode provided.');

        // DOMNodeList is traversable; dropping the keys yields a plain 0-based list.
        return iterator_to_array($results, preserve_keys: false);
    }
}
|
263
|
|
|
|