1 | <?php |
||
2 | /** |
||
3 | * This file is part of the O2System Framework package. |
||
4 | * |
||
5 | * For the full copyright and license information, please view the LICENSE |
||
6 | * file that was distributed with this source code. |
||
7 | * |
||
8 | * @author Steeve Andrian Salim |
||
9 | * @copyright Copyright (c) Steeve Andrian Salim |
||
10 | */ |
||
11 | |||
12 | // ------------------------------------------------------------------------ |
||
13 | |||
14 | namespace O2System\Html\Dom; |
||
15 | |||
16 | // ------------------------------------------------------------------------ |
||
17 | |||
18 | use InvalidArgumentException; |
||
19 | use O2System\Html\Dom\Lists\Nodes; |
||
20 | use RuntimeException; |
||
21 | |||
22 | /** |
||
23 | * Class XPath |
||
24 | * |
||
25 | * @package O2System\Html\Dom |
||
26 | */ |
||
27 | class XPath extends \DOMXPath |
||
28 | { |
||
29 | /** |
||
30 | * XPath Compiled Expressions |
||
31 | * |
||
32 | * @var array |
||
33 | */ |
||
34 | private $compiledExpressions = []; |
||
35 | |||
36 | // ------------------------------------------------------------------------ |
||
37 | |||
/**
 * XPath::query
 *
 * Evaluates an XPath expression, or a CSS selector that is first compiled to XPath.
 *
 * The input is treated as a CSS selector only when it contains no "/" character;
 * anything containing a slash is handed to DOMXPath unchanged. As a consequence a
 * CSS selector whose attribute value contains a slash is NOT compiled.
 *
 * @see http://php.net/manual/en/domxpath.query.php
 *
 * @param string        $expression     XPath expression or CSS selector to execute.
 * @param \DOMNode|null $context        Optional context node for relative queries.
 * @param bool|null     $registerNodeNS Forwarded to DOMXPath::query(); null selects
 *                                      DOMXPath's documented default (true).
 *
 * @return Nodes Node list object built from the DOMXPath::query() result.
 */
public function query($expression, \DOMNode $context = null, $registerNodeNS = null)
{
    if (strpos($expression, '/') === false) {
        $expression = $this->fetchExpression($expression);
    }

    // The flag used to be dropped silently. Map null to the parent's default so
    // existing callers keep the same behaviour while explicit values are honoured.
    $registerNodeNS = ($registerNodeNS === null) ? true : (bool) $registerNodeNS;

    // NOTE(review): DOMXPath::query() returns false for a malformed expression;
    // how Lists\Nodes reacts to that is not visible from this file - confirm.
    return new Lists\Nodes(parent::query($expression, $context, $registerNodeNS));
}
||
67 | |||
68 | // ------------------------------------------------------------------------ |
||
69 | |||
/**
 * XPath::fetchExpression
 *
 * Compiles a comma separated group of CSS selectors into one XPath union.
 *
 * Each selector is compiled once and memoised in $compiledExpressions, keyed by
 * the trimmed selector text.
 *
 * @param string $expression One or more CSS selectors separated by commas.
 *
 * @return string XPath expressions joined with "|".
 */
private function fetchExpression($expression)
{
    $paths = [];

    foreach ($this->splitSelectorGroup($expression) as $selector) {
        $selector = trim($selector);

        if (! array_key_exists($selector, $this->compiledExpressions)) {
            $this->compiledExpressions[ $selector ] = $this->fetchCssExpression($selector);
        }

        $paths[] = $this->compiledExpressions[ $selector ];
    }

    return implode('|', $paths);
}

/**
 * XPath::splitSelectorGroup
 *
 * Splits a selector group on top-level commas only, so that commas inside
 * parentheses, square brackets or quotes (e.g. ":contains(foo, true)") stay
 * attached to their selector.
 *
 * When the brackets or quotes are unbalanced the input is split on every comma,
 * which is what a plain explode() would do.
 *
 * @param string $expression
 *
 * @return array List of raw (untrimmed) selectors.
 */
private function splitSelectorGroup($expression)
{
    $expression = (string) $expression;
    $parts = [];
    $current = '';
    $depth = 0;
    $quote = null;
    $length = strlen($expression);

    for ($i = 0; $i < $length; $i++) {
        $char = $expression[ $i ];

        if ($quote !== null) {
            // inside a quoted string: only the matching quote ends it
            if ($char === $quote) {
                $quote = null;
            }
        } elseif ($char === '"' || $char === "'") {
            $quote = $char;
        } elseif ($char === '(' || $char === '[') {
            $depth++;
        } elseif ($char === ')' || $char === ']') {
            $depth--;

            if ($depth < 0) {
                return explode(',', $expression);
            }
        } elseif ($char === ',' && $depth === 0) {
            $parts[] = $current;
            $current = '';

            continue;
        }

        $current .= $char;
    }

    if ($quote !== null || $depth !== 0) {
        return explode(',', $expression);
    }

    $parts[] = $current;

    return $parts;
}
||
98 | |||
99 | // ------------------------------------------------------------------------ |
||
100 | |||
/**
 * XPath::fetchCssExpression
 *
 * Translates a single CSS selector into an XPath expression.
 *
 * A trailing "::property" part is split off first and appended to the final path.
 * A leading ">" switches the first step to a direct-child step. The remaining
 * selector is then consumed segment by segment from left to right.
 *
 * @param string $selector A CSS selector
 * @param string $prefix   Axis prefix used for the first step ("//", "/", ".//" ...)
 *
 * @return string XPath expression
 */
private function fetchCssExpression($selector, $prefix = '//')
{
    $suffix = null;
    $propertyAt = strrpos($selector, '::');

    if ($propertyAt !== false) {
        $parsed = $this->fetchCssProperty(substr($selector, $propertyAt + 2));
        $suffix = $this->parseCssProperty($parsed[ 'name' ], $parsed[ 'args' ]);

        $selector = substr($selector, 0, $propertyAt);
    }

    if (strpos($selector, '>') === 0) {
        $prefix = '/';
        $selector = ltrim($selector, '> ');
    }

    $xpath = '';

    do {
        $segments = $this->getSelectorSegments($selector);

        $xpath .= $this->generateExpression($segments, $prefix);

        // drop the part that was just translated and pick the axis for the next step
        $selector = trim(substr($selector, strlen($segments[ 'selector' ])));
        $prefix = isset($segments[ 'rel' ]) ? '/' : '//';
    } while ($selector !== '');

    if ($suffix !== null) {
        $xpath = $xpath . '/' . $suffix;
    }

    return $xpath;
}
||
151 | |||
152 | // ------------------------------------------------------------------------ |
||
153 | |||
/**
 * XPath::fetchCssProperty
 *
 * Splits the text following "::" into a property name and its argument list.
 *
 * Arguments are the "|" separated values found between parentheses. Every part
 * of the pattern is optional, so it matches any input at offset 0; unknown names
 * are therefore only rejected later by the caller.
 *
 * @param string $property Property text, e.g. "text" or "attr(href|title)"
 *
 * @return array Map with the keys "name" (string) and "args" (list of strings)
 *
 * @throws RuntimeException if the pattern does not match
 */
protected function fetchCssProperty($property)
{
    $namePart = '(?P<name>[\w\-]*)';
    $argsPart = '(?:\((?P<args>[^\)]+)\))';
    $pattern = '/(?:' . $namePart . $argsPart . '?)?/is';

    if (! preg_match($pattern, $property, $matches)) {
        throw new RuntimeException('Invalid selector');
    }

    return [
        'name' => $matches[ 'name' ],
        'args' => isset($matches[ 'args' ]) ? explode('|', $matches[ 'args' ]) : [],
    ];
}
||
178 | |||
179 | // ------------------------------------------------------------------------ |
||
180 | |||
/**
 * XPath::parseCssProperty
 *
 * Converts a "::property" into the XPath step that selects it.
 *
 * Supported properties:
 *  - text          selects the text nodes
 *  - attr          selects every attribute
 *  - attr(a|b|...) selects only the named attributes
 *
 * @param string $name Property name
 * @param array  $args Attribute names (only used by "attr")
 *
 * @return string XPath step
 *
 * @throws RuntimeException if the property name is not supported
 */
protected function parseCssProperty($name, $args = [])
{
    if ($name === 'text') {
        return 'text()';
    }

    if ($name === 'attr') {
        $conditions = [];

        foreach ($args as $attribute) {
            // tolerate "attr(href | title)"; a padded name could never match name()
            $attribute = trim($attribute);

            if ($attribute !== '') {
                $conditions[] = sprintf('name() = "%s"', $attribute);
            }
        }

        // without names an empty predicate "@*[]" would be invalid XPath
        if (count($conditions) === 0) {
            return '@*';
        }

        return sprintf('@*[%s]', implode(' or ', $conditions));
    }

    throw new RuntimeException('HTML_E_INVALID_CSS_PROPERTY');
}
||
207 | |||
208 | // ------------------------------------------------------------------------ |
||
209 | |||
/**
 * XPath::getSelectorSegments
 *
 * Parses the first compound selector found at the start of $selector into its
 * parts (tag name, ID, classes, attributes, pseudo-class, child combinator).
 *
 * Returned keys:
 *  - selector    the exact text that was consumed (callers use its length to advance)
 *  - tag         tag name, "*" when none was given
 *  - id          only when an ID was given
 *  - attributes  map of attribute name (including operator characters) to value or null
 *  - classes     list of class names
 *  - pseudo/expr pseudo-class name and its parenthesised argument
 *  - rel         ">" when the segment is followed by a child combinator
 *
 * Known limitation: the pseudo-class argument stops at the first ")", so nested
 * parentheses are not supported.
 *
 * @param string $selector CSS selector
 *
 * @return array
 *
 * @throws \InvalidArgumentException if an empty string is passed
 * @throws \RuntimeException if the selector is not valid
 */
public function getSelectorSegments($selector)
{
    $selector = trim($selector);

    if ($selector === '') {
        throw new InvalidArgumentException('HTML_E_INVALID_SELECTOR');
    }

    $tag = '(?P<tag>[\*|\w|\-]+)?';
    $id = '(?:#(?P<id>[\w|\-]+))?';
    $classes = '(?P<classes>\.[\w|\-|\.]+)*';
    // Consecutive [..] groups. Each group ends at its own closing bracket (quoted
    // values may contain "]"), so a later segment such as the "b[title]" in
    // "a[href] > b[title]" is no longer swallowed by a greedy ".+".
    $attrs = '(?P<attrs>(?:\[(?:"[^"]*"|\'[^\']*\'|[^\]])+\])*)';
    $pseudoName = '(?P<pseudo>[\w\-]*)';
    $expr = '(?:\((?P<expr>[^\)]+)\))';
    $pseudo = '(?::' . $pseudoName . $expr . '?)?';
    $rel = '\s*(?P<rel>>)?';

    $regexp = '/' . $tag . $id . $classes . $attrs . $pseudo . $rel . '/is';

    if (! preg_match($regexp, $selector, $segments)) {
        throw new RuntimeException('HTML_E_INVALID_SELECTOR');
    }

    // every part of the pattern is optional; an empty match means nothing was recognised
    if ($segments[ 0 ] === '') {
        throw new RuntimeException('HTML_E_INVALID_SELECTOR');
    }

    $result = [];

    $result[ 'selector' ] = $segments[ 0 ];
    $result[ 'tag' ] = (isset($segments[ 'tag' ]) && $segments[ 'tag' ] !== '') ? $segments[ 'tag' ] : '*';

    // if the id attribute specified
    if (isset($segments[ 'id' ]) && $segments[ 'id' ] !== '') {
        $result[ 'id' ] = $segments[ 'id' ];
    }

    // if the attributes specified
    if (isset($segments[ 'attrs' ])) {
        $attributes = explode('][', trim($segments[ 'attrs' ], '[]'));

        foreach ($attributes as $attribute) {
            if ($attribute !== '') {
                list($attributeName, $value) = array_pad(explode('=', $attribute, 2), 2, null);

                // equal null if specified only the attribute name
                $result[ 'attributes' ][ $attributeName ] = is_string($value) ? trim($value, '\'"') : null;
            }
        }
    }

    // if the class attribute specified
    if (isset($segments[ 'classes' ])) {
        foreach (explode('.', trim($segments[ 'classes' ], '.')) as $class) {
            if ($class !== '') {
                $result[ 'classes' ][] = $class;
            }
        }
    }

    // if the pseudo class specified
    if (isset($segments[ 'pseudo' ]) && $segments[ 'pseudo' ] !== '') {
        $result[ 'pseudo' ] = $segments[ 'pseudo' ];

        if (isset($segments[ 'expr' ]) && $segments[ 'expr' ] !== '') {
            $result[ 'expr' ] = $segments[ 'expr' ];
        }
    }

    // if it is a direct descendant
    if (isset($segments[ 'rel' ])) {
        $result[ 'rel' ] = $segments[ 'rel' ];
    }

    return $result;
}
||
300 | |||
301 | // ------------------------------------------------------------------------ |
||
302 | |||
/**
 * XPath::generateExpression
 *
 * Builds one XPath location step from the parsed parts of a compound selector.
 *
 * ID, classes, attribute selectors and the pseudo-class each contribute one
 * predicate condition; several conditions are parenthesised and joined with "and".
 * The pseudo-class handler receives the tag name by reference and may replace it.
 *
 * @param array  $segments Parts as returned by getSelectorSegments()
 * @param string $prefix   Specifies the nesting of nodes
 *
 * @return string XPath expression
 *
 * @throws InvalidArgumentException if you neither specify tag name nor attributes
 */
private function generateExpression($segments, $prefix = '//')
{
    $hasTag = isset($segments[ 'tag' ]);
    $tagName = $hasTag ? $segments[ 'tag' ] : '*';

    $conditions = [];

    if (isset($segments[ 'id' ])) {
        $conditions[] = sprintf('@id="%s"', $segments[ 'id' ]);
    }

    if (isset($segments[ 'classes' ])) {
        foreach ($segments[ 'classes' ] as $className) {
            // whole-word match inside the space separated class list
            $conditions[] = sprintf('contains(concat(" ", normalize-space(@class), " "), " %s ")', $className);
        }
    }

    if (isset($segments[ 'attributes' ])) {
        foreach ($segments[ 'attributes' ] as $attributeName => $attributeValue) {
            $conditions[] = $this->fetchCssAttributeSelector($attributeName, $attributeValue);
        }
    }

    if (isset($segments[ 'pseudo' ])) {
        $argument = '';

        if (isset($segments[ 'expr' ])) {
            $argument = trim($segments[ 'expr' ]);
        }

        $conditions[] = $this->fetchCssPseudoSelector($segments[ 'pseudo' ], explode(',', $argument), $tagName);
    }

    $total = count($conditions);

    if ($total === 0 && ! $hasTag) {
        throw new InvalidArgumentException(
            'The array of segments should contain the name of the tag or at least one attribute'
        );
    }

    $step = $prefix . $tagName;

    if ($total === 1) {
        return $step . sprintf('[%s]', $conditions[ 0 ]);
    }

    if ($total > 1) {
        return $step . sprintf('[(%s)]', implode(') and (', $conditions));
    }

    return $step;
}
||
366 | |||
367 | // ------------------------------------------------------------------------ |
||
368 | |||
/**
 * XPath::fetchCssAttributeSelector
 *
 * Converts one attribute selector into an XPath predicate condition.
 *
 * The operator is encoded in the attribute name as produced by the selector
 * parser (the text left of "="):
 *  - "^name"  attribute whose name starts with "name", optionally with a value
 *  - "!name"  attribute must be absent
 *  - "name^"  value starts with      "name$"  value ends with
 *  - "name*"  value contains         "name!"  value differs
 *  - "name~"  value contains the word in a space separated list
 *  - "name"   attribute present (null value) or equal to the value
 *
 * Values are inserted between double quotes without escaping.
 *
 * @param string      $name  The attribute name, possibly with an operator character
 * @param string|null $value The attribute value, null when only the name was given
 *
 * @return string
 */
protected function fetchCssAttributeSelector($name, $value)
{
    $leading = substr($name, 0, 1);

    // if the attribute name starts with ^
    // example: *[^data-]
    if ($leading === '^') {
        $xpath = sprintf('@*[starts-with(name(), "%s")]', substr($name, 1));

        return $value === null ? $xpath : sprintf('%s="%s"', $xpath, $value);
    }

    // if the attribute name starts with !
    // example: input[!disabled]
    if ($leading === '!') {
        return sprintf('not(@%s)', substr($name, 1));
    }

    $attribute = substr($name, 0, -1);

    switch (substr($name, -1)) {
        case '^':
            return sprintf('starts-with(@%s, "%s")', $attribute, $value);
        case '$':
            // XPath 1.0 (the version libxml implements) has no ends-with();
            // compare the tail of the attribute value instead.
            return sprintf(
                'substring(@%1$s, string-length(@%1$s) - string-length("%2$s") + 1) = "%2$s"',
                $attribute,
                $value
            );
        case '*':
            return sprintf('contains(@%s, "%s")', $attribute, $value);
        case '!':
            return sprintf('not(@%s="%s")', $attribute, $value);
        case '~':
            return sprintf(
                'contains(concat(" ", normalize-space(@%s), " "), " %s ")',
                $attribute,
                $value
            );
    }

    // if specified only the attribute name
    return $value === null ? '@' . $name : sprintf('@%s="%s"', $name, $value);
}
||
423 | |||
424 | // ------------------------------------------------------------------------ |
||
425 | |||
/**
 * XPath::fetchCssPseudoSelector
 *
 * Converts a CSS pseudo-class into an XPath predicate condition.
 *
 * Supported: first-child, last-child, nth-child, nth-of-type, contains, has,
 * not, empty, not-empty.
 *
 * @param string $pseudo     Pseudo-class
 * @param array  $parameters Comma separated arguments of the pseudo-class
 * @param string $tagName    Tag name of the current step, passed by reference;
 *                           "nth-child" moves the name test into the predicate
 *                           and resets this to "*"
 *
 * @return string
 *
 * @throws \RuntimeException if passed an unknown pseudo-class
 */
protected function fetchCssPseudoSelector($pseudo, $parameters, &$tagName)
{
    // the first argument is what most pseudo-classes operate on
    $first = isset($parameters[ 0 ]) ? $parameters[ 0 ] : '';

    switch ($pseudo) {
        case 'first-child':
            return 'position() = 1';
        case 'last-child':
            return 'position() = last()';
        case 'nth-child':
            $xpath = sprintf(
                '(name()="%s") and (%s)',
                $tagName,
                $this->fetchCssPseudoNthSelector($first)
            );
            $tagName = '*';

            return $xpath;
        case 'contains':
            $string = trim($first, ' \'"');
            // "&&" is required here: with "and" the assignment binds first and the
            // flag would only reflect whether a second argument exists.
            $caseSensitive = isset($parameters[ 1 ]) && (trim($parameters[ 1 ]) === 'true');

            return $this->fetchCssPseudoContainsSelector($string, $caseSensitive);
        case 'has':
            return $this->fetchCssExpression($first, './/');
        case 'not':
            // self:: tests the context node itself against the negated selector
            return sprintf('not(self::%s)', $this->fetchCssExpression($first, ''));
        case 'nth-of-type':
            return $this->fetchCssPseudoNthSelector($first);
        case 'empty':
            return 'count(descendant::*) = 0';
        case 'not-empty':
            return 'count(descendant::*) > 0';
    }

    throw new RuntimeException('Invalid selector: unknown pseudo-class');
}
||
483 | |||
484 | // ------------------------------------------------------------------------ |
||
485 | |||
/**
 * XPath::fetchCssPseudoNthSelector
 *
 * Converts an nth-expression into an XPath condition on position().
 *
 * Accepted forms: "odd", "even", a number, and "an", "an+b", "an-b" with a
 * non-negative multiplier "a" ("n" alone means a = 1). Whitespace inside the
 * expression is ignored. Negative multipliers ("-n+3") are not supported.
 *
 * @param string $expression nth-expression
 *
 * @return string
 *
 * @throws \RuntimeException if passed nth-child is empty
 * @throws \RuntimeException if passed an unknown nth-child expression
 */
protected function fetchCssPseudoNthSelector($expression)
{
    // CSS allows "2n + 1"; normalise before matching
    $expression = (string) preg_replace('/\s+/', '', (string) $expression);

    if ($expression === '') {
        throw new RuntimeException(
            'Invalid selector: nth-child (or nth-last-child) expression must not be empty'
        );
    }

    if ($expression === 'odd') {
        return 'position() mod 2 = 1 and position() >= 1';
    }

    if ($expression === 'even') {
        return 'position() mod 2 = 0 and position() >= 0';
    }

    if (is_numeric($expression)) {
        return sprintf('position() = %d', $expression);
    }

    if (preg_match('/^(?P<mul>[0-9]*)n(?:(?P<sign>[+\-])(?P<pos>[0-9]+))?$/i', $expression, $segments)) {
        $multiplier = $segments[ 'mul' ] === '' ? 1 : (int) $segments[ 'mul' ];
        $offset = isset($segments[ 'pos' ]) ? (int) $segments[ 'pos' ] : 0;
        $subtract = isset($segments[ 'sign' ]) && $segments[ 'sign' ] === '+';

        // "0n+b" selects exactly position b; avoids "mod 0" in the generated XPath
        if ($multiplier === 0) {
            return sprintf('position() = %d', $subtract ? $offset : -$offset);
        }

        // an+b matches positions p with (p - b) mod a = 0 and p >= b.
        // For an-b the first match lies below b, so no lower bound applies.
        return sprintf(
            '(position() %s %d) mod %d = 0 and position() >= %d',
            $subtract ? '-' : '+',
            $offset,
            $multiplier,
            $subtract ? $offset : 0
        );
    }

    throw new RuntimeException('Invalid selector: invalid nth-child expression');
}
||
536 | |||
537 | // ------------------------------------------------------------------------ |
||
538 | |||
/**
 * XPath::fetchCssPseudoContainsSelector
 *
 * Builds the condition used by the ":contains()" pseudo-class.
 *
 * The case-sensitive form compares text() for equality with the given string.
 * The case-insensitive form lower-cases both sides through php:functionString,
 * preferring mb_strtolower when the mbstring function exists.
 *
 * NOTE(review): php:functionString presumably needs the "php" namespace and
 * registerPhpFunctions() to be set up on this object; that setup is not visible
 * in this class - confirm it happens in the caller.
 *
 * @param string $string        Text to look for
 * @param bool   $caseSensitive Whether the comparison is case-sensitive
 *
 * @return string
 */
protected function fetchCssPseudoContainsSelector($string, $caseSensitive = false)
{
    if ($caseSensitive) {
        $format = 'text() = "%s"';
    } elseif (function_exists('mb_strtolower')) {
        $format = 'php:functionString("mb_strtolower", .) = php:functionString("mb_strtolower", "%s")';
    } else {
        $format = 'php:functionString("strtolower", .) = php:functionString("strtolower", "%s")';
    }

    return sprintf($format, $string);
}
||
562 | } |