Total Complexity | 87 |
Total Lines | 533 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like XPath often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XPath, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
27 | class XPath extends \DOMXPath |
||
28 | { |
||
29 | /** |
||
30 | * XPath Compiled Expressions |
||
31 | * |
||
32 | * @var array |
||
33 | */ |
||
34 | private $compiledExpressions = []; |
||
35 | |||
36 | // ------------------------------------------------------------------------ |
||
37 | |||
38 | /** |
||
39 | * XPath::query |
||
40 | * |
||
41 | * Evaluates the given XPath expression. |
||
42 | * |
||
43 | * @see http://php.net/manual/en/domxpath.query.php |
||
44 | * |
||
45 | * @param string $expression <p> |
||
46 | * The XPath expression to execute. |
||
47 | * </p> |
||
48 | * @param \DOMNode $context [optional] <p> |
||
49 | * The optional node context can be specified for |
||
50 | * doing relative XPath queries. By default, the queries are relative to |
||
51 | * the root element. |
||
52 | * </p> |
||
53 | * |
||
54 | * @return Nodes a DOMNodeList containing all nodes matching |
||
55 | * the given XPath expression. Any expression which does |
||
56 | * not return nodes will return an empty DOMNodeList. |
||
57 | * @since 5.0 |
||
58 | */ |
||
59 | public function query($expression, \DOMNode $context = null, $registerNodeNS = null) |
||
66 | } |
||
67 | |||
68 | // ------------------------------------------------------------------------ |
||
69 | |||
/**
 * XPath::fetchExpression
 *
 * Compiles a (possibly comma-separated) list of CSS selectors into a single
 * XPath expression in which the individual paths are joined with "|".
 *
 * The list is split only on commas that sit outside of "(...)" and "[...]",
 * so pseudo-class arguments such as ":contains(text, true)" and attribute
 * values such as [title="a,b"] stay part of their selector.
 *
 * Each trimmed selector is compiled once through fetchCssExpression() and the
 * result is kept in $this->compiledExpressions, keyed by the selector text.
 *
 * @param string $expression One or more CSS selectors separated by commas
 *
 * @return string
 */
private function fetchExpression($expression)
{
    $expression = (string) $expression;

    // Split on top-level commas only; $depth counts currently open "(" / "[".
    $selectors = [];
    $buffer = '';
    $depth = 0;
    $length = strlen($expression);

    for ($index = 0; $index < $length; $index++) {
        $char = $expression[ $index ];

        if ($char === '(' || $char === '[') {
            $depth++;
        } elseif ($char === ')' || $char === ']') {
            // Never go negative on a stray closing bracket.
            $depth = $depth > 0 ? $depth - 1 : 0;
        } elseif ($char === ',' && $depth === 0) {
            $selectors[] = $buffer;
            $buffer = '';

            continue;
        }

        $buffer .= $char;
    }

    // The trailing piece (or the whole string when there is no comma at all).
    $selectors[] = $buffer;

    $paths = [];

    foreach ($selectors as $selector) {
        $selector = trim($selector);

        if ( ! array_key_exists($selector, $this->compiledExpressions)) {
            $this->compiledExpressions[ $selector ] = $this->fetchCssExpression($selector);
        }

        $paths[] = $this->compiledExpressions[ $selector ];
    }

    return implode('|', $paths);
}
||
98 | |||
99 | // ------------------------------------------------------------------------ |
||
100 | |||
/**
 * XPath::fetchCssExpression
 *
 * Translates one CSS selector into the equivalent XPath expression.
 *
 * A trailing "::property" part (located via the last "::" in the selector) is
 * resolved through fetchCssProperty() / parseCssProperty() and appended to the
 * finished path as an extra "/..." step. A selector starting with ">" is
 * compiled with the "/" prefix instead of the one passed in.
 *
 * @param string $selector A CSS selector
 * @param string $prefix   Specifies the nesting of nodes
 *
 * @return string XPath expression
 */
private function fetchCssExpression($selector, $prefix = '//')
{
    $propertyStep = null;
    $separatorAt = strrpos($selector, '::');

    if (false !== $separatorAt) {
        $parsed = $this->fetchCssProperty(substr($selector, $separatorAt + 2));
        $propertyStep = $this->parseCssProperty($parsed[ 'name' ], $parsed[ 'args' ]);

        // Keep only the part in front of the "::" for the path itself.
        $selector = substr($selector, 0, $separatorAt);
    }

    if (strpos($selector, '>') === 0) {
        $selector = ltrim($selector, '> ');
        $prefix = '/';
    }

    $xpath = '';

    // Consume the selector one segment at a time until nothing is left.
    for (
        $segments = $this->getSelectorSegments($selector);
        count($segments) > 0;
        $segments = $this->getSelectorSegments($selector)
    ) {
        $xpath .= $this->generateExpression($segments, $prefix);

        $consumed = strlen($segments[ 'selector' ]);
        $selector = trim(substr($selector, $consumed));

        // A "rel" entry on the segment switches the next step to the "/" prefix.
        if (isset($segments[ 'rel' ])) {
            $prefix = '/';
        } else {
            $prefix = '//';
        }

        if ('' === $selector) {
            break;
        }
    }

    if (null !== $propertyStep) {
        return $xpath . '/' . $propertyStep;
    }

    return $xpath;
}
||
151 | |||
152 | // ------------------------------------------------------------------------ |
||
153 | |||
/**
 * XPath::fetchCssProperty
 *
 * Splits the text that follows "::" in a selector into a property name and
 * its optional, "|"-separated argument list, e.g. "attr(href|src)".
 *
 * The pattern is anchored at the start and requires a non-empty name, so
 * input that does not begin with a property name is rejected here.
 * Characters after the matched part are ignored.
 *
 * @param string $property The property part of a selector, without the leading "::"
 *
 * @return array ['name' => string, 'args' => string[]]
 *
 * @throws RuntimeException if the text does not start with a property name
 */
protected function fetchCssProperty($property)
{
    $name = '(?P<name>[\w\-]+)';
    $args = '(?:\((?P<args>[^\)]+)\))';
    $regexp = '/^' . $name . $args . '?/is';

    if (preg_match($regexp, (string) $property, $segments)) {
        $result = [];

        $result[ 'name' ] = $segments[ 'name' ];
        // Whitespace around an argument is never part of an attribute name.
        $result[ 'args' ] = isset($segments[ 'args' ])
            ? array_map('trim', explode('|', $segments[ 'args' ]))
            : [];

        return $result;
    }

    throw new RuntimeException('Invalid selector');
}
||
178 | |||
179 | // ------------------------------------------------------------------------ |
||
180 | |||
/**
 * XPath::parseCssProperty
 *
 * Turns a parsed "::property" into the XPath step that selects it:
 *  - "text" selects the text nodes: text()
 *  - "attr" selects the attributes whose name is listed in $args, or every
 *    attribute (@*) when no names are given.
 *
 * @param string $name Property name ("text" or "attr")
 * @param array  $args Attribute names, only used by "attr"
 *
 * @return string
 *
 * @throws RuntimeException if the property name is not supported
 */
protected function parseCssProperty($name, $args = [])
{
    if ($name === 'text') {
        return 'text()';
    }

    if ($name === 'attr') {
        // An empty predicate ("@*[]") is not valid XPath, so no names means "all attributes".
        if (count($args) === 0) {
            return '@*';
        }

        $attributes = [];

        foreach ($args as $attribute) {
            $attributes[] = sprintf('name() = "%s"', $attribute);
        }

        return sprintf('@*[%s]', implode(' or ', $attributes));
    }

    throw new RuntimeException('HTML_E_INVALID_CSS_PROPERTY');
}
||
207 | |||
208 | // ------------------------------------------------------------------------ |
||
209 | |||
210 | /** |
||
211 | * XPath::getSelectorSegments |
||
212 | * |
||
213 | * Splits the CSS selector into parts (tag name, ID, classes, attributes, pseudo-class). |
||
214 | * |
||
215 | * @param string $selector CSS selector |
||
216 | * |
||
217 | * @return array |
||
218 | * |
||
219 | * @throws \InvalidArgumentException if an empty string is passed |
||
220 | * @throws \RuntimeException if the selector is not valid |
||
221 | */ |
||
222 | public function getSelectorSegments($selector) |
||
299 | } |
||
300 | |||
301 | // ------------------------------------------------------------------------ |
||
302 | |||
303 | /** |
||
304 | * XPath::generateExpression |
||
305 | * |
||
306 | * @param array $segments |
||
307 | * @param string $prefix Specifies the nesting of nodes |
||
308 | * |
||
309 | * @return string XPath expression |
||
310 | * |
||
311 | * @throws InvalidArgumentException if neither a tag name nor attributes are specified |
||
312 | */ |
||
313 | private function generateExpression($segments, $prefix = '//') |
||
365 | } |
||
366 | |||
367 | // ------------------------------------------------------------------------ |
||
368 | |||
/**
 * XPath::fetchCssAttributeSelector
 *
 * Converts one CSS attribute selector into an XPath predicate. The operator
 * is carried as a marker character on the attribute name:
 *
 *  - leading "^"  (*[^data-])      attributes whose name starts with the text
 *  - leading "!"  (input[!disabled]) the attribute is absent
 *  - trailing "^" / "$" / "*"      value starts with / ends with / contains
 *  - trailing "!"                  value is not equal
 *  - trailing "~"                  value contains the whitespace-separated word
 *  - no marker                     attribute exists ($value null) or equals $value
 *
 * The "ends with" test is written with substring() and string-length()
 * because ends-with() only exists from XPath 2.0 on.
 *
 * NOTE: $name and $value are inserted into the expression verbatim; a value
 * containing a double quote produces an invalid expression.
 *
 * @param string      $name  The attribute name, including any operator marker
 * @param string|null $value The attribute value
 *
 * @return string
 */
protected function fetchCssAttributeSelector($name, $value)
{
    // if the attribute name starts with ^
    // example: *[^data-]
    if (substr($name, 0, 1) === '^') {
        $xpath = sprintf('@*[starts-with(name(), "%s")]', substr($name, 1));

        return $value === null ? $xpath : sprintf('%s="%s"', $xpath, $value);
    }

    // if the attribute name starts with !
    // example: input[!disabled]
    if (substr($name, 0, 1) === '!') {
        return sprintf('not(@%s)', substr($name, 1));
    }

    $attribute = substr($name, 0, -1);

    switch (substr($name, -1)) {
        case '^':
            $xpath = sprintf('starts-with(@%s, "%s")', $attribute, $value);
            break;
        case '$':
            // XPath 1.0 replacement for ends-with(@attr, "value").
            $xpath = sprintf(
                'substring(@%1$s, string-length(@%1$s) - string-length("%2$s") + 1) = "%2$s"',
                $attribute,
                $value
            );
            break;
        case '*':
            $xpath = sprintf('contains(@%s, "%s")', $attribute, $value);
            break;
        case '!':
            $xpath = sprintf('not(@%s="%s")', $attribute, $value);
            break;
        case '~':
            $xpath = sprintf(
                'contains(concat(" ", normalize-space(@%s), " "), " %s ")',
                $attribute,
                $value
            );
            break;
        default:
            // if specified only the attribute name
            $xpath = $value === null ? '@' . $name : sprintf('@%s="%s"', $name, $value);
            break;
    }

    return $xpath;
}
||
423 | |||
424 | // ------------------------------------------------------------------------ |
||
425 | |||
/**
 * XPath::fetchCssPseudoSelector
 *
 * Converts a CSS pseudo-class into an XPath predicate.
 *
 * For "nth-child" the current tag name is moved into the predicate and
 * $tagName is overwritten with "*", which is why it is taken by reference.
 *
 * $parameters no longer declares a default value: an optional parameter in
 * front of the required $tagName could never be omitted anyway and is
 * reported as deprecated by PHP 8.
 *
 * @param string $pseudo     Pseudo-class
 * @param array  $parameters Arguments of the pseudo-class; a missing first
 *                           argument is treated as an empty string
 * @param string $tagName    Tag name of the current segment (may be changed)
 *
 * @return string
 *
 * @throws \RuntimeException if passed an unknown pseudo-class
 */
protected function fetchCssPseudoSelector($pseudo, $parameters, &$tagName)
{
    // Avoids undefined-index notices for pseudo-classes written without arguments.
    $argument = isset($parameters[ 0 ]) ? $parameters[ 0 ] : '';

    switch ($pseudo) {
        case 'first-child':
            return 'position() = 1';
        case 'last-child':
            return 'position() = last()';
        case 'nth-child':
            $xpath = sprintf(
                '(name()="%s") and (%s)',
                $tagName,
                $this->fetchCssPseudoNthSelector($argument)
            );
            $tagName = '*';

            return $xpath;
        case 'contains':
            $string = trim($argument, ' \'"');
            // "&&" rather than "and": "=" binds tighter than "and", which
            // would drop the comparison with 'true' from the assignment.
            $caseSensitive = isset($parameters[ 1 ]) && trim($parameters[ 1 ]) === 'true';

            return $this->fetchCssPseudoContainsSelector($string, $caseSensitive);
        case 'has':
            return $this->fetchCssExpression($argument, './/');
        case 'not':
            // The negated selector is tested against the current node via the self axis.
            return sprintf('not(self::%s)', $this->fetchCssExpression($argument, ''));
        case 'nth-of-type':
            return $this->fetchCssPseudoNthSelector($argument);
        case 'empty':
            return 'count(descendant::*) = 0';
        case 'not-empty':
            return 'count(descendant::*) > 0';
    }

    throw new RuntimeException('Invalid selector: unknown pseudo-class');
}
||
483 | |||
484 | // ------------------------------------------------------------------------ |
||
485 | |||
/**
 * XPath::fetchCssPseudoNthSelector
 *
 * Converts an nth-expression into an XPath predicate on position().
 *
 * Accepted forms (surrounding whitespace and letter case are ignored):
 *  - "odd" / "even"
 *  - a number, e.g. "3"
 *  - "an", "an+b" or "an-b" with a non-negative integer a (omitted a means 1),
 *    optionally with whitespace around the sign
 *
 * For "an-b" the lower bound of the predicate is -b, so early positions that
 * belong to the sequence are matched too. For "0n+b" the predicate is
 * "position() = b", which avoids "mod 0".
 *
 * @param string $expression nth-expression
 *
 * @return string
 *
 * @throws \RuntimeException if passed nth-child is empty
 * @throws \RuntimeException if passed an unknown nth-child expression
 */
protected function fetchCssPseudoNthSelector($expression)
{
    $expression = strtolower(trim((string) $expression));

    if ($expression === '') {
        throw new RuntimeException(
            'Invalid selector: nth-child (or nth-last-child) expression must not be empty'
        );
    }

    if ($expression === 'odd') {
        return 'position() mod 2 = 1 and position() >= 1';
    }

    if ($expression === 'even') {
        return 'position() mod 2 = 0 and position() >= 0';
    }

    if (is_numeric($expression)) {
        return sprintf('position() = %d', $expression);
    }

    if (preg_match('/^(?P<mul>[0-9]*)n(?:\s*(?P<sign>[+\-])\s*(?P<pos>[0-9]+))?$/', $expression, $segments)) {
        $multiplier = $segments[ 'mul' ] === '' ? 1 : (int) $segments[ 'mul' ];
        $position = isset($segments[ 'pos' ]) ? (int) $segments[ 'pos' ] : 0;
        $hasPlus = isset($segments[ 'sign' ]) && $segments[ 'sign' ] === '+';
        $hasMinus = isset($segments[ 'sign' ]) && $segments[ 'sign' ] === '-';

        // Signed b of "an+b"; it is also the smallest value the sequence can take.
        $offset = $hasMinus ? -$position : $position;

        if ($multiplier === 0) {
            return sprintf('position() = %d', $offset);
        }

        // position() = a*n + b  <=>  (position() - b) mod a = 0, hence the flipped sign.
        $operator = $hasPlus ? '-' : '+';

        return sprintf(
            '(position() %s %d) mod %d = 0 and position() >= %d',
            $operator,
            $position,
            $multiplier,
            $offset
        );
    }

    throw new RuntimeException('Invalid selector: invalid nth-child expression');
}
||
536 | |||
537 | // ------------------------------------------------------------------------ |
||
538 | |||
539 | /** |
||
540 | * XPath::fetchCssPseudoContainsSelector |
||
541 | * |
||
542 | * @param string $string |
||
543 | * @param bool $caseSensitive |
||
544 | * |
||
545 | * @return string |
||
546 | */ |
||
547 | protected function fetchCssPseudoContainsSelector($string, $caseSensitive = false) |
||
560 | } |
||
561 | } |
||
562 | } |