| Total Complexity | 87 |
| Total Lines | 533 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like XPath often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use XPath, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 27 | class XPath extends \DOMXPath |
||
| 28 | {
|
||
| 29 | /** |
||
| 30 | * XPath Compiled Expressions |
||
| 31 | * |
||
| 32 | * @var array |
||
| 33 | */ |
||
| 34 | private $compiledExpressions = []; |
||
| 35 | |||
| 36 | // ------------------------------------------------------------------------ |
||
| 37 | |||
| 38 | /** |
||
| 39 | * XPath::query |
||
| 40 | * |
||
| 41 | * Evaluates the given XPath expression. |
||
| 42 | * |
||
| 43 | * @see http://php.net/manual/en/domxpath.query.php |
||
| 44 | * |
||
| 45 | * @param string $expression <p> |
||
| 46 | * The XPath expression to execute. |
||
| 47 | * </p> |
||
| 48 | * @param \DOMNode $context [optional] <p> |
||
| 49 | * The optional node context can be specified for |
||
| 50 | * doing relative XPath queries. By default, the queries are relative to |
||
| 51 | * the root element. |
||
| 52 | * </p> |
||
| 53 | * |
||
| 54 | * @return Nodes a DOMNodeList containing all nodes matching |
||
| 55 | * the given XPath expression. Any expression which do |
||
| 56 | * not return nodes will return an empty DOMNodeList. |
||
| 57 | * @since 5.0 |
||
| 58 | */ |
||
| 59 | public function query($expression, \DOMNode $context = null, $registerNodeNS = null) |
||
| 66 | } |
||
| 67 | |||
| 68 | // ------------------------------------------------------------------------ |
||
| 69 | |||
/**
 * XPath::fetchExpression
 *
 * Turns a comma-separated list of CSS selectors into one XPath union
 * expression. Every trimmed selector is compiled through
 * fetchCssExpression() the first time it is seen and the result is kept in
 * $compiledExpressions, so repeated selectors are not compiled again.
 *
 * @param string $expression One or more CSS selectors separated by commas.
 *
 * @return string The compiled XPath expressions joined with "|".
 */
private function fetchExpression($expression)
{
    $compiled = [];

    foreach (explode(',', $expression) as $rawSelector) {
        $cacheKey = trim($rawSelector);

        // Compile only on a cache miss; afterwards the stored result is reused.
        if ( ! array_key_exists($cacheKey, $this->compiledExpressions)) {
            $this->compiledExpressions[ $cacheKey ] = $this->fetchCssExpression($cacheKey);
        }

        $compiled[] = $this->compiledExpressions[ $cacheKey ];
    }

    return implode('|', $compiled);
}
||
| 98 | |||
| 99 | // ------------------------------------------------------------------------ |
||
| 100 | |||
/**
 * XPath::fetchCssExpression
 *
 * Converts a single CSS selector into an XPath expression.
 *
 * The selector is consumed from left to right: getSelectorSegments() parses
 * the leading simple selector, generateExpression() renders it, and the part
 * that was parsed (the segments' 'selector' entry) is cut off before the next
 * round. A trailing "::property" suffix (located with the last "::") is
 * translated by fetchCssProperty()/parseCssProperty() and appended after a
 * "/" at the end.
 *
 * @param string $selector A CSS selector
 * @param string $prefix   XPath prefix placed before the first segment; it is
 *                         replaced by "/" when the selector starts with ">".
 *
 * @return string XPath expression
 */
private function fetchCssExpression($selector, $prefix = '//')
{
    $propertyPath = null;
    $separatorAt = strrpos($selector, '::');

    if (false !== $separatorAt) {
        $parsedProperty = $this->fetchCssProperty(substr($selector, $separatorAt + 2));
        $propertyPath = $this->parseCssProperty($parsedProperty[ 'name' ], $parsedProperty[ 'args' ]);

        // Keep only the part in front of the "::" for selector parsing.
        $selector = substr($selector, 0, $separatorAt);
    }

    if ('>' === substr($selector, 0, 1)) {
        $prefix = '/';
        $selector = ltrim($selector, '> ');
    }

    $xpath = '';

    for (
        $parts = $this->getSelectorSegments($selector);
        count($parts) > 0;
        $parts = $this->getSelectorSegments($selector)
    ) {
        $xpath .= $this->generateExpression($parts, $prefix);

        // Drop the portion that was just rendered.
        $selector = trim(substr($selector, strlen($parts[ 'selector' ])));

        // A 'rel' entry in the segments makes the next step use "/" instead of "//".
        if (isset($parts[ 'rel' ])) {
            $prefix = '/';
        } else {
            $prefix = '//';
        }

        if ('' === $selector) {
            break;
        }
    }

    if (null !== $propertyPath) {
        $xpath = $xpath . '/' . $propertyPath;
    }

    return $xpath;
}
||
| 151 | |||
| 152 | // ------------------------------------------------------------------------ |
||
| 153 | |||
/**
 * XPath::fetchCssProperty
 *
 * Splits a pseudo-property such as "text" or "attr(href|title)" into its
 * name and its argument list. Arguments are the text between the
 * parentheses, split on "|".
 *
 * Note: the pattern is not anchored and each of its parts is optional.
 *
 * @param string $property The text that follows "::" in a selector.
 *
 * @return array Array with the keys 'name' (string) and 'args' (array of strings).
 *
 * @throws RuntimeException if preg_match() reports no match or an error
 */
protected function fetchCssProperty($property)
{
    $namePattern = '(?P<name>[\w\-]*)';
    $argsPattern = '(?:\((?P<args>[^\)]+)\))';
    $pattern = '/(?:' . $namePattern . $argsPattern . '?)?/is';

    if ( ! preg_match($pattern, $property, $matches)) {
        throw new RuntimeException('Invalid selector');
    }

    // The 'args' group is absent from the matches when no "(...)" was given.
    $arguments = [];

    if (isset($matches[ 'args' ])) {
        $arguments = explode('|', $matches[ 'args' ]);
    }

    return [
        'name' => $matches[ 'name' ],
        'args' => $arguments,
    ];
}
||
| 178 | |||
| 179 | // ------------------------------------------------------------------------ |
||
| 180 | |||
/**
 * XPath::parseCssProperty
 *
 * Converts a parsed pseudo-property into the XPath step that selects it.
 *
 *  - "text"            => text()
 *  - "attr"            => @*            (every attribute)
 *  - "attr" with names => @*[name() = "a" or name() = "b"]
 *
 * @param string $name Property name ("text" or "attr").
 * @param array  $args Attribute names for "attr"; surrounding whitespace is
 *                     ignored and empty entries are skipped.
 *
 * @return string XPath step
 *
 * @throws RuntimeException if the property name is not supported
 */
protected function parseCssProperty($name, $args = [])
{
    if ($name === 'text') {
        return 'text()';
    }

    if ($name === 'attr') {
        $attributes = [];

        foreach ($args as $attribute) {
            // Tolerate "attr(href | title)": padded names could never match.
            $attribute = trim($attribute);

            if ($attribute === '') {
                continue;
            }

            $attributes[] = sprintf('name() = "%s"', $attribute);
        }

        // Without any names an empty predicate "@*[]" would be invalid XPath,
        // so select all attributes instead.
        if (count($attributes) === 0) {
            return '@*';
        }

        return sprintf('@*[%s]', implode(' or ', $attributes));
    }

    throw new RuntimeException('HTML_E_INVALID_CSS_PROPERTY');
}
||
| 207 | |||
| 208 | // ------------------------------------------------------------------------ |
||
| 209 | |||
| 210 | /** |
||
| 211 | * XPath::getSelectorSegments |
||
| 212 | * |
||
| 213 | * Splits the CSS selector into parts (tag name, ID, classes, attributes, pseudo-class). |
||
| 214 | * |
||
| 215 | * @param string $selector CSS selector |
||
| 216 | * |
||
| 217 | * @return array |
||
| 218 | * |
||
| 219 | * @throws \InvalidArgumentException if an empty string is passed |
||
| 220 | * @throws \RuntimeException if the selector is not valid |
||
| 221 | */ |
||
| 222 | public function getSelectorSegments($selector) |
||
| 299 | } |
||
| 300 | |||
| 301 | // ------------------------------------------------------------------------ |
||
| 302 | |||
| 303 | /** |
||
| 304 | * XPath::generateExpression |
||
| 305 | * |
||
| 306 | * @param array $segments |
||
| 307 | * @param string $prefix Specifies the nesting of nodes |
||
| 308 | * |
||
| 309 | * @return string XPath expression |
||
| 310 | * |
||
| 311 | * @throws InvalidArgumentException if you neither specify tag name nor attributes |
||
| 312 | */ |
||
| 313 | private function generateExpression($segments, $prefix = '//') |
||
| 365 | } |
||
| 366 | |||
| 367 | // ------------------------------------------------------------------------ |
||
| 368 | |||
/**
 * XPath::fetchCssAttributeSelector
 *
 * Converts one CSS attribute selector into an XPath predicate. The operator
 * is encoded in the attribute name as it arrives here:
 *
 *  - "^name"  attribute whose name starts with "name"  (example: *[^data-])
 *  - "!name"  attribute must be absent                 (example: input[!disabled])
 *  - "name^"  value starts with $value
 *  - "name$"  value ends with $value
 *  - "name*"  value contains $value
 *  - "name!"  value is not equal to $value
 *  - "name~"  whitespace-separated value list contains the word $value
 *  - "name"   attribute is present ($value === null) or equals $value
 *
 * NOTE(review): $value is inserted verbatim into a double-quoted XPath
 * literal, so a value containing a double quote yields a malformed
 * expression.
 *
 * @param string      $name  The attribute name, including any operator character
 * @param string|null $value The attribute value, or null when none was given
 *
 * @return string XPath predicate
 */
protected function fetchCssAttributeSelector($name, $value)
{
    // if the attribute name starts with ^
    // example: *[^data-]
    if (substr($name, 0, 1) === '^') {
        $xpath = sprintf('@*[starts-with(name(), "%s")]', substr($name, 1));

        return $value === null ? $xpath : sprintf('%s="%s"', $xpath, $value);
    }

    // if the attribute name starts with !
    // example: input[!disabled]
    if (substr($name, 0, 1) === '!') {
        return sprintf('not(@%s)', substr($name, 1));
    }

    // Attribute name without the trailing operator character.
    $attribute = substr($name, 0, -1);

    switch (substr($name, -1)) {
        case '^':
            return sprintf('starts-with(@%s, "%s")', $attribute, $value);

        case '$':
            // DOMXPath implements XPath 1.0, which has no ends-with();
            // compare the trailing substring of the attribute instead.
            return sprintf(
                'substring(@%1$s, string-length(@%1$s) - string-length("%2$s") + 1) = "%2$s"',
                $attribute,
                $value
            );

        case '*':
            return sprintf('contains(@%s, "%s")', $attribute, $value);

        case '!':
            return sprintf('not(@%s="%s")', $attribute, $value);

        case '~':
            return sprintf(
                'contains(concat(" ", normalize-space(@%s), " "), " %s ")',
                $attribute,
                $value
            );
    }

    // if specified only the attribute name
    return $value === null ? '@' . $name : sprintf('@%s="%s"', $name, $value);
}
||
| 423 | |||
| 424 | // ------------------------------------------------------------------------ |
||
| 425 | |||
/**
 * XPath::fetchCssPseudoSelector
 *
 * Converts a CSS pseudo-class into an XPath predicate.
 *
 * Supported pseudo-classes: first-child, last-child, nth-child, contains,
 * has, not, nth-of-type, empty, not-empty.
 *
 * Note: $parameters declares a default value but is followed by the
 * required $tagName, so callers always have to pass all three arguments.
 *
 * @param string $pseudo     Pseudo-class name
 * @param array  $parameters Pseudo-class arguments; index 0 is the main
 *                           argument, index 1 (for "contains") is the
 *                           case-sensitivity flag, enabled by the text "true".
 * @param string $tagName    Tag name of the current segment, passed by
 *                           reference; "nth-child" moves the tag test into
 *                           the predicate and resets this to "*".
 *
 * @return string
 *
 * @throws RuntimeException if passed an unknown pseudo-class
 */
protected function fetchCssPseudoSelector($pseudo, $parameters = [], &$tagName)
{
    switch ($pseudo) {
        case 'first-child':
            return 'position() = 1';

        case 'last-child':
            return 'position() = last()';

        case 'nth-child':
            $xpath = sprintf(
                '(name()="%s") and (%s)',
                $tagName,
                $this->fetchCssPseudoNthSelector($parameters[ 0 ])
            );
            $tagName = '*';

            return $xpath;

        case 'contains':
            $string = trim($parameters[ 0 ], ' \'"');
            // "&&" is required here: "and" binds weaker than "=", which made
            // the flag true whenever a second argument was present at all.
            $caseSensitive = isset($parameters[ 1 ]) && (trim($parameters[ 1 ]) === 'true');

            return $this->fetchCssPseudoContainsSelector($string, $caseSensitive);

        case 'has':
            return $this->fetchCssExpression($parameters[ 0 ], './/');

        case 'not':
            // Test the current node itself through the XPath self:: axis.
            return sprintf('not(self::%s)', $this->fetchCssExpression($parameters[ 0 ], ''));

        case 'nth-of-type':
            return $this->fetchCssPseudoNthSelector($parameters[ 0 ]);

        case 'empty':
            return 'count(descendant::*) = 0';

        case 'not-empty':
            return 'count(descendant::*) > 0';
    }

    throw new RuntimeException('Invalid selector: unknown pseudo-class');
}
||
| 483 | |||
| 484 | // ------------------------------------------------------------------------ |
||
| 485 | |||
/**
 * XPath::fetchCssPseudoNthSelector
 *
 * Converts an nth-expression into an XPath predicate on position().
 *
 * Accepted forms: "odd", "even", a number, and "An", "An+B" or "An-B" where
 * A is an optional non-negative integer and "n" may be upper- or lower-case.
 * Negative multipliers (for example "-n+3") and embedded whitespace are not
 * accepted.
 *
 * @param string $expression nth-expression
 *
 * @return string
 *
 * @throws RuntimeException if passed nth-child is empty
 * @throws RuntimeException if passed an unknown nth-child expression
 */
protected function fetchCssPseudoNthSelector($expression)
{
    if ($expression === '') {
        throw new RuntimeException(
            'Invalid selector: nth-child (or nth-last-child) expression must not be empty'
        );
    }

    if ($expression === 'odd') {
        return 'position() mod 2 = 1 and position() >= 1';
    }

    if ($expression === 'even') {
        return 'position() mod 2 = 0 and position() >= 0';
    }

    if (is_numeric($expression)) {
        return sprintf('position() = %d', $expression);
    }

    if (preg_match('/^(?P<mul>[0-9]*)n(?:(?P<sign>[+-])(?P<pos>[0-9]+))?$/i', $expression, $segments)) {
        $hasMultiplier = isset($segments[ 'mul' ]) && $segments[ 'mul' ] !== '';
        $multiplier = $hasMultiplier ? (int)$segments[ 'mul' ] : 1;
        $position = isset($segments[ 'pos' ]) ? (int)$segments[ 'pos' ] : 0;
        $isNegative = isset($segments[ 'sign' ]) && $segments[ 'sign' ] === '-';

        // "0n+B" selects exactly position B; "mod 0" would never be equal to 0.
        if ($multiplier === 0) {
            return sprintf('position() = %d', $isNegative ? -$position : $position);
        }

        // The offset is moved to the other side of the equation, hence the
        // inverted sign: An+B  =>  (position() - B) mod A = 0.
        $sign = (isset($segments[ 'sign' ]) && $segments[ 'sign' ] === '+') ? '-' : '+';

        // With a negative offset the series already starts below 1, so the
        // first position must not be excluded (3n-2 matches 1, 4, 7, ...).
        $lowerBound = $isNegative ? 1 : $position;

        return sprintf(
            '(position() %s %d) mod %d = 0 and position() >= %d',
            $sign,
            $position,
            $multiplier,
            $lowerBound
        );
    }

    throw new RuntimeException('Invalid selector: invalid nth-child expression');
}
||
| 536 | |||
| 537 | // ------------------------------------------------------------------------ |
||
| 538 | |||
| 539 | /** |
||
| 540 | * XPath::fetchCssPseudoContainsSelector |
||
| 541 | * |
||
| 542 | * @param string $string |
||
| 543 | * @param bool $caseSensitive |
||
| 544 | * |
||
| 545 | * @return string |
||
| 546 | */ |
||
| 547 | protected function fetchCssPseudoContainsSelector($string, $caseSensitive = false) |
||
| 560 | } |
||
| 561 | } |
||
| 562 | } |