| 1 | <?php |
||
| 2 | |||
| 3 | namespace Bavix\AdvancedHtmlDom; |
||
| 4 | |||
class CSS
{
    /**
     * Translate one compound selector (no combinators), e.g. `div#id.cls[attr]:first-child`,
     * into an XPath node test followed by predicates.
     *
     * @param string $str      compound CSS selector
     * @param string $last_nav combinator that preceded this part ('+', '~', '>', '' or null)
     *
     * @return string
     *
     * @throws \InvalidArgumentException when a pseudo-class, attribute operator or nth argument is not understood
     */
    public static function translate_part($str, $last_nav = '')
    {
        // quick and dirty contains fix: rewrite :contains(x) as the attribute-style test [text*=x]
        $str = \preg_replace('/:contains\(([^()]*)\)/', '[text*=\\1]', $str);
        $retval = array();
        $re = '/(:(?:nth-last-child|nth-of-type|nth-last-of-type|first-child|last-child|first-of-type|last-of-type|only-child|only-of-type|nth-child|first|last|gt|lt|eq|root|nth|empty|not|has|contains|parent|link|visited|hover|active)(?:\((?>[^()]|(?R))*\))?|\[(?>[^\[\]]|(?R))*\]|[#.][\w-]+)/';
        $name = '*';

        foreach (\preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            // The first character of each token decides how it is translated.
            switch (\substr($token, 0, 1)) {
                case ':':
                    $retval[] = self::do_pseudo($token, $name);
                    break;
                case '[':
                    $retval[] = self::do_braces($token);
                    break;
                case '#':
                    $retval[] = '[' . self::do_id($token) . ']';
                    break;
                case '.':
                    $retval[] = '[' . self::do_class($token) . ']';
                    break;
                default:
                    // Anything else is the element (node) name.
                    $name = $token;
            }
        }

        // text and comment are node tests in XPath and need call parentheses.
        if (\in_array($name, array('text', 'comment'), true)) {
            $name .= '()';
        }

        // "a + b": following-sibling:: selects every later sibling, so keep only the first one.
        return ($last_nav === '+' ? '*[1]/self::' : '') . $name . \implode('', $retval);
    }

    /**
     * Translate a single pseudo-class token (e.g. `:first-child`, `:nth-child(2n+1)`)
     * into an XPath predicate.
     *
     * @param string $str  pseudo-class token including the leading colon
     * @param string $name element name the pseudo-class is attached to (currently unused)
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the token is malformed or the pseudo-class is unknown
     */
    public static function do_pseudo($str, $name)
    {
        if (!\preg_match('/^:([\w-]+)(?:\((.*)\))?$/', $str, $m)) {
            throw new \InvalidArgumentException('no attribute match!');
        }

        $pseudo = $m[1];
        // The argument group is optional; default to an empty string instead of an undefined offset.
        $value = isset($m[2]) ? $m[2] : '';

        switch ($pseudo) {
            case 'last':
                return '[position() = last()]';
            case 'first':
                return '[position() = 1]';
            case 'parent':
                return '[node()]';
            case 'contains':
                return '[contains(., ' . $value . ')]';
            case 'nth':
                return '[position() = ' . $value . ']';
            case 'gt':
                return '[position() > ' . $value . ']';
            case 'lt':
                return '[position() < ' . $value . ']';
            case 'eq':
                return '[position() = ' . $value . ']';
            case 'root':
                return '[not(parent::*)]';
            case 'nth-child':
                return '[' . self::nth_child($value) . ']';
            case 'nth-last-child':
                return '[' . self::nth_child($value, true) . ']';
            case 'nth-of-type':
                return '[' . self::nth($value) . ']';
            case 'nth-last-of-type':
                return '[' . self::nth($value, true) . ']';
            case 'first-child':
                return '[count(preceding-sibling::*) = 0]';
            case 'first-of-type':
                return '[position() = 1]';
            case 'last-child':
                return '[count(following-sibling::*) = 0]';
            case 'last-of-type':
                return '[position() = last()]';
            case 'only-child':
                return '[count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0]';
            case 'only-of-type':
                return '[last() = 1]';
            case 'empty':
                return '[not(node())]';
            case 'not':
                return '[not(' . self::not($value) . ')]';
            case 'has':
                // Only :has needs its argument translated as a full selector, so do it here
                // rather than for every pseudo-class.
                return '[' . self::translate($value) . ']';
            case 'link':
            case 'visited':
            case 'hover':
            case 'active':
                return '[' . $pseudo . '(.)]';
            default:
                throw new \InvalidArgumentException('unknown pseudo element: ' . $str);
        }
    }

    /**
     * Translate one complex selector (compound selectors joined by the combinators
     * ' ', '>', '+', '~') into a relative XPath expression starting with '.'.
     *
     * @param string $str CSS selector without top-level commas
     *
     * @return string
     *
     * @throws \InvalidArgumentException when a part of the selector cannot be translated
     */
    public static function translate($str)
    {
        $retval = array();
        // Parenthesised and bracketed groups are captured whole so combinators inside them are ignored.
        $re = '/(\((?>[^()]|(?R))*\)|\[(?>[^\[\]]|(?R))*\]|\s*[+~>]\s*| \s*)/';
        $item = '';
        $first_nav = null;
        $last_nav = null;

        foreach (\preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            $token = \trim($token);

            if (\in_array($token, array('>', '~', '+', ''), true)) {
                // A combinator closes the compound selector collected so far.
                if ($item !== '') {
                    $retval[] = self::translate_part(\trim($item), $last_nav);
                }
                $item = '';
                $last_nav = $token;
                if ($first_nav === null) {
                    // A leading combinator is emitted once, in front of the whole expression.
                    $first_nav = $token;
                } else {
                    $retval[] = self::translate_nav($token);
                }
                continue;
            }

            if ($first_nav === null) {
                $first_nav = '';
            }
            $item .= $token;
        }

        $retval[] = self::translate_part(\trim($item), $last_nav);
        if ($first_nav === null) {
            $first_nav = '';
        }

        return '.' . self::translate_nav($first_nav) . \implode('', $retval);
    }

    /**
     * Map a CSS combinator to the XPath axis/step separator that replaces it.
     *
     * @param string $str one of '+', '~', '>' or '' (descendant)
     *
     * @return string empty string for an unrecognised combinator
     */
    public static function translate_nav($str)
    {
        switch ($str) {
            case '+':
            case '~':
                return '/following-sibling::';
            case '>':
                return '/';
            case '':
                return '//';
            default:
                return '';
        }
    }

    /**
     * Build the predicate body for :nth-child() / :nth-last-child().
     *
     * The position is counted among ALL element siblings (not only those matching
     * the step's node test), which is what distinguishes nth-child from nth-of-type.
     *
     * @param string $str  nth argument (number, an+b, even, odd)
     * @param bool   $last count from the end when true
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the argument cannot be parsed
     */
    private static function nth_child($str, $last = false)
    {
        list($a, $b) = self::parse_nth($str);
        $axis = $last ? 'following-sibling' : 'preceding-sibling';

        // Plain index (or a step of zero): exactly $b - 1 siblings on that side.
        if ($a === null || $a === 0) {
            return 'count(' . $axis . '::*) = ' . ($b - 1);
        }

        return self::nth_expression('(count(' . $axis . '::*) + 1)', $a, $b);
    }

    /**
     * Parse the argument of an :nth-*() pseudo-class.
     *
     * Accepts a plain integer, `even`, `odd`, and the `an+b` forms including
     * `n`, `-n`, `2n`, `-2n+3`, `2n-1`, with optional whitespace.
     *
     * @param string $str
     *
     * @return array two elements: step a (null for a plain integer) and offset b
     *
     * @throws \InvalidArgumentException when the argument matches none of the supported forms
     */
    private static function parse_nth($str)
    {
        $expr = \strtolower(\preg_replace('/\s+/', '', (string)$str));

        if ($expr === 'even') {
            return array(2, 0);
        }
        if ($expr === 'odd') {
            return array(2, 1);
        }
        if (\preg_match('/^[+-]?\d+$/', $expr)) {
            return array(null, (int)$expr);
        }
        if (\preg_match('/^([+-]?)(\d*)n(?:([+-])(\d+))?$/', $expr, $m)) {
            // A bare "n" means a step of 1.
            $a = $m[2] === '' ? 1 : (int)$m[2];
            if ($m[1] === '-') {
                $a = -$a;
            }
            $b = 0;
            if (isset($m[4]) && $m[4] !== '') {
                $b = (int)$m[4];
                if ($m[3] === '-') {
                    $b = -$b;
                }
            }

            return array($a, $b);
        }

        throw new \InvalidArgumentException('no match: ' . $str);
    }

    /**
     * Build the predicate body for :nth-of-type() / :nth-last-of-type().
     *
     * @param string $str  nth argument (number, an+b, even, odd)
     * @param bool   $last count from the end when true
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the argument cannot be parsed
     */
    private static function nth($str, $last = false)
    {
        list($a, $b) = self::parse_nth($str);

        // Plain index (or a step of zero): match exactly one position.
        if ($a === null || $a === 0) {
            return $last ? 'position() = last() - ' . ($b - 1) : 'position() = ' . $b;
        }

        return self::nth_expression($last ? '(last()-position()+1)' : 'position()', $a, $b);
    }

    /**
     * Express "position is in the set { a*n + b | n >= 0 }" as XPath conditions.
     *
     * @param string $pos XPath expression yielding the 1-based position
     * @param int    $a   non-zero step
     * @param int    $b   offset
     *
     * @return string conditions joined with ' and '
     */
    private static function nth_expression($pos, $a, $b)
    {
        $tokens = array();

        if ($a > 0 && $b > 0) {
            // Ascending series starts at b.
            $tokens[] = '(' . $pos . ' >= ' . $b . ')';
        }
        if ($a < 0) {
            // Descending series never exceeds b (and is empty when b <= 0).
            $tokens[] = '(' . $pos . ' <= ' . $b . ')';
        }
        if ($b != 0) {
            $offset = $b < 0 ? '+' . \abs($b) : '-' . $b;
            $tokens[] = '(((' . $pos . $offset . ') mod ' . \abs($a) . ') = 0)';
        } else {
            $tokens[] = '(' . $pos . ' mod ' . \abs($a) . ') = 0';
        }

        return \implode(' and ', $tokens);
    }

    /**
     * Translate the argument of :not() into an XPath boolean expression that is
     * evaluated against the current element.
     *
     * @param string $str selector inside :not(...)
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the argument cannot be translated
     */
    private static function not($str)
    {
        $str = \trim($str);

        if (\preg_match('/^\.[\w-]+$/', $str)) {
            return self::do_class($str);
        }
        if (\preg_match('/^#[\w-]+$/', $str)) {
            return self::do_id($str);
        }
        if (\preg_match('/^[\w-]+$/', $str)) {
            return 'self::' . $str;
        }
        if (\preg_match('/^\[.*\]$/', $str)) {
            // do_braces() returns "[...]"; strip the brackets to get the bare condition.
            return \substr(self::do_braces($str), 1, -1);
        }
        if (\preg_match('/^:[\w-]+(?:\([^()]*\))?$/', $str)) {
            // A lone pseudo-class must be tested on the element itself, not on its descendants.
            return 'self::' . self::translate_part($str);
        }

        return self::translate($str);
    }

    /**
     * Build the XPath condition for a class selector token such as `.foo`.
     *
     * @param string $str class token including the leading dot
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the token does not start with a dot
     */
    public static function do_class($str)
    {
        if (!\preg_match('/^\.(.*)/', $str, $m)) {
            throw new \InvalidArgumentException('no attribute match!');
        }

        // Pad with spaces so the class is matched as a whole word within @class.
        return "contains(concat(' ', normalize-space(@class), ' '), ' " . $m[1] . " ')";
    }

    /**
     * Build the XPath condition for an id selector token such as `#foo`.
     *
     * @param string $str id token including the leading hash
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the token does not start with a hash
     */
    public static function do_id($str)
    {
        if (!\preg_match('/^#(.*)/', $str, $m)) {
            throw new \InvalidArgumentException('no attribute match!');
        }

        return "@id = '" . $m[1] . "'";
    }

    /**
     * Translate an attribute selector token such as `[href]`, `[href^="http"]`
     * or `[2]` into an XPath predicate.
     *
     * @param string $str attribute selector including the surrounding brackets
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the comparison operator is unknown
     */
    public static function do_braces($str)
    {
        // Quoted values are captured whole so an operator character inside the quotes does not split them.
        $re = '/("(?>[^"]|(?R))*"|\'(?>[^\']|(?R))*\'|[~^$*|]?=)\s*/';

        $tokens = \preg_split($re, \substr($str, 1, \strlen($str) - 2), 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

        // array_shift() yields null on an empty list; cast so trim() always receives a string.
        $attr = \trim((string)\array_shift($tokens));
        $op = \trim((string)\array_shift($tokens));

        if (!$op) {
            if (\preg_match('/^\d+$/', $attr)) {
                // [2] -> [count(preceding-sibling::*) = 1]
                return '[count(preceding-sibling::*) = ' . ((int)$attr - 1) . ']';
            }

            // [foo] => [@foo]
            return '[@' . $attr . ']';
        }

        if (\preg_match('/^(text|comment)$/', $attr, $m)) {
            $attr = $m[1] . '()';
        } elseif (!\preg_match('/[@(]/', $attr)) {
            $attr = '@' . $attr;
        }

        $value = \trim((string)\array_shift($tokens));
        if (!\preg_match('/^["\'].*["\']$/', $value)) {
            $value = "'" . $value . "'";
        }

        switch ($op) {
            case '*=':
                return '[contains(' . $attr . ', ' . $value . ')]';
            case '^=':
                return '[starts-with(' . $attr . ', ' . $value . ')]';
            case '~=':
                return '[contains(concat(" ", ' . $attr . ', " "),concat(" ", ' . $value . ', " "))]';
            case '$=':
                return '[substring(' . $attr . ', string-length(' . $attr . ') - string-length(' . $value . ') + 1, string-length(' . $value . ')) = ' . $value . ']';
            case '|=':
                return '[' . $attr . ' = ' . $value . ' or starts-with(' . $attr . ', concat(' . $value . ", '-'))]";
            case '=':
                return '[' . $attr . ' = ' . $value . ']';
            default:
                throw new \InvalidArgumentException('unknown op: ' . $op);
        }
    }

    /**
     * Convert a CSS selector list into an XPath expression. Input that already
     * looks like XPath is returned unchanged.
     *
     * @param string $str CSS selector (comma-separated list allowed) or XPath
     *
     * @return mixed|string
     *
     * @throws \InvalidArgumentException when the selector cannot be translated
     */
    public static function xpath_for($str)
    {
        if (self::is_xpath($str)) {
            return $str;
        }

        // text() / comment() are written without parentheses internally; translate_part() re-adds them.
        $str = \preg_replace('/\b(text|comment)\(\)/', '\1', $str);

        $retval = array();
        foreach (self::get_expressions($str) as $expr) {
            $retval[] = self::translate($expr);
        }

        return \implode('|', $retval);
    }

    /**
     * Tell whether the string starts like an XPath expression
     * (optional `string`, optional `(`, optional `.`, then `/`).
     *
     * @param string $str
     *
     * @return int result of preg_match(): 1 on match, 0 otherwise
     */
    private static function is_xpath($str)
    {
        return \preg_match('/^(?:string)?\(?\.?\//', $str);
    }

    /**
     * Split a selector list on top-level commas, leaving commas inside
     * parentheses or brackets untouched.
     *
     * @param string $str
     *
     * @return array trimmed selectors, always at least one element
     */
    private static function get_expressions($str)
    {
        $retval = array();
        $re = '/(\((?>[^()]|(?R))*\)|\[(?>[^\[\]]|(?R))*\]|,)/';
        $item = '';

        foreach (\preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            if (',' === $token) {
                $retval[] = \trim($item);
                $item = '';
            } else {
                $item .= $token;
            }
        }
        $retval[] = \trim($item);

        return $retval;
    }
}
||
| 501 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.