1 | <?php |
||
2 | |||
3 | namespace Bavix\AdvancedHtmlDom; |
||
4 | |||
/**
 * Translates CSS selectors into XPath 1.0 expressions.
 *
 * The entry point is {@see CSS::xpath_for()}. It splits a selector list on commas,
 * {@see CSS::translate()} splits each selector on combinators, and
 * {@see CSS::translate_part()} turns each compound selector into one XPath step.
 *
 * Invalid input is reported with \InvalidArgumentException.
 */
class CSS
{
    /**
     * Translates one compound selector (tag name plus any #id, .class, [attr] and
     * :pseudo parts) into a single XPath step.
     *
     * @param string $str      compound selector, e.g. "a.external[href]"
     * @param string $last_nav combinator that preceded this part; '+' restricts the
     *                         step to the immediately following sibling
     *
     * @return string
     *
     * @throws \InvalidArgumentException when a part of the selector cannot be translated
     */
    public static function translate_part($str, $last_nav = '')
    {
        // Quick and dirty :contains() support: rewrite it as a [text*=...] attribute test.
        $str = \preg_replace('/:contains\(([^()]*)\)/', '[text*=\\1]', $str);

        // Delimiters are: a known pseudo (with optional balanced parentheses),
        // a balanced [...] block, or an #id / .class token.
        $re = '/(:(?:nth-last-child|nth-of-type|nth-last-of-type|first-child|last-child|first-of-type|last-of-type|only-child|only-of-type|nth-child|first|last|gt|lt|eq|root|nth|empty|not|has|contains|parent|link|visited|hover|active)(?:\((?>[^()]|(?R))*\))?|\[(?>[^\[\]]|(?R))*\]|[#.][\w-]+)/';

        $name = '*';
        $predicates = [];

        foreach (\preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            switch (\substr($token, 0, 1)) {
                case ':':
                    $predicates[] = self::do_pseudo($token, $name);
                    break;
                case '[':
                    $predicates[] = self::do_braces($token);
                    break;
                case '#':
                    $predicates[] = '[' . self::do_id($token) . ']';
                    break;
                case '.':
                    $predicates[] = '[' . self::do_class($token) . ']';
                    break;
                default:
                    // Anything that is not a recognised delimiter is the node name.
                    $name = $token;
            }
        }

        // "text" and "comment" stand for the XPath node tests text() / comment().
        if (\in_array($name, ['text', 'comment'], true)) {
            $name .= '()';
        }

        // following-sibling:: selects every later sibling; "*[1]/self::" keeps only
        // the adjacent one, as the '+' combinator requires.
        $prefix = $last_nav === '+' ? '*[1]/self::' : '';

        return $prefix . $name . \implode('', $predicates);
    }

    /**
     * Translates a pseudo selector such as ":first-child" or ":nth-child(2n+1)"
     * into an XPath predicate (including the surrounding brackets).
     *
     * @param string $str  the pseudo selector, starting with ':'
     * @param string $name node name of the enclosing step; currently unused, kept
     *                     for interface compatibility
     *
     * @return string
     *
     * @throws \InvalidArgumentException when $str is not a pseudo selector or names
     *                                   an unsupported pseudo
     */
    public static function do_pseudo($str, $name)
    {
        if (!\preg_match('/^:([\w-]+)(?:\((.*)\))?$/', $str, $m)) {
            throw new \InvalidArgumentException('Invalid pseudo selector: ' . $str);
        }

        $pseudo = $m[1];
        // The argument group is absent for pseudos written without parentheses.
        $value = isset($m[2]) ? $m[2] : '';

        switch ($pseudo) {
            case 'last':
            case 'last-of-type':
                return '[position() = last()]';
            case 'first':
            case 'first-of-type':
                return '[position() = 1]';
            case 'parent':
                return '[node()]';
            case 'contains':
                return '[contains(., ' . $value . ')]';
            case 'nth':
            case 'eq':
                return '[position() = ' . $value . ']';
            case 'gt':
                return '[position() > ' . $value . ']';
            case 'lt':
                return '[position() < ' . $value . ']';
            case 'root':
                return '[not(parent::*)]';
            case 'nth-child':
                return '[' . self::nth_child($value) . ']';
            case 'nth-last-child':
                return '[' . self::nth_child($value, true) . ']';
            case 'nth-of-type':
                return '[' . self::nth($value) . ']';
            case 'nth-last-of-type':
                return '[' . self::nth($value, true) . ']';
            case 'first-child':
                return '[count(preceding-sibling::*) = 0]';
            case 'last-child':
                return '[count(following-sibling::*) = 0]';
            case 'only-child':
                return '[count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0]';
            case 'only-of-type':
                return '[last() = 1]';
            case 'empty':
                return '[not(node())]';
            case 'not':
                return '[not(' . self::not($value) . ')]';
            case 'has':
                // The argument is itself a selector; it is only translated here,
                // where it is actually needed.
                return '[' . self::translate($value) . ']';
            case 'link':
            case 'visited':
            case 'hover':
            case 'active':
                // Emitted as a function call of the same name applied to the node.
                return '[' . $pseudo . '(.)]';
            default:
                throw new \InvalidArgumentException('unknown pseudo element: ' . $str);
        }
    }

    /**
     * Translates a single selector (no top-level commas) into a relative XPath.
     *
     * The selector is split on the combinators '>', '~', '+' and whitespace; each
     * compound part goes through {@see CSS::translate_part()} and each combinator
     * through {@see CSS::translate_nav()}.
     *
     * @param string $str
     *
     * @return string XPath starting with '.'
     *
     * @throws \InvalidArgumentException when a part of the selector cannot be translated
     */
    public static function translate($str)
    {
        // Balanced (...) and [...] groups are captured whole so combinator
        // characters inside them are not treated as combinators.
        $re = '/(\((?>[^()]|(?R))*\)|\[(?>[^\[\]]|(?R))*\]|\s*[+~>]\s*| \s*)/';

        $steps = [];
        $item = '';
        $last_nav = null;
        $first_nav = null;

        foreach (\preg_split($re, (string)$str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            $token = \trim($token);

            if (!\in_array($token, ['>', '~', '+', ''], true)) {
                // Part of a compound selector: keep accumulating.
                if ($first_nav === null) {
                    $first_nav = '';
                }
                $item .= $token;
                continue;
            }

            // A combinator closes the compound selector collected so far.
            if ($item !== '') {
                $steps[] = self::translate_part(\trim($item), $last_nav);
            }
            $item = '';
            $last_nav = $token;

            if ($first_nav === null) {
                // A leading combinator is applied once, in the return statement.
                $first_nav = $token;
            } else {
                $steps[] = self::translate_nav($token);
            }
        }

        $steps[] = self::translate_part(\trim($item), $last_nav);

        return '.' . self::translate_nav($first_nav === null ? '' : $first_nav) . \implode('', $steps);
    }

    /**
     * Maps a CSS combinator to the XPath axis separator that joins two steps.
     *
     * '+' and '~' both map to following-sibling::. {@see CSS::translate_part()}
     * narrows '+' to the adjacent sibling.
     *
     * @param string $str one of '+', '~', '>' or '' (descendant)
     *
     * @return string|null null when $str is not a known combinator
     */
    public static function translate_nav($str)
    {
        switch ($str) {
            case '+':
            case '~':
                return '/following-sibling::';
            case '>':
                return '/';
            case '':
                return '//';
        }

        return null;
    }

    /**
     * Builds the predicate body for :nth-child() / :nth-last-child().
     *
     * The index is counted over all element siblings, for the plain integer form
     * and for the an+b form alike.
     *
     * @param string $str  the argument: an integer, "an+b", "n+b", "-n+b", "an",
     *                     "odd" or "even"
     * @param bool   $last count from the last sibling instead of the first
     *
     * @return string
     *
     * @throws \InvalidArgumentException when $str is not a supported expression
     */
    private static function nth_child($str, $last = false)
    {
        list($a, $b) = self::parse_nth($str);

        $axis = $last ? 'following-sibling' : 'preceding-sibling';

        // A zero step ("0n+b") selects exactly one index, like a plain integer.
        if ($a === null || $a === 0) {
            return 'count(' . $axis . '::*) = ' . ($b - 1);
        }

        return self::nth_condition('(count(' . $axis . '::*) + 1)', $a, $b);
    }

    /**
     * Parses the argument of an :nth-*() pseudo into its step and offset.
     *
     * @param string $str
     *
     * @return array [a, b] for "an+b"; a is null when $str is a plain integer
     *
     * @throws \InvalidArgumentException when $str is not a supported expression
     */
    private static function parse_nth($str)
    {
        $str = (string)$str;

        if ($str === 'even') {
            return [2, 0];
        }
        if ($str === 'odd') {
            return [2, 1];
        }
        if (\preg_match('/^(-?\d+)n\+(\d+)$/', $str, $m)) {
            return [(int)$m[1], (int)$m[2]];
        }
        if (\preg_match('/^(-?)n\+(\d+)$/', $str, $m)) {
            return [$m[1] === '-' ? -1 : 1, (int)$m[2]];
        }
        if (\preg_match('/^(\d+)n$/', $str, $m)) {
            return [(int)$m[1], 0];
        }
        if (\preg_match('/^(-?\d+)$/', $str, $m)) {
            return [null, (int)$m[1]];
        }

        throw new \InvalidArgumentException('no match: ' . $str);
    }

    /**
     * Builds the predicate body for :nth-of-type() / :nth-last-of-type().
     *
     * The index is taken from position(), i.e. it is counted within the nodes
     * selected by the enclosing step.
     *
     * @param string $str  the argument, see {@see CSS::parse_nth()}
     * @param bool   $last count from the end instead of the start
     *
     * @return string
     *
     * @throws \InvalidArgumentException when $str is not a supported expression
     */
    private static function nth($str, $last = false)
    {
        list($a, $b) = self::parse_nth($str);

        // A zero step ("0n+b") selects exactly one index, like a plain integer.
        if ($a === null || $a === 0) {
            return $last
                ? 'position() = last() - ' . ($b - 1)
                : 'position() = ' . $b;
        }

        return self::nth_condition($last ? '(last()-position()+1)' : 'position()', $a, $b);
    }

    /**
     * Builds the XPath test "index is of the form a*n + b" for a non-zero step.
     *
     * @param string $index XPath expression yielding the 1-based index to test
     * @param int    $a     step; must not be 0
     * @param int    $b     offset
     *
     * @return string
     */
    private static function nth_condition($index, $a, $b)
    {
        $step = \abs($a);

        if ($b == 0) {
            return '(' . $index . ' mod ' . $step . ') = 0';
        }

        $conditions = [];
        if ($b > 0) {
            // A positive step counts upwards from b, a negative one downwards.
            $conditions[] = '(' . $index . ($a > 0 ? ' >= ' : ' <= ') . $b . ')';
        }
        $conditions[] = '(((' . $index . '-' . $b . ') mod ' . $step . ') = 0)';

        return \implode(' and ', $conditions);
    }

    /**
     * Translates the argument of :not() into an XPath boolean expression.
     *
     * Simple arguments (.class, #id, tag, [attr...]) become a test on the node
     * itself; anything else is translated as a full selector.
     *
     * @param string $str
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the argument cannot be translated
     */
    private static function not($str)
    {
        $str = (string)$str;

        // Hyphens are accepted so that names like "foo-bar" are tested on the node
        // itself, matching the [\w-]+ tokens accepted by translate_part().
        if (\preg_match('/^\.[\w-]+$/', $str)) {
            return self::do_class($str);
        }
        if (\preg_match('/^#[\w-]+$/', $str)) {
            return self::do_id($str);
        }
        if (\preg_match('/^[\w-]+$/', $str)) {
            return 'self::' . $str;
        }
        if (\preg_match('/^\[.*\]$/', $str)) {
            // Strip the outer brackets: the caller wraps the result in not(...).
            return \substr(self::do_braces($str), 1, -1);
        }

        return self::translate($str);
    }

    /**
     * Translates a ".class" token into an XPath class-membership test.
     *
     * @param string $str class selector including the leading dot
     *
     * @return string expression without surrounding brackets
     *
     * @throws \InvalidArgumentException when $str does not start with '.'
     */
    public static function do_class($str)
    {
        if (!\preg_match('/^\.(.*)/', $str, $m)) {
            throw new \InvalidArgumentException('Invalid class selector: ' . $str);
        }

        // Padding both sides with spaces makes the match whole-word.
        return "contains(concat(' ', normalize-space(@class), ' '), ' " . $m[1] . " ')";
    }

    /**
     * Translates a "#id" token into an XPath id comparison.
     *
     * @param string $str id selector including the leading '#'
     *
     * @return string expression without surrounding brackets
     *
     * @throws \InvalidArgumentException when $str does not start with '#'
     */
    public static function do_id($str)
    {
        if (!\preg_match('/^#(.*)/', $str, $m)) {
            throw new \InvalidArgumentException('Invalid id selector: ' . $str);
        }

        return "@id = '" . $m[1] . "'";
    }

    /**
     * Translates an attribute selector such as [href], [2] or [href^="http"] into
     * an XPath predicate (including the surrounding brackets).
     *
     * @param string $str attribute selector including both brackets
     *
     * @return string
     *
     * @throws \InvalidArgumentException when the comparison operator is unknown
     */
    public static function do_braces($str)
    {
        // Delimiters are: a quoted string (either quote style) or a comparison
        // operator. Quoted values are captured whole so an operator character
        // inside them is not mistaken for the operator.
        $re = '/("(?>[^"]|(?R))*"|\'(?>[^\']|(?R))*\'|[~^$*|]?=)\s*/';

        $tokens = \preg_split($re, \substr($str, 1, -1), 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        if (!\is_array($tokens)) {
            $tokens = [];
        }

        $attr = $tokens ? \trim(\array_shift($tokens)) : '';
        $op = $tokens ? \trim(\array_shift($tokens)) : '';

        if ($op === '') {
            if (\preg_match('/^\d+$/', $attr)) {
                // [2] -> [count(preceding-sibling::*) = 1]
                return '[count(preceding-sibling::*) = ' . ($attr - 1) . ']';
            }

            // [foo] -> [@foo]
            return '[@' . $attr . ']';
        }

        if (\preg_match('/^(text|comment)$/', $attr, $m)) {
            $attr = $m[1] . '()';
        } elseif (!\preg_match('/[@(]/', $attr)) {
            // A bare name is an attribute; names with '@' or '(' are used verbatim.
            $attr = '@' . $attr;
        }

        $value = $tokens ? \trim(\array_shift($tokens)) : '';
        if (!\preg_match('/^["\'].*["\']$/', $value)) {
            $value = "'" . $value . "'";
        }

        switch ($op) {
            case '*=':
                return '[contains(' . $attr . ', ' . $value . ')]';
            case '^=':
                return '[starts-with(' . $attr . ', ' . $value . ')]';
            case '~=':
                return '[contains(concat(" ", ' . $attr . ', " "),concat(" ", ' . $value . ', " "))]';
            case '$=':
                // Compares the tail of the attribute, of the value's length, with the value.
                return '[substring(' . $attr . ', string-length(' . $attr . ') - string-length(' . $value . ') + 1, string-length(' . $value . ')) = ' . $value . ']';
            case '|=':
                return '[' . $attr . ' = ' . $value . ' or starts-with(' . $attr . ', concat(' . $value . ", '-'))]";
            case '=':
                return '[' . $attr . ' = ' . $value . ']';
            default:
                throw new \InvalidArgumentException('unknown op: ' . $op);
        }
    }

    /**
     * Returns an XPath expression for a CSS selector list.
     *
     * Input that already looks like XPath is returned unchanged. Comma-separated
     * selectors are translated one by one and joined with the XPath union
     * operator '|'.
     *
     * @param string $str CSS selector list or XPath expression
     *
     * @return mixed|string
     *
     * @throws \InvalidArgumentException when a selector cannot be translated
     */
    public static function xpath_for($str)
    {
        if (self::is_xpath($str)) {
            return $str;
        }

        // Accept "text()" / "comment()" as spelled in XPath; translate_part()
        // re-appends the parentheses.
        $str = \preg_replace('/\b(text|comment)\(\)/', '\1', $str);

        $paths = [];
        foreach (self::get_expressions($str) as $expr) {
            $paths[] = self::translate($expr);
        }

        return \implode('|', $paths);
    }

    /**
     * Tells whether a string already looks like an XPath expression: an optional
     * "string", an optional '(' and an optional '.', followed by '/'.
     *
     * @param string $str
     *
     * @return int 1 when $str looks like XPath, 0 otherwise
     */
    private static function is_xpath($str)
    {
        return \preg_match('/^(?:string)?\(?\.?\//', $str);
    }

    /**
     * Splits a selector list on its top-level commas.
     *
     * Commas inside balanced (...) or [...] groups are not treated as separators.
     *
     * @param string $str
     *
     * @return array trimmed selectors, in source order
     */
    private static function get_expressions($str)
    {
        $re = '/(\((?>[^()]|(?R))*\)|\[(?>[^\[\]]|(?R))*\]|,)/';

        $expressions = [];
        $item = '';
        foreach (\preg_split($re, $str, 0, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY) as $token) {
            if ($token === ',') {
                $expressions[] = \trim($item);
                $item = '';
                continue;
            }
            $item .= $token;
        }
        $expressions[] = \trim($item);

        return $expressions;
    }
}
||
501 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.