1 | <?php |
||
2 | /** |
||
3 | * @author Niels A.D. |
||
4 | * @author Todd Burry <[email protected]> |
||
5 | * @copyright 2010 Niels A.D., 2014 Todd Burry |
||
6 | * @license http://opensource.org/licenses/LGPL-2.1 LGPL-2.1 |
||
7 | * @package pQuery |
||
8 | */ |
||
9 | |||
10 | namespace pQuery; |
||
11 | |||
/**
 * Tokenizer for CSS selector queries.
 *
 * Declares the token ids for selector punctuation and attribute comparison
 * operators, and the character map that ties characters to those tokens.
 */
class CSSQueryTokenizer extends TokenizerBase {

    /** Token for "[" */
    const TOK_BRACKET_OPEN = 100;
    /** Token for "]" */
    const TOK_BRACKET_CLOSE = 101;
    /** Token for "(" */
    const TOK_BRACE_OPEN = 102;
    /** Token for ")" */
    const TOK_BRACE_CLOSE = 103;
    /** Token for a quoted string */
    const TOK_STRING = 104;
    /** Token for ":" */
    const TOK_COLON = 105;
    /** Token for "," */
    const TOK_COMMA = 106;
    /** Token for "!" */
    const TOK_NOT = 107;

    /** Token for "*" */
    const TOK_ALL = 108;
    /** Token for "|" */
    const TOK_PIPE = 109;
    /** Token for "+" */
    const TOK_PLUS = 110;
    /** Token for "~" */
    const TOK_SIBLING = 111;
    /** Token for "." */
    const TOK_CLASS = 112;
    /** Token for "#" */
    const TOK_ID = 113;
    /** Token for ">" */
    const TOK_CHILD = 114;

    /** Token for the attribute operator "|=" */
    const TOK_COMPARE_PREFIX = 115;
    /** Token for the attribute operator "*=" */
    const TOK_COMPARE_CONTAINS = 116;
    /** Token for the attribute operator "~=" */
    const TOK_COMPARE_CONTAINS_WORD = 117;
    /** Token for the attribute operator "$=" */
    const TOK_COMPARE_ENDS = 118;
    /** Token for the attribute operator "=" */
    const TOK_COMPARE_EQUALS = 119;
    /** Token for the attribute operator "!=" */
    const TOK_COMPARE_NOT_EQUAL = 120;
    /** Token for the attribute operator ">=" */
    const TOK_COMPARE_BIGGER_THAN = 121;
    /** Token for the attribute operator "<=" */
    const TOK_COMPARE_SMALLER_THAN = 122;
    /** Token for the attribute operator "%=" (regex compare) */
    const TOK_COMPARE_REGEX = 123;
    /** Token for the attribute operator "^=" */
    const TOK_COMPARE_STARTS = 124;

    /**
     * Characters that may appear in an identifier of a query
     * @see TokenizerBase::$identifiers
     * @access private
     */
    public $identifiers = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890_-?';

    /**
     * Character map: each character points either directly at a token
     * constant or at the name of one of the parse_* methods below.
     * @see TokenizerBase::$custom_char_map
     * @access private
     */
    public $custom_char_map = array(
        '.' => self::TOK_CLASS,
        '#' => self::TOK_ID,
        ',' => self::TOK_COMMA,
        '>' => 'parse_gt',

        '+' => self::TOK_PLUS,
        '~' => 'parse_sibling',

        '|' => 'parse_pipe',
        '*' => 'parse_star',
        '$' => 'parse_compare',
        '=' => self::TOK_COMPARE_EQUALS,
        '!' => 'parse_not',
        '%' => 'parse_compare',
        '^' => 'parse_compare',
        '<' => 'parse_compare',

        '"' => 'parse_string',
        "'" => 'parse_string',
        '(' => self::TOK_BRACE_OPEN,
        ')' => self::TOK_BRACE_CLOSE,
        '[' => self::TOK_BRACKET_OPEN,
        ']' => self::TOK_BRACKET_CLOSE,
        ':' => self::TOK_COLON
    );

    /**
     * Tell whether the character right after the current position is "="
     * @return bool False as well when the current character is the last one
     */
    private function followed_by_equals() {
        $ahead = $this->pos + 1;
        return ($ahead < $this->size) && ($this->doc[$ahead] === '=');
    }

    /**
     * Handle ">": ">=" gives {@link TOK_COMPARE_BIGGER_THAN}, a lone ">" gives {@link TOK_CHILD}
     * @return int
     */
    protected function parse_gt() {
        if (!$this->followed_by_equals()) {
            return ($this->token = self::TOK_CHILD);
        }

        // Consume the "=" as part of this token.
        ++$this->pos;
        return ($this->token = self::TOK_COMPARE_BIGGER_THAN);
    }

    /**
     * Handle "~": "~=" gives {@link TOK_COMPARE_CONTAINS_WORD}, a lone "~" gives {@link TOK_SIBLING}
     * @return int
     */
    protected function parse_sibling() {
        if (!$this->followed_by_equals()) {
            return ($this->token = self::TOK_SIBLING);
        }

        ++$this->pos;
        return ($this->token = self::TOK_COMPARE_CONTAINS_WORD);
    }

    /**
     * Handle "|": "|=" gives {@link TOK_COMPARE_PREFIX}, a lone "|" gives {@link TOK_PIPE}
     * @return int
     */
    protected function parse_pipe() {
        if (!$this->followed_by_equals()) {
            return ($this->token = self::TOK_PIPE);
        }

        ++$this->pos;
        return ($this->token = self::TOK_COMPARE_PREFIX);
    }

    /**
     * Handle "*": "*=" gives {@link TOK_COMPARE_CONTAINS}, a lone "*" gives {@link TOK_ALL}
     * @return int
     */
    protected function parse_star() {
        if (!$this->followed_by_equals()) {
            return ($this->token = self::TOK_ALL);
        }

        ++$this->pos;
        return ($this->token = self::TOK_COMPARE_CONTAINS);
    }

    /**
     * Handle "!": "!=" gives {@link TOK_COMPARE_NOT_EQUAL}, a lone "!" gives {@link TOK_NOT}
     * @return int
     */
    protected function parse_not() {
        if (!$this->followed_by_equals()) {
            return ($this->token = self::TOK_NOT);
        }

        ++$this->pos;
        return ($this->token = self::TOK_COMPARE_NOT_EQUAL);
    }

    /**
     * Handle the characters that only form a token together with "="
     * ("$=", "%=", "^=" and "<=")
     * @return int False when no "=" follows or the character is not one of the four
     */
    protected function parse_compare() {
        if (!$this->followed_by_equals()) {
            return false;
        }

        // The position moves onto the "=" even if the character is not listed below.
        $char = $this->doc[$this->pos];
        ++$this->pos;

        $tokens = array(
            '$' => self::TOK_COMPARE_ENDS,
            '%' => self::TOK_COMPARE_REGEX,
            '^' => self::TOK_COMPARE_STARTS,
            '<' => self::TOK_COMPARE_SMALLER_THAN
        );

        if (isset($tokens[$char])) {
            return ($this->token = $tokens[$char]);
        }
        return false;
    }

    /**
     * Handle a quoted string; the character at the current position is the
     * quote that has to close it
     * @return int Always {@link TOK_STRING}
     */
    protected function parse_string() {
        $quote = $this->doc[$this->pos];

        // Stop at every quote or backslash until the closing quote is reached.
        while ($this->next_search($quote.'\\', false) !== self::TOK_NULL) {
            if ($this->doc[$this->pos] === $quote) {
                return ($this->token = self::TOK_STRING);
            }

            // Stopped on a backslash: step over it so the character after it is not inspected.
            ++$this->pos;
        }

        // No closing quote found: the string runs up to the last character.
        $this->pos = $this->size - 1;
        return ($this->token = self::TOK_STRING);
    }

}
273 | |||
274 | /** |
||
275 | * Performs a css select query on HTML nodes |
||
276 | */ |
||
277 | class HtmlSelector { |
||
278 | |||
279 | /** |
||
280 | * Parser object |
||
281 | * @internal If string, then it will create a new instance as parser |
||
282 | * @var CSSQueryTokenizer |
||
283 | */ |
||
284 | var $parser = 'pQuery\\CSSQueryTokenizer'; |
||
285 | |||
286 | /** |
||
287 | * Target of queries |
||
288 | * @var DomNode |
||
289 | */ |
||
290 | var $root = null; |
||
291 | |||
292 | /** |
||
293 | * Last performed query, result in {@link $result} |
||
294 | * @var string |
||
295 | */ |
||
296 | var $query = ''; |
||
297 | |||
298 | /** |
||
299 | * Array of matching nodes |
||
300 | * @var array |
||
301 | */ |
||
302 | var $result = array(); |
||
303 | |||
304 | /** |
||
305 | * Include root in search; if false, only child nodes are evaluated |
||
306 | * @var bool |
||
307 | */ |
||
308 | var $search_root = false; |
||
309 | |||
310 | /** |
||
311 | * Search recursively |
||
312 | * @var bool |
||
313 | */ |
||
314 | var $search_recursive = true; |
||
315 | |||
316 | /** |
||
317 | * Extra function map for custom filters |
||
318 | * @var array |
||
319 | * @internal array('root' => 'filter_root') will cause the |
||
320 | * selector to call $this->filter_root at :root |
||
321 | * @see DomNode::$filter_map |
||
322 | */ |
||
323 | var $custom_filter_map = array(); |
||
324 | |||
/**
 * Class constructor, stores the settings and runs the first query
 * @param DomNode $root Node to query, see {@link $root}
 * @param string $query Query handed to {@link select()} right away
 * @param bool $search_root See {@link $search_root}
 * @param bool $search_recursive See {@link $search_recursive}
 * @param CSSQueryTokenizer $parser Tokenizer to use; if null, the class named in {@link $parser} is instantiated
 */
public function __construct($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {
    // At this point $this->parser still holds the default class name.
    $tokenizer = ($parser === null) ? new $this->parser() : $parser;

    $this->parser = $tokenizer;
    $this->root =& $root;
    $this->search_root = $search_root;
    $this->search_recursive = $search_recursive;

    $this->select($query);
}
345 | |||
346 | #php4 PHP4 class constructor compatibility |
||
347 | #function HtmlSelector($root, $query = '*', $search_root = false, $search_recursive = true, $parser = null) {return $this->__construct($root, $query, $search_root, $search_recursive, $parser);} |
||
348 | #php4e |
||
349 | |||
/**
 * String conversion, gives the last query stored in {@link $query}
 * @return string
 * @access private
 */
public function __toString() {
    $last_query = $this->query;
    return $last_query;
}
358 | |||
/**
 * Magic invoke method, forwards the query to {@link select()}
 * @param string $query
 * @return array Whatever {@link select()} returns (false on failure)
 * @access private
 */
public function __invoke($query = '*') {
    $matches = $this->select($query);
    return $matches;
}
/**
 * Run a query: hand it to the tokenizer, remember it in {@link $query}
 * and parse it
 * @param string $query
 * @return array Content of {@link $result}, or false when parsing failed
 */
public function select($query = '*') {
    $this->parser->setDoc($query);
    $this->query = $query;

    if ($this->parse()) {
        return $this->result;
    }
    return false;
}
378 | |||
/**
 * Raise an error through trigger_error()
 * @param string $error Message text
 * @internal %tok% and %pos% will be replaced in the string with the tokenizer's
 * current token string and position; the message is passed through htmlentities()
 * @access private
 */
protected function error($error) {
    $placeholders = array('%tok%', '%pos%');
    $values = array(
        $this->parser->getTokenString(),
        (int) $this->parser->getPos()
    );

    $message = str_replace($placeholders, $values, $error);
    trigger_error(htmlentities($message));
}
394 | |||
/**
 * Read the current token as an identifier or as a quoted string
 * @param bool $do_error Report an error if the token is neither of the two
 * @return string False on failure
 * @access private
 */
protected function parse_getIdentifier($do_error = true) {
    $tokenizer = $this->parser;
    $current = $tokenizer->token;

    if ($current === CSSQueryTokenizer::TOK_IDENTIFIER) {
        return $tokenizer->getTokenString();
    }

    if ($current === CSSQueryTokenizer::TOK_STRING) {
        // getTokenString(1, -1) presumably drops the surrounding quotes (confirm in
        // TokenizerBase); afterwards the escapes \' \" and \\ are unescaped.
        $quoted = $tokenizer->getTokenString(1, -1);
        return str_replace(array('\\\'', '\\"', '\\\\'), array('\'', '"', '\\'), $quoted);
    }

    if ($do_error) {
        $this->error('Expected identifier at %pos%!');
    }
    return false;
}
414 | |||
/**
 * Get query conditions (tag, attribute and filter conditions)
 *
 * Starting at the tokenizer's current token, every pass of the outer loop
 * reads one condition set and appends it to the returned list; a comma
 * after a set starts the next one. A set is read in this order:
 *  - a tag part: "*", "*|name", "|name", "|*", "name", "ns|name", "ns|*",
 *    "text()" or a list in braces such as "(a, !b + c)" (key 'tags');
 *  - an optional ".class", then an optional "#id", then an optional
 *    "[...]" attribute list (key 'attributes');
 *  - any number of ":filter" or ":filter(params)" suffixes (key 'filters').
 * Inside the brace and bracket lists "," links entries with 'or', "+" with
 * 'and', and a leading "!" sets 'match' to false.
 *
 * @return array List of condition sets, false on failure
 * @see DomNode::match()
 * @access private
 */
protected function parse_conditions() {
    $p =& $this->parser;
    $tok = $p->token;

    if ($tok === CSSQueryTokenizer::TOK_NULL) {
        $this->error('Invalid search pattern(1): Empty string!');
        return false;
    }
    $conditions_all = array();

    //Tags
    while ($tok !== CSSQueryTokenizer::TOK_NULL) {
        $conditions = array('tags' => array(), 'attributes' => array());

        if ($tok === CSSQueryTokenizer::TOK_ALL) {
            // "*", "*|*" or "*|name"
            $tok = $p->next();
            if (($tok === CSSQueryTokenizer::TOK_PIPE) && ($tok = $p->next()) && ($tok !== CSSQueryTokenizer::TOK_ALL)) {
                if (($tag = $this->parse_getIdentifier()) === false) {
                    return false;
                }
                $conditions['tags'][] = array(
                    'tag' => $tag,
                    'compare' => 'name'
                );
                $tok = $p->next_no_whitespace();
            } else {
                // Any tag: expressed as "tag is not the empty string"
                $conditions['tags'][''] = array(
                    'tag' => '',
                    'match' => false
                );
                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    $tok = $p->next_no_whitespace();
                }
            }
        } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
            // "|*" or "|name"
            $tok = $p->next();
            if ($tok === CSSQueryTokenizer::TOK_ALL) {
                $conditions['tags'][] = array(
                    'tag' => '',
                    'compare' => 'namespace',
                );
            } elseif (($tag = $this->parse_getIdentifier()) !== false) {
                $conditions['tags'][] = array(
                    'tag' => $tag,
                    'compare' => 'total',
                );
            } else {
                return false;
            }
            $tok = $p->next_no_whitespace();
        } elseif ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
            // "(tag, !tag + tag ...)" list
            $tok = $p->next_no_whitespace();
            $last_mode = 'or';

            while (true) {
                $match = true;
                $compare = 'total';

                if ($tok === CSSQueryTokenizer::TOK_NOT) {
                    $match = false;
                    $tok = $p->next_no_whitespace();
                }

                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    // Give $tag a defined value for a bare "*"; otherwise it would be
                    // undefined on the first entry or carry over from the previous one.
                    $tag = '';
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        // Advance the tokenizer (this class has no next() method of its own).
                        $tok = $p->next();
                        $compare = 'name';
                        if (($tag = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        // Step past the identifier, as the other branches do, so the
                        // separator check below sees ",", "+" or ")".
                        $tok = $p->next_no_whitespace();
                    }
                } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_ALL) {
                        $tag = '';
                        $compare = 'namespace';
                    } elseif (($tag = $this->parse_getIdentifier()) === false) {
                        return false;
                    }
                    $tok = $p->next_no_whitespace();
                } else {
                    if (($tag = $this->parse_getIdentifier()) === false) {
                        return false;
                    }
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();

                        if ($tok === CSSQueryTokenizer::TOK_ALL) {
                            $compare = 'namespace';
                        } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                            $tag = $tag.':'.$tag_name;
                        } else {
                            return false;
                        }

                        $tok = $p->next_no_whitespace();
                    }
                }
                if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                    $tok = $p->next_no_whitespace();
                }

                $conditions['tags'][] = array(
                    'tag' => $tag,
                    'match' => $match,
                    'operator' => $last_mode,
                    'compare' => $compare
                );
                // "continue 2" / "break 2" address the enclosing while(true), not the switch.
                switch($tok) {
                    case CSSQueryTokenizer::TOK_COMMA:
                        $tok = $p->next_no_whitespace();
                        $last_mode = 'or';
                        continue 2;
                    case CSSQueryTokenizer::TOK_PLUS:
                        $tok = $p->next_no_whitespace();
                        $last_mode = 'and';
                        continue 2;
                    case CSSQueryTokenizer::TOK_BRACE_CLOSE:
                        $tok = $p->next();
                        break 2;
                    default:
                        $this->error('Expected closing brace or comma at pos %pos%!');
                        return false;
                }
            }
        } elseif (($tag = $this->parse_getIdentifier(false)) !== false) {
            // "name", "ns|name", "ns|*" or "text()"
            $tok = $p->next();
            if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                $tok = $p->next();

                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'compare' => 'namespace'
                    );
                } elseif (($tag_name = $this->parse_getIdentifier()) !== false) {
                    $tag = $tag.':'.$tag_name;
                    $conditions['tags'][] = array(
                        'tag' => $tag,
                        'match' => true
                    );
                } else {
                    return false;
                }

                $tok = $p->next();
            } elseif ($tag === 'text' && $tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                $pos = $p->getPos();
                $tok = $p->next();
                if ($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                    $conditions['tags'][] = array(
                        'tag' => '~text~',
                        'match' => true
                    );
                    // NOTE(review): $tok is deliberately left untouched here (it still
                    // holds the closing brace), so this condition set ends after "text()".
                    $p->next();
                } else {
                    $p->setPos($pos);
                }
            } else {
                $conditions['tags'][] = array(
                    'tag' => $tag,
                    'match' => true
                );
            }
        } else {
            // No tag part at all (e.g. the set starts with ".", "#", "[" or ":")
            unset($conditions['tags']);
        }

        //Class
        $last_mode = 'or';
        if ($tok === CSSQueryTokenizer::TOK_CLASS) {
            $p->next();
            if (($class = $this->parse_getIdentifier()) === false) {
                return false;
            }

            $conditions['attributes'][] = array(
                'attribute' => 'class',
                'operator_value' => 'contains_word',
                'value' => $class,
                'operator_result' => $last_mode
            );
            $last_mode = 'and';
            $tok = $p->next();
        }

        //ID
        if ($tok === CSSQueryTokenizer::TOK_ID) {
            $p->next();
            if (($id = $this->parse_getIdentifier()) === false) {
                return false;
            }

            $conditions['attributes'][] = array(
                'attribute' => 'id',
                'operator_value' => 'equals',
                'value' => $id,
                'operator_result' => $last_mode
            );
            $last_mode = 'and';
            $tok = $p->next();
        }

        //Attributes
        if ($tok === CSSQueryTokenizer::TOK_BRACKET_OPEN) {
            $tok = $p->next_no_whitespace();

            while (true) {
                $match = true;
                $compare = 'total';
                if ($tok === CSSQueryTokenizer::TOK_NOT) {
                    $match = false;
                    $tok = $p->next_no_whitespace();
                }

                if ($tok === CSSQueryTokenizer::TOK_ALL) {
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();
                        if (($attribute = $this->parse_getIdentifier()) === false) {
                            return false;
                        }
                        $compare = 'name';
                        $tok = $p->next();
                    } else {
                        $this->error('Expected pipe at pos %pos%!');
                        return false;
                    }
                } elseif ($tok === CSSQueryTokenizer::TOK_PIPE) {
                    $tok = $p->next();
                    // Store the name in $attribute: that is the variable the condition
                    // below is built from.
                    if (($attribute = $this->parse_getIdentifier()) === false) {
                        return false;
                    }
                    $tok = $p->next_no_whitespace();
                } elseif (($attribute = $this->parse_getIdentifier()) !== false) {
                    $tok = $p->next();
                    if ($tok === CSSQueryTokenizer::TOK_PIPE) {
                        $tok = $p->next();

                        if (($attribute_name = $this->parse_getIdentifier()) !== false) {
                            $attribute = $attribute.':'.$attribute_name;
                        } else {
                            return false;
                        }

                        $tok = $p->next();
                    }
                } else {
                    return false;
                }
                if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
                    $tok = $p->next_no_whitespace();
                }

                $operator_value = '';
                $val = '';
                switch($tok) {
                    case CSSQueryTokenizer::TOK_COMPARE_PREFIX:
                    case CSSQueryTokenizer::TOK_COMPARE_CONTAINS:
                    case CSSQueryTokenizer::TOK_COMPARE_CONTAINS_WORD:
                    case CSSQueryTokenizer::TOK_COMPARE_ENDS:
                    case CSSQueryTokenizer::TOK_COMPARE_EQUALS:
                    case CSSQueryTokenizer::TOK_COMPARE_NOT_EQUAL:
                    case CSSQueryTokenizer::TOK_COMPARE_REGEX:
                    case CSSQueryTokenizer::TOK_COMPARE_STARTS:
                    case CSSQueryTokenizer::TOK_COMPARE_BIGGER_THAN:
                    case CSSQueryTokenizer::TOK_COMPARE_SMALLER_THAN:
                        // "=" is a single character; for the other operators the token
                        // string is requested with a start offset of -1.
                        $operator_value = $p->getTokenString(($tok === CSSQueryTokenizer::TOK_COMPARE_EQUALS) ? 0 : -1);
                        $p->next_no_whitespace();

                        if (($val = $this->parse_getIdentifier()) === false) {
                            return false;
                        }

                        $tok = $p->next_no_whitespace();
                        break;
                }

                // Compare against '' instead of relying on truthiness: the value "0"
                // is falsy in PHP but is a legitimate value to compare with.
                if ($operator_value && ($val !== '')) {
                    $conditions['attributes'][] = array(
                        'attribute' => $attribute,
                        'operator_value' => $operator_value,
                        'value' => $val,
                        'match' => $match,
                        'operator_result' => $last_mode,
                        'compare' => $compare
                    );
                } else {
                    // No comparison given: 'value' carries the (possibly negated) match flag
                    $conditions['attributes'][] = array(
                        'attribute' => $attribute,
                        'value' => $match,
                        'operator_result' => $last_mode,
                        'compare' => $compare
                    );
                }

                // "continue 2" / "break 2" address the enclosing while(true), not the switch.
                switch($tok) {
                    case CSSQueryTokenizer::TOK_COMMA:
                        $tok = $p->next_no_whitespace();
                        $last_mode = 'or';
                        continue 2;
                    case CSSQueryTokenizer::TOK_PLUS:
                        $tok = $p->next_no_whitespace();
                        $last_mode = 'and';
                        continue 2;
                    case CSSQueryTokenizer::TOK_BRACKET_CLOSE:
                        $tok = $p->next();
                        break 2;
                    default:
                        $this->error('Expected closing bracket or comma at pos %pos%!');
                        return false;
                }
            }
        }

        if (count($conditions['attributes']) < 1) {
            unset($conditions['attributes']);
        }

        //Filters
        while($tok === CSSQueryTokenizer::TOK_COLON) {
            if (count($conditions) < 1) {
                // A filter on its own applies to any tag
                $conditions['tags'] = array(array(
                    'tag' => '',
                    'match' => false
                ));
            }

            $tok = $p->next();
            if (($filter = $this->parse_getIdentifier()) === false) {
                return false;
            }

            if (($tok = $p->next()) === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                // Collect everything up to the matching closing brace as raw text
                $start = $p->pos;
                $count = 1;
                while ((($tok = $p->next()) !== CSSQueryTokenizer::TOK_NULL) && !(($tok === CSSQueryTokenizer::TOK_BRACE_CLOSE) && (--$count === 0))) {
                    if ($tok === CSSQueryTokenizer::TOK_BRACE_OPEN) {
                        ++$count;
                    }
                }

                if ($tok !== CSSQueryTokenizer::TOK_BRACE_CLOSE) {
                    $this->error('Expected closing brace at pos %pos%!');
                    return false;
                }
                $len = $p->pos - 1 - $start;
                $params = (($len > 0) ? substr($p->doc, $start + 1, $len) : '');
                $tok = $p->next();
            } else {
                $params = '';
            }

            $conditions['filters'][] = array('filter' => $filter, 'params' => $params);
        }
        if (count($conditions) < 1) {
            $this->error('Invalid search pattern(2): No conditions found!');
            return false;
        }
        $conditions_all[] = $conditions;

        if ($tok === CSSQueryTokenizer::TOK_WHITESPACE) {
            $tok = $p->next_no_whitespace();
        }

        // A comma starts another condition set; anything else ends this group
        if ($tok === CSSQueryTokenizer::TOK_COMMA) {
            $tok = $p->next_no_whitespace();
            continue;
        } else {
            break;
        }
    }

    return $conditions_all;
}
798 | |||
799 | |||
/**
 * Run the conditions against {@link $root} through its getChildrenByMatch()
 * method and keep the outcome in {@link $result}
 * @param array $conditions {@link parse_conditions()}
 * @param bool|int $recursive Passed on unchanged
 * @param bool $check_root Passed on unchanged
 * @return array The new content of {@link $result}
 * @access private
 */
protected function parse_callback($conditions, $recursive = true, $check_root = false) {
    $matches = $this->root->getChildrenByMatch(
        $conditions,
        $recursive,
        $check_root,
        $this->custom_filter_map
    );

    $this->result = $matches;
    return $matches;
}
816 | |||
/**
 * Parse the first part of the query and evaluate it against the root node,
 * honouring {@link $search_root}
 * @param bool|int $recursive
 * @return bool False when the conditions could not be parsed
 * @internal Result of query is set in {@link $result}
 * @access private
 */
protected function parse_single($recursive = true) {
    $conditions = $this->parse_conditions();
    if ($conditions === false) {
        return false;
    }

    $this->parse_callback($conditions, $recursive, $this->search_root);
    return true;
}
832 | |||
/**
 * Replace {@link $result} by the next siblings of its nodes, keeping only
 * the siblings that match the conditions parsed next
 * @return bool False when the conditions could not be parsed
 * @internal {@link $result} is emptied before the conditions are parsed
 * @access private
 */
protected function parse_adjacent() {
    $previous = $this->result;
    $this->result = array();

    $conditions = $this->parse_conditions();
    if ($conditions === false) {
        return false;
    }

    foreach ($previous as $node) {
        $sibling = $node->getNextSibling();
        if ($sibling === false) {
            // No next sibling for this node
            continue;
        }

        if ($sibling->match($conditions, true, $this->custom_filter_map)) {
            $this->result[] = $sibling;
        }
    }

    return true;
}
856 | |||
/**
 * Evaluate the conditions parsed next against every node of {@link $result}
 *
 * Each node (or its parent, if $parent is set) temporarily becomes
 * {@link $root} for one {@link parse_callback()} run; the matches of all
 * runs are merged without duplicates. {@link $root} is put back to its
 * previous value afterwards, so later queries on this object still start
 * from the node the selector was created for.
 *
 * @param bool $parent Evaluate parent nodes
 * @param bool|int $recursive
 * @return bool False when the conditions could not be parsed
 * @internal Result of query is set in {@link $result}
 * @access private
 */
protected function parse_result($parent = false, $recursive = true) {
    $previous = $this->result;
    $collected = array();
    if (($c = $this->parse_conditions()) === false) {
        return false;
    }

    $original_root = $this->root;

    foreach(array_keys($previous) as $key) {
        $context = (($parent) ? $previous[$key]->parent : $previous[$key]);
        if (!is_object($context)) {
            // Nothing to search from, e.g. a node without a parent
            // (assumes 'parent' is not an object in that case, confirm in DomNode).
            continue;
        }

        $this->root = $context;
        $this->parse_callback($c, $recursive);

        // Identity comparison: the same node reached twice is kept once
        foreach(array_keys($this->result) as $r) {
            if (!in_array($this->result[$r], $collected, true)) {
                $collected[] = $this->result[$r];
            }
        }
    }

    $this->root = $original_root;
    $this->result = $collected;
    return true;
}
884 | |||
/**
 * Parse the full query
 *
 * After the first part has been evaluated, the current token decides how
 * the next part is applied to the nodes found so far, for as long as there
 * are nodes left:
 *  - ">" evaluates the nodes themselves, passing 1 as recursion argument;
 *  - "~" evaluates the parents of the nodes, passing 1 as recursion argument;
 *  - "+" evaluates the next siblings ({@link parse_adjacent()});
 *  - a token that can start a condition evaluates the nodes with the defaults;
 *  - the end of the query stops the loop.
 *
 * @return bool False on a parse error
 * @internal Result of query is set in {@link $result}
 * @access private
 */
protected function parse() {
    $tokenizer = $this->parser;
    $tokenizer->setPos(0);
    $this->result = array();

    if (!$this->parse_single()) {
        return false;
    }

    while (count($this->result) > 0) {
        switch($tokenizer->token) {
            case CSSQueryTokenizer::TOK_CHILD:
                $tokenizer->next_no_whitespace();
                $ok = $this->parse_result(false, 1);
                break;

            case CSSQueryTokenizer::TOK_SIBLING:
                $tokenizer->next_no_whitespace();
                $ok = $this->parse_result(true, 1);
                break;

            case CSSQueryTokenizer::TOK_PLUS:
                $tokenizer->next_no_whitespace();
                $ok = $this->parse_adjacent();
                break;

            case CSSQueryTokenizer::TOK_ALL:
            case CSSQueryTokenizer::TOK_IDENTIFIER:
            case CSSQueryTokenizer::TOK_STRING:
            case CSSQueryTokenizer::TOK_BRACE_OPEN:
            case CSSQueryTokenizer::TOK_BRACKET_OPEN:
            case CSSQueryTokenizer::TOK_ID:
            case CSSQueryTokenizer::TOK_CLASS:
            case CSSQueryTokenizer::TOK_COLON:
                $ok = $this->parse_result();
                break;

            case CSSQueryTokenizer::TOK_NULL:
                // End of the query
                return true;

            default:
                $this->error('Invalid search pattern(3): No result modifier found!');
                return false;
        }

        if (!$ok) {
            return false;
        }
    }

    return true;
}
947 | } |
||
948 | |||
949 | ?> |
||
950 |