Total Complexity | 86 |
Total Lines | 604 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
26 | class Parser |
||
27 | { |
||
28 | protected $scanner; |
||
29 | protected $buffer = ''; |
||
30 | protected $handler; |
||
31 | private $strict = false; |
||
32 | |||
33 | protected $DEBUG = false; |
||
34 | |||
35 | /** |
||
36 | * Construct a new CSS parser object. This will attempt to |
||
37 | * parse the string as a CSS selector. As it parses, it will |
||
38 | * send events to the EventHandler implementation. |
||
39 | * |
||
40 | * @param string $string |
||
41 | * @param EventHandler $handler |
||
42 | */ |
||
43 | public function __construct(string $string, EventHandler $handler) |
||
49 | } |
||
50 | |||
51 | /** |
||
52 | * Parse the selector. |
||
53 | * |
||
54 | * This begins an event-based parsing process that will |
||
55 | * fire events as the selector is handled. An EventHandler |
||
56 | * implementation will be responsible for handling the events. |
||
57 | * |
||
58 | * @throws ParseException |
||
59 | * @throws Exception |
||
60 | */ |
||
61 | public function parse(): void |
||
81 | } |
||
82 | } |
||
83 | } |
||
84 | |||
85 | /** |
||
86 | * A restricted parser that can only parse simple selectors. |
||
87 | * The pseudoClass handler for this parser will throw an |
||
88 | * exception if it encounters a pseudo-element or the |
||
89 | * negation pseudo-class. |
||
90 | * |
||
91 | * @deprecated This is not used anywhere in QueryPath and |
||
92 | * may be removed. |
||
93 | *//* |
||
94 | public function parseSimpleSelector() { |
||
95 | while ($this->scanner->token !== FALSE) { |
||
96 | if ($this->DEBUG) print "SIMPLE SELECTOR\n"; |
||
97 | $this->allElements(); |
||
98 | $this->elementName(); |
||
99 | $this->elementClass(); |
||
100 | $this->elementID(); |
||
101 | $this->pseudoClass(TRUE); // Operate in restricted mode. |
||
102 | $this->attribute(); |
||
103 | |||
104 | // TODO: Need to add failure conditions here. |
||
105 | } |
||
106 | }*/ |
||
107 | |||
108 | /** |
||
109 | * Handle an entire CSS selector. |
||
110 | * |
||
111 | * @throws ParseException |
||
112 | * @throws Exception |
||
113 | */ |
||
private function selector(): void
{
    // Optional trace output: the marker text followed by the scanner position.
    if ($this->DEBUG) {
        echo 'SELECTOR' . $this->scanner->position() . PHP_EOL;
    }

    // A selector is processed in three steps: skip leading whitespace,
    // handle the run of simple selectors, then look for a combinator.
    $this->consumeWhitespace();
    $this->simpleSelectors();
    $this->combinator();
}
||
124 | |||
125 | /** |
||
126 | * Consume whitespace and return a count of the number of whitespace consumed. |
||
127 | * |
||
128 | * @throws \QueryPath\CSS\ParseException |
||
129 | * @throws Exception |
||
130 | */ |
||
private function consumeWhitespace(): int
{
    if ($this->DEBUG) {
        print 'CONSUME WHITESPACE' . PHP_EOL;
    }

    // Step past every consecutive whitespace token, counting each one.
    // The count lets callers tell "no whitespace" from "some whitespace".
    for ($consumed = 0; $this->scanner->token === Token::WHITE; ++$consumed) {
        $this->scanner->nextToken();
    }

    return $consumed;
}
||
145 | |||
146 | /** |
||
147 | * Handle one of the five combinators: '>', '+', ' ', '~', and ','. |
||
148 | * This will call the appropriate event handlers. |
||
149 | * |
||
150 | * @see EventHandler::directDescendant(), |
||
151 | * @see EventHandler::adjacent(), |
||
152 | * @see EventHandler::anyDescendant(), |
||
153 | * @see EventHandler::anotherSelector(). |
||
154 | * @throws ParseException |
||
155 | * @throws \QueryPath\Exception |
||
156 | */ |
||
private function combinator(): void
{
    if ($this->DEBUG) {
        echo 'COMBINATOR' . PHP_EOL;
    }

    // ' ' and ' > ' are both valid combinators, so we remember how much
    // whitespace was skipped: if no explicit combinator follows, skipped
    // whitespace is itself the (any-descendant) combinator.
    $white = $this->consumeWhitespace();
    $t = $this->scanner->token;

    // Notify the handler for an explicit combinator, if the current token is one.
    $inCombinator = true;
    if ($t === Token::RANGLE) {
        $this->handler->directDescendant();
    } elseif ($t === Token::PLUS) {
        $this->handler->adjacent();
    } elseif ($t === Token::COMMA) {
        $this->handler->anotherSelector();
    } elseif ($t === Token::TILDE) {
        $this->handler->sibling();
    } else {
        $inCombinator = false;
    }

    if ($inCombinator) {
        // Move past the combinator token itself.
        $this->scanner->nextToken();

        if ($this->DEBUG) {
            print 'COMBINATOR: ' . Token::name($t) . "\n";
        }

        // Check that we don't get two combinators in a row.
        $this->consumeWhitespace();
        $next = $this->scanner->token;

        // The token is compared against `false` elsewhere in this class, so
        // it may be a non-int here (presumably at end of input). isCombinator()
        // is typed `int`, so only call it with a real token id; this avoids
        // a TypeError under strict_types and a silent false -> 0 coercion otherwise.
        if (is_int($next) && $this->isCombinator($next)) {
            throw new ParseException('Illegal combinator: Cannot have two combinators in sequence.');
        }

        return;
    }

    if ($white > 0) {
        // Whitespace with no explicit combinator is the descendant combinator.
        if ($this->DEBUG) {
            echo 'COMBINATOR: any descendant' . PHP_EOL;
        }
        $this->handler->anyDescendant();

        return;
    }

    if ($this->DEBUG) {
        echo 'COMBINATOR: no combinator found.' . PHP_EOL;
    }
}
||
216 | |||
217 | /** |
||
218 | * Check if the token is a combinator. |
||
219 | * |
||
220 | * @param int $tok |
||
221 | * @return bool |
||
222 | */ |
||
private function isCombinator(int $tok): bool
{
    // True only for the four explicit combinator tokens: '+', '>', ',' and '~'.
    // Whitespace (the descendant combinator) is deliberately not part of this check.
    return $tok === Token::PLUS
        || $tok === Token::RANGLE
        || $tok === Token::COMMA
        || $tok === Token::TILDE;
}
||
227 | |||
228 | /** |
||
229 | * Handle a simple selector. |
||
230 | * |
||
231 | * @throws ParseException |
||
232 | */ |
||
233 | private function simpleSelectors(): void |
||
244 | } |
||
245 | |||
246 | /** |
||
247 | * Handles CSS ID selectors. |
||
248 | * This will call EventHandler::elementID(). |
||
249 | * |
||
250 | * @throws \QueryPath\CSS\ParseException |
||
251 | * @throws Exception |
||
252 | */ |
||
private function elementID(): void
{
    if ($this->DEBUG) {
        print 'ELEMENT ID' . PHP_EOL;
    }

    // Only a '#' token starts an ID selector; otherwise there is nothing to do.
    if ($this->scanner->token !== Token::OCTO) {
        return;
    }

    // The '#' must be followed directly by a name character.
    $this->scanner->nextToken();
    if ($this->scanner->token !== Token::CHAR) {
        throw new ParseException("Expected string after #");
    }

    // Read the identifier and report it to the handler.
    $this->handler->elementID($this->scanner->getNameString());
}
||
268 | |||
269 | /** |
||
270 | * Handles CSS class selectors. |
||
271 | * This will call the EventHandler::elementClass() method. |
||
272 | */ |
||
private function elementClass(): void
{
    if ($this->DEBUG) {
        print 'ELEMENT CLASS' . PHP_EOL;
    }

    // Strict comparison, matching every other token check in this class,
    // so a non-int token can never be loosely equal to the DOT constant.
    if ($this->scanner->token !== Token::DOT) {
        return;
    }

    $this->scanner->nextToken();

    // We're very fault tolerant here: whitespace between the '.' and the
    // class name is skipped, although it should probably raise an error.
    $this->consumeWhitespace();

    // Read the class name and report it to the handler.
    $cssClass = $this->scanner->getNameString();
    $this->handler->elementClass($cssClass);
}
||
285 | |||
286 | /** |
||
287 | * Handle a pseudo-class and pseudo-element. |
||
288 | * |
||
289 | * CSS 3 selectors support separate pseudo-elements, using :: instead |
||
290 | * of : for separator. This is now supported, and calls the pseudoElement |
||
291 | * handler, EventHandler::pseudoElement(). |
||
292 | * |
||
293 | * This will call EventHandler::pseudoClass() when a |
||
294 | * pseudo-class is parsed. |
||
295 | * |
||
296 | * @throws ParseException |
||
297 | * @throws Exception |
||
298 | */ |
||
private function pseudoClass($restricted = false): void
{
    if ($this->DEBUG) {
        echo 'PSEUDO-CLASS' . PHP_EOL;
    }

    // Pseudo-classes and pseudo-elements both start with ':'.
    if ($this->scanner->token !== Token::COLON) {
        return;
    }

    // A second ':' right after the first marks a CSS 3 pseudo-element ('::name').
    $isPseudoElement = ($this->scanner->nextToken() === Token::COLON);
    if ($isPseudoElement) {
        $this->scanner->nextToken();
    }

    $name = $this->scanner->getNameString();
    if ($restricted && $name === 'not') {
        throw new ParseException("The 'not' pseudo-class is illegal in this context.");
    }

    // Only pseudo-classes may carry a parenthesised argument.
    $hasArguments = ($this->scanner->token === Token::LPAREN);
    if ($hasArguments && $isPseudoElement) {
        throw new ParseException('Illegal left paren. Pseudo-Element cannot have arguments.');
    }

    if (!$isPseudoElement) {
        $value = $hasArguments ? $this->pseudoClassValue() : null;
        $this->handler->pseudoClass($name, $value);

        return;
    }

    if ($restricted) {
        throw new ParseException('Pseudo-Elements are illegal in this context.');
    }

    $this->handler->pseudoElement($name);
    $this->consumeWhitespace();

    // Per the spec, a pseudo-element must be the last item in a selector, so
    // after it we must be at the end of the stream or at the ',' that starts
    // a new selector. Only one pseudo-element is allowed per selector.
    $following = $this->scanner->token;
    if ($following !== false && $following !== Token::COMMA) {
        throw new ParseException('A Pseudo-Element must be the last item in a selector.');
    }
}
||
344 | |||
345 | /** |
||
346 | * Get the value of a pseudo-class. |
||
347 | * |
||
348 | * @return string |
||
349 | * Returns the value found from a pseudo-class. |
||
350 | * |
||
351 | * @todo Pseudoclasses can be passed pseudo-elements and |
||
352 | * other pseudo-classes as values, which means :pseudo(::pseudo) |
||
353 | * is legal. |
||
354 | */ |
||
private function pseudoClassValue(): ?string
{
    // A pseudo-class value only exists when we are positioned on '('.
    // Previously this case fell off the end of the method and returned an
    // implicit null; the null is now explicit and reflected in the return type.
    if ($this->scanner->token !== Token::LPAREN) {
        return null;
    }

    // The value is deliberately left vague for now: the scanner hands back
    // the raw pseudo-class argument string and no nested pseudo-class or
    // pseudo-element parsing is attempted. The cast mirrors the original
    // string concatenation, so the result is always a string.
    return (string) $this->scanner->getPseudoClassString();
}
||
398 | |||
399 | /** |
||
400 | * Handle element names. |
||
401 | * This will call EventHandler::element(), EventHandler::elementNS(), or EventHandler::anyElementInNS(). |
||
402 | * |
||
403 | * This handles: |
||
404 | * <code> |
||
405 | * name (EventHandler::element()) |
||
406 | * |name (EventHandler::element()) |
||
407 | * ns|name (EventHandler::elementNS()) |
||
408 | * ns|* (EventHandler::anyElementInNS()) |
||
409 | * </code> |
||
410 | */ |
||
private function elementName(): void
{
    if ($this->DEBUG) {
        print "ELEMENT NAME\n";
    }

    if ($this->scanner->token === Token::PIPE) {
        // We have '|name', which is equivalent to 'name'.
        $this->scanner->nextToken();
        $this->consumeWhitespace();
        $elementName = $this->scanner->getNameString();
        $this->handler->element($elementName);

        return;
    }

    // Anything that does not start with a name character is not an element name.
    if ($this->scanner->token !== Token::CHAR) {
        return;
    }

    $elementName = $this->scanner->getNameString();

    // Strict comparison, consistent with the other token checks in this class.
    if ($this->scanner->token !== Token::PIPE) {
        // Plain 'name' with no namespace prefix.
        $this->handler->element($elementName);

        return;
    }

    // What we read so far was the namespace prefix of 'ns|...'.
    $elementNS = $elementName;
    $this->scanner->nextToken();
    $this->consumeWhitespace();

    if ($this->scanner->token === Token::STAR) {
        // We have ns|*
        $this->handler->anyElementInNS($elementNS);
        $this->scanner->nextToken();
    } elseif ($this->scanner->token !== Token::CHAR) {
        // A namespace prefix must be followed by '*' or a name.
        $this->throwError(Token::CHAR, $this->scanner->token);
    } else {
        // We have ns|name
        $elementName = $this->scanner->getNameString();
        $this->handler->elementNS($elementName, $elementNS);
    }
}
||
446 | |||
447 | /** |
||
448 | * Check for all elements designators. Due to the new CSS 3 namespace |
||
449 | * support, this is slightly more complicated, now, as it handles |
||
450 | * the *|name and *|* cases as well as *. |
||
451 | * |
||
452 | * Calls EventHandler::anyElement(), EventHandler::anyElementInNS(), or EventHandler::elementNS(). |
||
453 | */ |
||
private function allElements()
{
    // Every form handled here ('*', '*|*', '*|name') starts with a star.
    if ($this->scanner->token !== Token::STAR) {
        return;
    }
    $this->scanner->nextToken();

    // A bare '*' with no namespace separator matches any element.
    if ($this->scanner->token !== Token::PIPE) {
        $this->handler->anyElement();

        return;
    }
    $this->scanner->nextToken();

    if ($this->scanner->token === Token::STAR) {
        // We got *|*. According to the spec this requires that the element
        // has a namespace, so it is passed on to the namespace-aware handler.
        $this->scanner->nextToken();
        $this->handler->anyElementInNS('*');

        return;
    }

    // We got *|name, which means the name MUST be in a namespace,
    // so it is reported through the namespaced element handler.
    $localName = $this->scanner->getNameString();
    $this->handler->elementNS($localName, '*');
}
||
477 | |||
478 | /** |
||
479 | * Handle an attribute. |
||
480 | * An attribute can be in one of two forms: |
||
481 | * <code>[attrName]</code> |
||
482 | * or |
||
483 | * <code>[attrName="AttrValue"]</code> |
||
484 | * |
||
485 | * This may call the following event handlers: EventHandler::attribute(), EventHandler::attributeNS(). |
||
486 | * |
||
487 | * @throws \QueryPath\CSS\ParseException |
||
488 | * @throws Exception |
||
489 | */ |
||
private function attribute(): void
{
    // Attribute selectors always open with '['.
    if ($this->scanner->token !== Token::LSQUARE) {
        return;
    }

    $attrVal = $op = $ns = null;

    $this->scanner->nextToken();
    $this->consumeWhitespace();

    // A leading '@' before the attribute name is skipped, unless strict
    // mode is on, in which case it is rejected.
    if ($this->scanner->token === Token::AT) {
        if ($this->strict) {
            throw new ParseException('The @ is illegal in attributes.');
        }

        $this->scanner->nextToken();
        $this->consumeWhitespace();
    }

    if ($this->scanner->token === Token::STAR) {
        // Global namespace... requires that attr be prefixed,
        // so we pass this on to a namespace handler.
        $ns = '*';
        $this->scanner->nextToken();
    }
    if ($this->scanner->token === Token::PIPE) {
        // Skip this. It's a global namespace.
        $this->scanner->nextToken();
        $this->consumeWhitespace();
    }

    $attrName = $this->scanner->getNameString();
    $this->consumeWhitespace();

    // Check for namespace attribute: ns|attr. We have to peek() to make
    // sure that we haven't hit the |= operator, which looks the same.
    if ($this->scanner->token === Token::PIPE && $this->scanner->peek() !== '=') {
        // We have a namespaced attribute: what we read was the prefix.
        $ns = $attrName;
        $this->scanner->nextToken();
        $attrName = $this->scanner->getNameString();
        $this->consumeWhitespace();
    }

    // Note: We require that operators do not have spaces
    // between characters, e.g. ~= , not ~ =.

    // Get the operator. Every two-character operator must have '=' as
    // its second token; the '=' itself is consumed further below.
    switch ($this->scanner->token) {
        case Token::EQ:
            // Kept from the original; with the current token being '=' this
            // should not consume anything (it only skips whitespace tokens).
            $this->consumeWhitespace();
            $op = EventHandler::IS_EXACTLY;
            break;
        case Token::TILDE:
            if ($this->scanner->nextToken() !== Token::EQ) {
                $this->throwError(Token::EQ, $this->scanner->token);
            }
            $op = EventHandler::CONTAINS_WITH_SPACE;
            break;
        case Token::PIPE:
            if ($this->scanner->nextToken() !== Token::EQ) {
                $this->throwError(Token::EQ, $this->scanner->token);
            }
            $op = EventHandler::CONTAINS_WITH_HYPHEN;
            break;
        case Token::STAR:
            if ($this->scanner->nextToken() !== Token::EQ) {
                $this->throwError(Token::EQ, $this->scanner->token);
            }
            $op = EventHandler::CONTAINS_IN_STRING;
            break;
        // Was terminated with ';' instead of ':' (an alternate syntax that
        // is deprecated in recent PHP versions).
        case Token::DOLLAR:
            if ($this->scanner->nextToken() !== Token::EQ) {
                $this->throwError(Token::EQ, $this->scanner->token);
            }
            $op = EventHandler::ENDS_WITH;
            break;
        case Token::CARAT:
            if ($this->scanner->nextToken() !== Token::EQ) {
                $this->throwError(Token::EQ, $this->scanner->token);
            }
            $op = EventHandler::BEGINS_WITH;
            break;
    }

    if ($op !== null) {
        // Consume '=' and go on.
        $this->scanner->nextToken();
        $this->consumeWhitespace();

        // So... here we have a problem. The grammar suggests that the
        // value here is String1 or String2, both of which are enclosed
        // in quotes of some sort, and both of which allow lots of special
        // characters. But the spec itself includes examples like this:
        //   [lang=fr]
        // So some bareword support is assumed. To get around this, we assume
        // that bare words follow the NAME rules, while quoted strings follow
        // the String1/String2 rules.
        if ($this->scanner->token === Token::QUOTE || $this->scanner->token === Token::SQUOTE) {
            $attrVal = $this->scanner->getQuotedString();
        } else {
            $attrVal = $this->scanner->getNameString();
        }

        if ($this->DEBUG) {
            print "ATTR: $attrVal AND OP: $op\n";
        }
    }

    $this->consumeWhitespace();

    // The selector must be closed with ']'.
    if ($this->scanner->token !== Token::RSQUARE) {
        $this->throwError(Token::RSQUARE, $this->scanner->token);
    }

    // Dispatch: namespaced attribute, attribute with a value, or bare attribute.
    if ($ns !== null) {
        $this->handler->attributeNS($attrName, $ns, $attrVal, $op);
    } elseif ($attrVal !== null) {
        $this->handler->attribute($attrName, $attrVal, $op);
    } else {
        $this->handler->attribute($attrName);
    }

    // Step past the closing ']'.
    $this->scanner->nextToken();
}
||
614 | |||
615 | /** |
||
616 | * Utility for throwing a consistently-formatted parse error. |
||
617 | */ |
||
618 | private function throwError($expected, $got) |
||
622 | } |
||
623 | |||
624 | /** |
||
625 | * @return Scanner |
||
626 | */ |
||
627 | public function getScanner(): Scanner |
||
628 | { |
||
630 | } |
||
631 | |||
632 | } |
||
633 | |||
634 |