Total Complexity | 104 |
Total Lines | 725 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
53 | class Parser |
||
54 | { |
||
55 | /** |
||
56 | * List of pragmas. |
||
57 | * |
||
58 | * @var array |
||
59 | */ |
||
60 | protected $_pragmas = null; |
||
61 | |||
62 | /** |
||
63 | * List of skipped tokens. |
||
64 | * |
||
65 | * @var array |
||
66 | */ |
||
67 | protected $_skip = null; |
||
68 | |||
69 | /** |
||
70 | * Associative array (token name => token regex), to be defined in |
||
71 | * precedence order. |
||
72 | * |
||
73 | * @var array |
||
74 | */ |
||
75 | protected $_tokens = null; |
||
76 | |||
77 | /** |
||
78 | * Rules, to be defined as associative array, name => Rule object. |
||
79 | * |
||
80 | * @var Rule[] |
||
81 | */ |
||
82 | protected $_rules = null; |
||
83 | |||
84 | /** |
||
85 | * Lexer iterator. |
||
86 | * |
||
87 | * @var Buffer |
||
88 | */ |
||
89 | protected $_tokenSequence = null; |
||
90 | |||
91 | /** |
||
92 | * Possible token causing an error. |
||
93 | * |
||
94 | * @var array |
||
95 | */ |
||
96 | protected $_errorToken = null; |
||
97 | |||
98 | /** |
||
99 | * Trace of activated rules. |
||
100 | * |
||
101 | * @var array |
||
102 | */ |
||
103 | protected $_trace = []; |
||
104 | |||
105 | /** |
||
106 | * Stack of todo list. |
||
107 | * |
||
108 | * @var array |
||
109 | */ |
||
110 | protected $_todo = null; |
||
111 | |||
112 | /** |
||
113 | * AST. |
||
114 | * |
||
115 | * @var TreeNode |
||
116 | */ |
||
117 | protected $_tree = null; |
||
118 | |||
119 | /** |
||
120 | * Current depth while building the trace. |
||
121 | * |
||
122 | * @var int |
||
123 | */ |
||
124 | protected $_depth = -1; |
||
125 | |||
126 | |||
127 | |||
128 | /** |
||
129 | * Construct the parser. |
||
130 | * |
||
131 | * @param array $tokens Tokens. |
||
132 | * @param array $rules Rules. |
||
133 | * @param array $pragmas Pragmas. |
||
134 | */ |
||
135 | public function __construct( |
||
136 | array $tokens = [], |
||
137 | array $rules = [], |
||
138 | array $pragmas = [] |
||
139 | ) { |
||
140 | $this->_tokens = $tokens; |
||
141 | $this->_rules = $rules; |
||
142 | $this->_pragmas = $pragmas; |
||
143 | |||
144 | return; |
||
145 | } |
||
146 | |||
147 | /** |
||
148 | * Parse :-). |
||
149 | * |
||
150 | * @param string $text Text to parse. |
||
151 | * @param string $rule The axiom, i.e. root rule. |
||
152 | * @param bool $tree Whether build tree or not. |
||
153 | * |
||
154 | * @return mixed |
||
155 | * |
||
156 | * @throws UnexpectedToken |
||
157 | */ |
||
158 | final public function parse(string $text, ?string $rule = null, bool $tree = true) |
||
159 | { |
||
160 | $k = 1024; |
||
161 | |||
162 | if (isset($this->_pragmas['parser.lookahead'])) { |
||
163 | $k = max(0, intval($this->_pragmas['parser.lookahead'])); |
||
164 | } |
||
165 | |||
166 | $lexer = new Lexer($this->_pragmas); |
||
167 | $this->_tokenSequence = new Iterator\Buffer( |
||
168 | $lexer->lexMe($text, $this->_tokens), |
||
169 | $k |
||
170 | ); |
||
171 | $this->_tokenSequence->rewind(); |
||
172 | |||
173 | $this->_errorToken = null; |
||
174 | $this->_trace = []; |
||
175 | $this->_todo = []; |
||
176 | |||
177 | if (false === array_key_exists($rule, $this->_rules)) { |
||
178 | $rule = $this->getRootRule(); |
||
179 | } |
||
180 | |||
181 | $closeRule = new Rule\Ekzit($rule, 0); |
||
182 | $openRule = new Rule\Entry($rule, 0, [$closeRule]); |
||
183 | $this->_todo = [$closeRule, $openRule]; |
||
184 | |||
185 | do { |
||
186 | $out = $this->unfold(); |
||
187 | |||
188 | if (null !== $out && |
||
189 | 'EOF' === $this->_tokenSequence->current()['token']) { |
||
190 | break; |
||
191 | } |
||
192 | |||
193 | if (false === $this->backtrack()) { |
||
194 | $token = $this->_errorToken; |
||
195 | |||
196 | if (null === $this->_errorToken) { |
||
197 | $token = $this->_tokenSequence->current(); |
||
198 | } |
||
199 | |||
200 | $offset = $token['offset']; |
||
201 | $line = 1; |
||
202 | $column = 1; |
||
203 | |||
204 | if (!empty($text)) { |
||
205 | if (0 === $offset) { |
||
206 | $leftnl = 0; |
||
207 | } else { |
||
208 | $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; |
||
209 | } |
||
210 | |||
211 | $rightnl = strpos($text, "\n", $offset); |
||
212 | $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; |
||
213 | $column = $offset - $leftnl + (0 === $leftnl); |
||
214 | |||
215 | if (false !== $rightnl) { |
||
216 | $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); |
||
217 | } |
||
218 | } |
||
219 | |||
220 | throw new Compiler\Exception\UnexpectedToken( |
||
221 | 'Unexpected token "%s" (%s) at line %d and column %d:' . |
||
222 | "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑', |
||
223 | 0, |
||
224 | [ |
||
225 | $token['value'], |
||
226 | $token['token'], |
||
227 | $line, |
||
228 | $column, |
||
229 | $text, |
||
230 | ], |
||
231 | $line, |
||
232 | $column |
||
233 | ); |
||
234 | } |
||
235 | } while (true); |
||
236 | |||
237 | if (false === $tree) { |
||
238 | return true; |
||
239 | } |
||
240 | |||
241 | $tree = $this->_buildTree(); |
||
242 | |||
243 | if (!($tree instanceof TreeNode)) { |
||
244 | throw new Compiler\Exception( |
||
245 | 'Parsing error: cannot build AST, the trace is corrupted.', |
||
246 | 1 |
||
247 | ); |
||
248 | } |
||
249 | |||
250 | return $this->_tree = $tree; |
||
251 | } |
||
252 | |||
253 | /** |
||
254 | * Unfold trace. |
||
255 | * |
||
256 | * @return mixed |
||
257 | */ |
||
258 | final protected function unfold() |
||
259 | { |
||
260 | while (0 < count($this->_todo)) { |
||
261 | $rule = array_pop($this->_todo); |
||
262 | |||
263 | if ($rule instanceof Rule\Ekzit) { |
||
264 | $rule->setDepth($this->_depth); |
||
265 | $this->_trace[] = $rule; |
||
266 | |||
267 | if (false === $rule->isTransitional()) { |
||
268 | --$this->_depth; |
||
269 | } |
||
270 | } else { |
||
271 | $ruleName = $rule->getRule(); |
||
272 | $next = $rule->getData(); |
||
273 | $zeRule = $this->_rules[$ruleName]; |
||
274 | $out = $this->_parse($zeRule, $next); |
||
275 | |||
276 | if (false === $out && false === $this->backtrack()) { |
||
277 | return null; |
||
278 | } |
||
279 | } |
||
280 | } |
||
281 | |||
282 | return true; |
||
283 | } |
||
284 | |||
285 | /** |
||
286 | * Parse current rule. |
||
287 | * |
||
288 | * @param Rule $zeRule Current rule. |
||
289 | * @param int $next Next rule index. |
||
290 | */ |
||
291 | final protected function _parse(Rule $zeRule, int $next): bool |
||
292 | { |
||
293 | $regex = null; |
||
294 | if ($zeRule instanceof Rule\Token) { |
||
295 | $name = $this->_tokenSequence->current()['token']; |
||
296 | |||
297 | if ($zeRule->getTokenName() !== $name) { |
||
298 | return false; |
||
299 | } |
||
300 | |||
301 | $value = $this->_tokenSequence->current()['value']; |
||
302 | |||
303 | if (0 <= $unification = $zeRule->getUnificationIndex()) { |
||
304 | for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) { |
||
305 | $trace = $this->_trace[$i]; |
||
306 | |||
307 | if ($trace instanceof Rule\Entry) { |
||
308 | if (false === $trace->isTransitional()) { |
||
309 | if ($trace->getDepth() <= $this->_depth) { |
||
310 | break; |
||
311 | } |
||
312 | |||
313 | --$skip; |
||
314 | } |
||
315 | } elseif ($trace instanceof Rule\Ekzit && |
||
316 | false === $trace->isTransitional()) { |
||
317 | $skip += $trace->getDepth() > $this->_depth; |
||
318 | } |
||
319 | |||
320 | if (0 < $skip) { |
||
321 | continue; |
||
322 | } |
||
323 | |||
324 | if ($trace instanceof Rule\Token && |
||
325 | $unification === $trace->getUnificationIndex() && |
||
326 | $value !== $trace->getValue()) { |
||
327 | return false; |
||
328 | } |
||
329 | } |
||
330 | } |
||
331 | |||
332 | $current = $this->_tokenSequence->current(); |
||
333 | |||
334 | $namespace = $current['namespace']; |
||
335 | $offset = $current['offset']; |
||
336 | |||
337 | $zzeRule = clone $zeRule; |
||
338 | $zzeRule->setValue($value); |
||
339 | $zzeRule->setOffset($offset); |
||
340 | $zzeRule->setNamespace($namespace); |
||
341 | |||
342 | if (isset($this->_tokens[$namespace][$name])) { |
||
343 | $zzeRule->setRepresentation($this->_tokens[$namespace][$name]); |
||
344 | } else { |
||
345 | foreach ($this->_tokens[$namespace] as $_name => $regex) { |
||
346 | if (false === $pos = strpos($_name, ':')) { |
||
347 | continue; |
||
348 | } |
||
349 | |||
350 | $_name = substr($_name, 0, $pos); |
||
351 | |||
352 | if ($_name === $name) { |
||
353 | break; |
||
354 | } |
||
355 | } |
||
356 | |||
357 | $zzeRule->setRepresentation($regex); |
||
358 | } |
||
359 | |||
360 | array_pop($this->_todo); |
||
361 | $this->_trace[] = $zzeRule; |
||
362 | $this->_tokenSequence->next(); |
||
363 | $this->_errorToken = $this->_tokenSequence->current(); |
||
364 | |||
365 | return true; |
||
366 | } elseif ($zeRule instanceof Rule\Concatenation) { |
||
367 | if (false === $zeRule->isTransitional()) { |
||
368 | ++$this->_depth; |
||
369 | } |
||
370 | |||
371 | $this->_trace[] = new Rule\Entry( |
||
372 | $zeRule->getName(), |
||
373 | 0, |
||
374 | null, |
||
375 | $this->_depth |
||
376 | ); |
||
377 | $children = $zeRule->getChildren(); |
||
378 | |||
379 | for ($i = count($children) - 1; $i >= 0; --$i) { |
||
380 | $nextRule = $children[$i]; |
||
381 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
382 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
383 | } |
||
384 | |||
385 | return true; |
||
386 | } elseif ($zeRule instanceof Rule\Choice) { |
||
387 | $children = $zeRule->getChildren(); |
||
388 | |||
389 | if ($next >= count($children)) { |
||
390 | return false; |
||
391 | } |
||
392 | |||
393 | if (false === $zeRule->isTransitional()) { |
||
394 | ++$this->_depth; |
||
395 | } |
||
396 | |||
397 | $this->_trace[] = new Rule\Entry( |
||
398 | $zeRule->getName(), |
||
399 | $next, |
||
400 | $this->_todo, |
||
401 | $this->_depth |
||
402 | ); |
||
403 | $nextRule = $children[$next]; |
||
404 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
405 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
406 | |||
407 | return true; |
||
408 | } elseif ($zeRule instanceof Rule\Repetition) { |
||
409 | $nextRule = $zeRule->getChildren(); |
||
410 | |||
411 | if (0 === $next) { |
||
412 | $name = $zeRule->getName(); |
||
413 | $min = $zeRule->getMin(); |
||
414 | |||
415 | if (false === $zeRule->isTransitional()) { |
||
416 | ++$this->_depth; |
||
417 | } |
||
418 | |||
419 | $this->_trace[] = new Rule\Entry( |
||
420 | $name, |
||
421 | $min, |
||
422 | null, |
||
423 | $this->_depth |
||
424 | ); |
||
425 | array_pop($this->_todo); |
||
426 | $this->_todo[] = new Rule\Ekzit( |
||
427 | $name, |
||
428 | $min, |
||
429 | $this->_todo |
||
430 | ); |
||
431 | |||
432 | for ($i = 0; $i < $min; ++$i) { |
||
433 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
434 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
435 | } |
||
436 | |||
437 | return true; |
||
438 | } else { |
||
439 | $max = $zeRule->getMax(); |
||
440 | |||
441 | if (-1 !== $max && $next > $max) { |
||
442 | return false; |
||
443 | } |
||
444 | |||
445 | $this->_todo[] = new Rule\Ekzit( |
||
446 | $zeRule->getName(), |
||
447 | $next, |
||
448 | $this->_todo |
||
449 | ); |
||
450 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
451 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
452 | |||
453 | return true; |
||
454 | } |
||
455 | } |
||
456 | |||
457 | return false; |
||
458 | } |
||
459 | |||
460 | /** |
||
461 | * Backtrack the trace. |
||
462 | */ |
||
463 | final protected function backtrack(): bool |
||
496 | } |
||
497 | |||
498 | /** |
||
499 | * Build AST from trace. |
||
500 | * Walk through the trace iteratively and recursively. |
||
501 | * |
||
502 | * @param int $i Current trace index. |
||
503 | * @param array &$children Collected children. |
||
504 | */ |
||
505 | final protected function _buildTree(int $i = 0, array &$children = []) |
||
506 | { |
||
507 | $max = count($this->_trace); |
||
508 | |||
509 | while ($i < $max) { |
||
510 | $trace = $this->_trace[$i]; |
||
511 | |||
512 | if ($trace instanceof Rule\Entry) { |
||
513 | $ruleName = $trace->getRule(); |
||
514 | $rule = $this->_rules[$ruleName]; |
||
515 | $isRule = false === $trace->isTransitional(); |
||
516 | $nextTrace = $this->_trace[$i + 1]; |
||
517 | $id = $rule->getNodeId(); |
||
518 | |||
519 | // Optimization: Skip empty trace sequence. |
||
520 | if ($nextTrace instanceof Rule\Ekzit && |
||
521 | $ruleName === $nextTrace->getRule()) { |
||
522 | $i += 2; |
||
523 | |||
524 | continue; |
||
525 | } |
||
526 | |||
527 | if (true === $isRule) { |
||
528 | $children[] = $ruleName; |
||
529 | } |
||
530 | |||
531 | if (null !== $id) { |
||
532 | $children[] = [ |
||
533 | 'id' => $id, |
||
534 | 'options' => $rule->getNodeOptions(), |
||
535 | ]; |
||
536 | } |
||
537 | |||
538 | $i = $this->_buildTree($i + 1, $children); |
||
539 | |||
540 | if (false === $isRule) { |
||
541 | continue; |
||
542 | } |
||
543 | |||
544 | $handle = []; |
||
545 | $cId = null; |
||
546 | $cOptions = []; |
||
547 | |||
548 | do { |
||
549 | $pop = array_pop($children); |
||
550 | |||
551 | if (true === is_object($pop)) { |
||
552 | $handle[] = $pop; |
||
553 | } elseif (true === is_array($pop) && null === $cId) { |
||
554 | $cId = $pop['id']; |
||
555 | $cOptions = $pop['options']; |
||
556 | } elseif ($ruleName === $pop) { |
||
557 | break; |
||
558 | } |
||
559 | } while (null !== $pop); |
||
560 | |||
561 | if (null === $cId) { |
||
562 | $cId = $rule->getDefaultId(); |
||
563 | $cOptions = $rule->getDefaultOptions(); |
||
564 | } |
||
565 | |||
566 | if (null === $cId) { |
||
567 | for ($j = count($handle) - 1; $j >= 0; --$j) { |
||
568 | $children[] = $handle[$j]; |
||
569 | } |
||
570 | |||
571 | continue; |
||
572 | } |
||
573 | |||
574 | if (true === in_array('M', $cOptions) && |
||
575 | true === $this->mergeTree($children, $handle, $cId)) { |
||
576 | continue; |
||
577 | } |
||
578 | |||
579 | if (true === in_array('m', $cOptions) && |
||
580 | true === $this->mergeTree($children, $handle, $cId, true)) { |
||
581 | continue; |
||
582 | } |
||
583 | |||
584 | $cTree = new TreeNode($id ?: $cId); |
||
585 | |||
586 | foreach ($handle as $child) { |
||
587 | $child->setParent($cTree); |
||
588 | $cTree->prependChild($child); |
||
589 | } |
||
590 | |||
591 | $children[] = $cTree; |
||
592 | } elseif ($trace instanceof Rule\Ekzit) { |
||
593 | return $i + 1; |
||
594 | } else { |
||
595 | if (false === $trace->isKept()) { |
||
596 | ++$i; |
||
597 | |||
598 | continue; |
||
599 | } |
||
600 | |||
601 | $child = new TreeNode('token', [ |
||
602 | 'token' => $trace->getTokenName(), |
||
603 | 'value' => $trace->getValue(), |
||
604 | 'namespace' => $trace->getNamespace(), |
||
605 | 'offset' => $trace->getOffset(), |
||
606 | ]); |
||
607 | |||
608 | $children[] = $child; |
||
609 | ++$i; |
||
610 | } |
||
611 | } |
||
612 | |||
613 | return $children[0]; |
||
614 | } |
||
615 | |||
616 | /** |
||
617 | * Try to merge directly children into an existing node. |
||
618 | * |
||
619 | * @param array &$children Current children being gathering. |
||
620 | * @param array &$handle Children of the new node. |
||
621 | * @param string $cId Node ID. |
||
622 | * @param bool $recursive Whether we should merge recursively or |
||
623 | * not. |
||
624 | */ |
||
625 | final protected function mergeTree( |
||
656 | } |
||
657 | |||
658 | /** |
||
659 | * Merge recursively. |
||
660 | * Please, see self::mergeTree() to know the context. |
||
661 | * |
||
662 | * @param TreeNode $node Node that receives. |
||
663 | * @param TreeNode $newNode Node to merge. |
||
664 | */ |
||
665 | final protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode): void |
||
666 | { |
||
667 | $nNId = $newNode->getId(); |
||
668 | |||
669 | if ('token' === $nNId) { |
||
670 | $node->appendChild($newNode); |
||
671 | $newNode->setParent($node); |
||
672 | |||
673 | return; |
||
674 | } |
||
675 | |||
676 | $children = $node->getChildren(); |
||
677 | end($children); |
||
678 | $last = current($children); |
||
679 | |||
680 | if ($last->getId() !== $nNId) { |
||
681 | $node->appendChild($newNode); |
||
682 | $newNode->setParent($node); |
||
683 | |||
684 | return; |
||
685 | } |
||
686 | |||
687 | foreach ($newNode->getChildren() as $child) { |
||
688 | $this->mergeTreeRecursive($last, $child); |
||
689 | } |
||
690 | |||
691 | return; |
||
692 | } |
||
693 | |||
694 | /** |
||
695 | * Get AST. |
||
696 | */ |
||
697 | final public function getTree(): TreeNode |
||
698 | { |
||
699 | return $this->_tree; |
||
700 | } |
||
701 | |||
702 | /** |
||
703 | * Get trace. |
||
704 | * |
||
705 | * @return array |
||
706 | */ |
||
707 | final public function getTrace(): array |
||
708 | { |
||
709 | return $this->_trace; |
||
710 | } |
||
711 | |||
712 | /** |
||
713 | * Get pragmas. |
||
714 | * |
||
715 | * @return array |
||
716 | */ |
||
717 | final public function getPragmas(): array |
||
718 | { |
||
719 | return $this->_pragmas; |
||
720 | } |
||
721 | |||
722 | /** |
||
723 | * Get tokens. |
||
724 | * |
||
725 | * @return array |
||
726 | */ |
||
727 | final public function getTokens(): array |
||
728 | { |
||
729 | return $this->_tokens; |
||
730 | } |
||
731 | |||
732 | /** |
||
733 | * Get the lexer iterator. |
||
734 | */ |
||
735 | final public function getTokenSequence(): Buffer |
||
736 | { |
||
737 | return $this->_tokenSequence; |
||
738 | } |
||
739 | |||
740 | /** |
||
741 | * Get rule by name. |
||
742 | * |
||
743 | * @param string $name Rule name. |
||
744 | */ |
||
745 | final public function getRule(string $name): Rule |
||
746 | { |
||
747 | if (!isset($this->_rules[$name])) { |
||
748 | return null; |
||
|
|||
749 | } |
||
750 | |||
751 | return $this->_rules[$name]; |
||
752 | } |
||
753 | |||
754 | /** |
||
755 | * Get rules. |
||
756 | * |
||
757 | * @return Rule[] |
||
758 | */ |
||
759 | final public function getRules() |
||
762 | } |
||
763 | |||
764 | /** |
||
765 | * Get root rule. |
||
766 | */ |
||
767 | final public function getRootRule(): string |
||
768 | { |
||
769 | \assert([] !== $this->_rules); |
||
778 | } |
||
779 | } |
||
780 |