Total Complexity | 104 |
Total Lines | 727 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
53 | class Parser |
||
54 | { |
||
55 | /** |
||
56 | * List of pragmas. |
||
57 | * |
||
58 | * @var array |
||
59 | */ |
||
60 | protected $_pragmas = null; |
||
61 | |||
62 | /** |
||
63 | * List of skipped tokens. |
||
64 | * |
||
65 | * @var array |
||
66 | */ |
||
67 | protected $_skip = null; |
||
68 | |||
69 | /** |
||
70 | * Associative array (token name => token regex), to be defined in |
||
71 | * precedence order. |
||
72 | * |
||
73 | * @var array |
||
74 | */ |
||
75 | protected $_tokens = null; |
||
76 | |||
77 | /** |
||
78 | * Rules, to be defined as associative array, name => Rule object. |
||
79 | * |
||
80 | * @var Rule[] |
||
81 | */ |
||
82 | protected $_rules = null; |
||
83 | |||
84 | /** |
||
85 | * Lexer iterator. |
||
86 | * |
||
87 | * @var Buffer |
||
88 | */ |
||
89 | protected $_tokenSequence = null; |
||
90 | |||
91 | /** |
||
92 | * Possible token causing an error. |
||
93 | * |
||
94 | * @var array |
||
95 | */ |
||
96 | protected $_errorToken = null; |
||
97 | |||
98 | /** |
||
99 | * Trace of activated rules. |
||
100 | * |
||
101 | * @var array |
||
102 | */ |
||
103 | protected $_trace = []; |
||
104 | |||
105 | /** |
||
106 | * Stack of todo list. |
||
107 | * |
||
108 | * @var array |
||
109 | */ |
||
110 | protected $_todo = null; |
||
111 | |||
112 | /** |
||
113 | * AST. |
||
114 | * |
||
115 | * @var TreeNode |
||
116 | */ |
||
117 | protected $_tree = null; |
||
118 | |||
119 | /** |
||
120 | * Current depth while building the trace. |
||
121 | * |
||
122 | * @var int |
||
123 | */ |
||
124 | protected $_depth = -1; |
||
125 | |||
126 | |||
127 | |||
128 | /** |
||
129 | * Construct the parser. |
||
130 | * |
||
131 | * @param array $tokens Tokens. |
||
132 | * @param array $rules Rules. |
||
133 | * @param array $pragmas Pragmas. |
||
134 | */ |
||
135 | public function __construct( |
||
136 | array $tokens = [], |
||
137 | array $rules = [], |
||
138 | array $pragmas = [] |
||
139 | ) { |
||
140 | $this->_tokens = $tokens; |
||
141 | $this->_rules = $rules; |
||
142 | $this->_pragmas = $pragmas; |
||
143 | |||
144 | return; |
||
145 | } |
||
146 | |||
147 | /** |
||
148 | * Parse :-). |
||
149 | * |
||
150 | * @param string $text Text to parse. |
||
151 | * @param string $rule The axiom, i.e. root rule. |
||
152 | * @param bool $tree Whether build tree or not. |
||
153 | * |
||
154 | * @return mixed |
||
155 | * |
||
156 | * @throws UnexpectedToken |
||
157 | */ |
||
158 | final public function parse(string $text, ?string $rule = null, bool $tree = true) |
||
159 | { |
||
160 | $k = 1024; |
||
161 | |||
162 | if (isset($this->_pragmas['parser.lookahead'])) { |
||
163 | $k = max(0, intval($this->_pragmas['parser.lookahead'])); |
||
164 | } |
||
165 | |||
166 | $lexer = new Lexer($this->_pragmas); |
||
167 | $this->_tokenSequence = new Iterator\Buffer( |
||
168 | $lexer->lexMe($text, $this->_tokens), |
||
169 | $k |
||
170 | ); |
||
171 | $this->_tokenSequence->rewind(); |
||
172 | |||
173 | $this->_errorToken = null; |
||
174 | $this->_trace = []; |
||
175 | $this->_todo = []; |
||
176 | |||
177 | if (false === array_key_exists($rule, $this->_rules)) { |
||
178 | $rule = $this->getRootRule(); |
||
179 | } |
||
180 | |||
181 | $closeRule = new Rule\Ekzit($rule, 0); |
||
182 | $openRule = new Rule\Entry($rule, 0, [$closeRule]); |
||
183 | $this->_todo = [$closeRule, $openRule]; |
||
184 | |||
185 | do { |
||
186 | $out = $this->unfold(); |
||
187 | |||
188 | if (null !== $out && |
||
189 | 'EOF' === $this->_tokenSequence->current()['token']) { |
||
190 | break; |
||
191 | } |
||
192 | |||
193 | if (false === $this->backtrack()) { |
||
194 | $token = $this->_errorToken; |
||
195 | |||
196 | if (null === $this->_errorToken) { |
||
197 | $token = $this->_tokenSequence->current(); |
||
198 | } |
||
199 | |||
200 | $offset = $token['offset']; |
||
201 | $line = 1; |
||
202 | $column = 1; |
||
203 | |||
204 | if (!empty($text)) { |
||
205 | if (0 === $offset) { |
||
206 | $leftnl = 0; |
||
207 | } else { |
||
208 | $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; |
||
209 | } |
||
210 | |||
211 | $rightnl = strpos($text, "\n", $offset); |
||
212 | $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; |
||
213 | $column = $offset - $leftnl + (0 === $leftnl); |
||
214 | |||
215 | if (false !== $rightnl) { |
||
216 | $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); |
||
217 | } |
||
218 | } |
||
219 | |||
220 | throw new Compiler\Exception\UnexpectedToken( |
||
221 | 'Unexpected token "%s" (%s) at line %d and column %d:' . |
||
222 | "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑', |
||
223 | 0, |
||
224 | [ |
||
225 | $token['value'], |
||
226 | $token['token'], |
||
227 | $line, |
||
228 | $column, |
||
229 | $text, |
||
230 | ], |
||
231 | $line, |
||
232 | $column |
||
233 | ); |
||
234 | } |
||
235 | } while (true); |
||
236 | |||
237 | if (false === $tree) { |
||
238 | return true; |
||
239 | } |
||
240 | |||
241 | $tree = $this->_buildTree(); |
||
242 | |||
243 | if (!($tree instanceof TreeNode)) { |
||
244 | throw new Compiler\Exception( |
||
245 | 'Parsing error: cannot build AST, the trace is corrupted.', |
||
246 | 1 |
||
247 | ); |
||
248 | } |
||
249 | |||
250 | return $this->_tree = $tree; |
||
251 | } |
||
252 | |||
253 | /** |
||
254 | * Unfold trace. |
||
255 | * |
||
256 | * @return mixed |
||
257 | */ |
||
258 | final protected function unfold() |
||
259 | { |
||
260 | while (0 < count($this->_todo)) { |
||
261 | $rule = array_pop($this->_todo); |
||
262 | |||
263 | if ($rule instanceof Rule\Ekzit) { |
||
264 | $rule->setDepth($this->_depth); |
||
265 | $this->_trace[] = $rule; |
||
266 | |||
267 | if (false === $rule->isTransitional()) { |
||
268 | --$this->_depth; |
||
269 | } |
||
270 | } else { |
||
271 | $ruleName = $rule->getRule(); |
||
272 | $next = $rule->getData(); |
||
273 | $zeRule = $this->_rules[$ruleName]; |
||
274 | $out = $this->_parse($zeRule, $next); |
||
275 | |||
276 | if (false === $out && false === $this->backtrack()) { |
||
277 | return null; |
||
278 | } |
||
279 | } |
||
280 | } |
||
281 | |||
282 | return true; |
||
283 | } |
||
284 | |||
285 | /** |
||
286 | * Parse current rule. |
||
287 | * |
||
288 | * @param Rule $zeRule Current rule. |
||
289 | * @param int $next Next rule index. |
||
290 | */ |
||
291 | final protected function _parse(Rule $zeRule, int $next): bool |
||
292 | { |
||
293 | $regex = null; |
||
294 | if ($zeRule instanceof Rule\Token) { |
||
295 | $name = $this->_tokenSequence->current()['token']; |
||
296 | |||
297 | if ($zeRule->getTokenName() !== $name) { |
||
298 | return false; |
||
299 | } |
||
300 | |||
301 | $value = $this->_tokenSequence->current()['value']; |
||
302 | |||
303 | if (0 <= $unification = $zeRule->getUnificationIndex()) { |
||
304 | for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) { |
||
305 | $trace = $this->_trace[$i]; |
||
306 | |||
307 | if ($trace instanceof Rule\Entry) { |
||
308 | if (false === $trace->isTransitional()) { |
||
309 | if ($trace->getDepth() <= $this->_depth) { |
||
310 | break; |
||
311 | } |
||
312 | |||
313 | --$skip; |
||
314 | } |
||
315 | } elseif ($trace instanceof Rule\Ekzit && |
||
316 | false === $trace->isTransitional()) { |
||
317 | $skip += $trace->getDepth() > $this->_depth; |
||
318 | } |
||
319 | |||
320 | if (0 < $skip) { |
||
321 | continue; |
||
322 | } |
||
323 | |||
324 | if ($trace instanceof Rule\Token && |
||
325 | $unification === $trace->getUnificationIndex() && |
||
326 | $value !== $trace->getValue()) { |
||
327 | return false; |
||
328 | } |
||
329 | } |
||
330 | } |
||
331 | |||
332 | $current = $this->_tokenSequence->current(); |
||
333 | |||
334 | $namespace = $current['namespace']; |
||
335 | $offset = $current['offset']; |
||
336 | |||
337 | $zzeRule = clone $zeRule; |
||
338 | $zzeRule->setValue($value); |
||
339 | $zzeRule->setOffset($offset); |
||
340 | $zzeRule->setNamespace($namespace); |
||
341 | |||
342 | if (isset($this->_tokens[$namespace][$name])) { |
||
343 | $zzeRule->setRepresentation($this->_tokens[$namespace][$name]); |
||
344 | } else { |
||
345 | foreach ($this->_tokens[$namespace] as $_name => $regex) { |
||
346 | if (false === $pos = strpos($_name, ':')) { |
||
347 | continue; |
||
348 | } |
||
349 | |||
350 | $_name = substr($_name, 0, $pos); |
||
351 | |||
352 | if ($_name === $name) { |
||
353 | break; |
||
354 | } |
||
355 | } |
||
356 | |||
357 | $zzeRule->setRepresentation($regex); |
||
358 | } |
||
359 | |||
360 | array_pop($this->_todo); |
||
361 | $this->_trace[] = $zzeRule; |
||
362 | $this->_tokenSequence->next(); |
||
363 | $this->_errorToken = $this->_tokenSequence->current(); |
||
364 | |||
365 | return true; |
||
366 | } elseif ($zeRule instanceof Rule\Concatenation) { |
||
367 | if (false === $zeRule->isTransitional()) { |
||
368 | ++$this->_depth; |
||
369 | } |
||
370 | |||
371 | $this->_trace[] = new Rule\Entry( |
||
372 | $zeRule->getName(), |
||
373 | 0, |
||
374 | null, |
||
375 | $this->_depth |
||
376 | ); |
||
377 | $children = $zeRule->getChildren(); |
||
378 | |||
379 | for ($i = count($children) - 1; $i >= 0; --$i) { |
||
380 | $nextRule = $children[$i]; |
||
381 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
382 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
383 | } |
||
384 | |||
385 | return true; |
||
386 | } elseif ($zeRule instanceof Rule\Choice) { |
||
387 | $children = $zeRule->getChildren(); |
||
388 | |||
389 | if ($next >= count($children)) { |
||
390 | return false; |
||
391 | } |
||
392 | |||
393 | if (false === $zeRule->isTransitional()) { |
||
394 | ++$this->_depth; |
||
395 | } |
||
396 | |||
397 | $this->_trace[] = new Rule\Entry( |
||
398 | $zeRule->getName(), |
||
399 | $next, |
||
400 | $this->_todo, |
||
401 | $this->_depth |
||
402 | ); |
||
403 | $nextRule = $children[$next]; |
||
404 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
405 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
406 | |||
407 | return true; |
||
408 | } elseif ($zeRule instanceof Rule\Repetition) { |
||
409 | $nextRule = $zeRule->getChildren(); |
||
410 | |||
411 | if (0 === $next) { |
||
412 | $name = $zeRule->getName(); |
||
413 | $min = $zeRule->getMin(); |
||
414 | |||
415 | if (false === $zeRule->isTransitional()) { |
||
416 | ++$this->_depth; |
||
417 | } |
||
418 | |||
419 | $this->_trace[] = new Rule\Entry( |
||
420 | $name, |
||
421 | $min, |
||
422 | null, |
||
423 | $this->_depth |
||
424 | ); |
||
425 | array_pop($this->_todo); |
||
426 | $this->_todo[] = new Rule\Ekzit( |
||
427 | $name, |
||
428 | $min, |
||
429 | $this->_todo |
||
430 | ); |
||
431 | |||
432 | for ($i = 0; $i < $min; ++$i) { |
||
433 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
434 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
435 | } |
||
436 | |||
437 | return true; |
||
438 | } else { |
||
439 | $max = $zeRule->getMax(); |
||
440 | |||
441 | if (-1 !== $max && $next > $max) { |
||
442 | return false; |
||
443 | } |
||
444 | |||
445 | $this->_todo[] = new Rule\Ekzit( |
||
446 | $zeRule->getName(), |
||
447 | $next, |
||
448 | $this->_todo |
||
449 | ); |
||
450 | $this->_todo[] = new Rule\Ekzit($nextRule, 0); |
||
451 | $this->_todo[] = new Rule\Entry($nextRule, 0); |
||
452 | |||
453 | return true; |
||
454 | } |
||
455 | } |
||
456 | |||
457 | return false; |
||
458 | } |
||
459 | |||
460 | /** |
||
461 | * Backtrack the trace. |
||
462 | */ |
||
463 | final protected function backtrack(): bool |
||
464 | { |
||
465 | $found = false; |
||
466 | |||
467 | do { |
||
468 | $last = array_pop($this->_trace); |
||
469 | |||
470 | if ($last instanceof Rule\Entry) { |
||
471 | $zeRule = $this->_rules[$last->getRule()]; |
||
472 | $found = $zeRule instanceof Rule\Choice; |
||
473 | } elseif ($last instanceof Rule\Ekzit) { |
||
474 | $zeRule = $this->_rules[$last->getRule()]; |
||
475 | $found = $zeRule instanceof Rule\Repetition; |
||
476 | } elseif ($last instanceof Rule\Token) { |
||
477 | $this->_tokenSequence->previous(); |
||
478 | |||
479 | if (false === $this->_tokenSequence->valid()) { |
||
480 | return false; |
||
481 | } |
||
482 | } |
||
483 | } while (0 < count($this->_trace) && false === $found); |
||
484 | |||
485 | if (false === $found) { |
||
486 | return false; |
||
487 | } |
||
488 | |||
489 | $rule = $last->getRule(); |
||
490 | $next = $last->getData() + 1; |
||
491 | $this->_depth = $last->getDepth(); |
||
492 | $this->_todo = $last->getTodo(); |
||
493 | $this->_todo[] = new Rule\Entry($rule, $next); |
||
494 | |||
495 | return true; |
||
496 | } |
||
497 | |||
498 | /** |
||
499 | * Build AST from trace. |
||
500 | * Walk through the trace iteratively and recursively. |
||
501 | * |
||
502 | * @param int $i Current trace index. |
||
503 | * @param array &$children Collected children. |
||
504 | * |
||
505 | * @return TreeNode|int |
||
506 | */ |
||
507 | final protected function _buildTree(int $i = 0, array &$children = []) |
||
508 | { |
||
509 | $max = count($this->_trace); |
||
510 | |||
511 | while ($i < $max) { |
||
512 | $trace = $this->_trace[$i]; |
||
513 | |||
514 | if ($trace instanceof Rule\Entry) { |
||
515 | $ruleName = $trace->getRule(); |
||
516 | $rule = $this->_rules[$ruleName]; |
||
517 | $isRule = false === $trace->isTransitional(); |
||
518 | $nextTrace = $this->_trace[$i + 1]; |
||
519 | $id = $rule->getNodeId(); |
||
520 | |||
521 | // Optimization: Skip empty trace sequence. |
||
522 | if ($nextTrace instanceof Rule\Ekzit && |
||
523 | $ruleName === $nextTrace->getRule()) { |
||
524 | $i += 2; |
||
525 | |||
526 | continue; |
||
527 | } |
||
528 | |||
529 | if (true === $isRule) { |
||
530 | $children[] = $ruleName; |
||
531 | } |
||
532 | |||
533 | if (null !== $id) { |
||
534 | $children[] = [ |
||
535 | 'id' => $id, |
||
536 | 'options' => $rule->getNodeOptions(), |
||
537 | ]; |
||
538 | } |
||
539 | |||
540 | $i = $this->_buildTree($i + 1, $children); |
||
541 | |||
542 | if (false === $isRule) { |
||
543 | continue; |
||
544 | } |
||
545 | |||
546 | $handle = []; |
||
547 | $cId = null; |
||
548 | $cOptions = []; |
||
549 | |||
550 | do { |
||
551 | $pop = array_pop($children); |
||
552 | |||
553 | if (true === is_object($pop)) { |
||
554 | $handle[] = $pop; |
||
555 | } elseif (true === is_array($pop) && null === $cId) { |
||
556 | $cId = $pop['id']; |
||
557 | $cOptions = $pop['options']; |
||
558 | } elseif ($ruleName === $pop) { |
||
559 | break; |
||
560 | } |
||
561 | } while (null !== $pop); |
||
562 | |||
563 | if (null === $cId) { |
||
564 | $cId = $rule->getDefaultId(); |
||
565 | $cOptions = $rule->getDefaultOptions(); |
||
566 | } |
||
567 | |||
568 | if (null === $cId) { |
||
569 | for ($j = count($handle) - 1; $j >= 0; --$j) { |
||
570 | $children[] = $handle[$j]; |
||
571 | } |
||
572 | |||
573 | continue; |
||
574 | } |
||
575 | |||
576 | if (true === in_array('M', $cOptions) && |
||
577 | true === $this->mergeTree($children, $handle, $cId)) { |
||
578 | continue; |
||
579 | } |
||
580 | |||
581 | if (true === in_array('m', $cOptions) && |
||
582 | true === $this->mergeTree($children, $handle, $cId, true)) { |
||
583 | continue; |
||
584 | } |
||
585 | |||
586 | $cTree = new TreeNode($id ?: $cId); |
||
587 | |||
588 | foreach ($handle as $child) { |
||
589 | $child->setParent($cTree); |
||
590 | $cTree->prependChild($child); |
||
591 | } |
||
592 | |||
593 | $children[] = $cTree; |
||
594 | } elseif ($trace instanceof Rule\Ekzit) { |
||
595 | return $i + 1; |
||
596 | } else { |
||
597 | if (false === $trace->isKept()) { |
||
598 | ++$i; |
||
599 | |||
600 | continue; |
||
601 | } |
||
602 | |||
603 | $child = new TreeNode('token', [ |
||
604 | 'token' => $trace->getTokenName(), |
||
605 | 'value' => $trace->getValue(), |
||
606 | 'namespace' => $trace->getNamespace(), |
||
607 | 'offset' => $trace->getOffset(), |
||
608 | ]); |
||
609 | |||
610 | $children[] = $child; |
||
611 | ++$i; |
||
612 | } |
||
613 | } |
||
614 | |||
615 | return $children[0]; |
||
616 | } |
||
617 | |||
618 | /** |
||
619 | * Try to merge directly children into an existing node. |
||
620 | * |
||
621 | * @param array &$children Current children being gathering. |
||
622 | * @param array &$handle Children of the new node. |
||
623 | * @param string $cId Node ID. |
||
624 | * @param bool $recursive Whether we should merge recursively or |
||
625 | * not. |
||
626 | */ |
||
627 | final protected function mergeTree( |
||
658 | } |
||
659 | |||
660 | /** |
||
661 | * Merge recursively. |
||
662 | * Please, see self::mergeTree() to know the context. |
||
663 | * |
||
664 | * @param TreeNode $node Node that receives. |
||
665 | * @param TreeNode $newNode Node to merge. |
||
666 | */ |
||
667 | final protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode): void |
||
668 | { |
||
669 | $nNId = $newNode->getId(); |
||
670 | |||
671 | if ('token' === $nNId) { |
||
672 | $node->appendChild($newNode); |
||
673 | $newNode->setParent($node); |
||
674 | |||
675 | return; |
||
676 | } |
||
677 | |||
678 | $children = $node->getChildren(); |
||
679 | end($children); |
||
680 | $last = current($children); |
||
681 | |||
682 | if ($last->getId() !== $nNId) { |
||
683 | $node->appendChild($newNode); |
||
684 | $newNode->setParent($node); |
||
685 | |||
686 | return; |
||
687 | } |
||
688 | |||
689 | foreach ($newNode->getChildren() as $child) { |
||
690 | $this->mergeTreeRecursive($last, $child); |
||
691 | } |
||
692 | |||
693 | return; |
||
694 | } |
||
695 | |||
696 | /** |
||
697 | * Get AST. |
||
698 | */ |
||
699 | final public function getTree(): TreeNode |
||
700 | { |
||
701 | return $this->_tree; |
||
702 | } |
||
703 | |||
704 | /** |
||
705 | * Get trace. |
||
706 | * |
||
707 | * @return array |
||
708 | */ |
||
709 | final public function getTrace(): array |
||
710 | { |
||
711 | return $this->_trace; |
||
712 | } |
||
713 | |||
714 | /** |
||
715 | * Get pragmas. |
||
716 | * |
||
717 | * @return array |
||
718 | */ |
||
719 | final public function getPragmas(): array |
||
720 | { |
||
721 | return $this->_pragmas; |
||
722 | } |
||
723 | |||
724 | /** |
||
725 | * Get tokens. |
||
726 | * |
||
727 | * @return array |
||
728 | */ |
||
729 | final public function getTokens(): array |
||
730 | { |
||
731 | return $this->_tokens; |
||
732 | } |
||
733 | |||
734 | /** |
||
735 | * Get the lexer iterator. |
||
736 | */ |
||
737 | final public function getTokenSequence(): Buffer |
||
738 | { |
||
739 | return $this->_tokenSequence; |
||
740 | } |
||
741 | |||
742 | /** |
||
743 | * Get rule by name. |
||
744 | * |
||
745 | * @param string $name Rule name. |
||
746 | */ |
||
747 | final public function getRule(string $name): Rule |
||
748 | { |
||
749 | if (!isset($this->_rules[$name])) { |
||
750 | return null; |
||
|
|||
751 | } |
||
752 | |||
753 | return $this->_rules[$name]; |
||
754 | } |
||
755 | |||
756 | /** |
||
757 | * Get rules. |
||
758 | * |
||
759 | * @return Rule[] |
||
760 | */ |
||
761 | final public function getRules(): array |
||
764 | } |
||
765 | |||
766 | /** |
||
767 | * Get root rule. |
||
768 | */ |
||
769 | final public function getRootRule(): string |
||
770 | { |
||
780 | } |
||
781 | } |
||
782 |