Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
43 | class Parser |
||
44 | { |
||
45 | /** @type string BLOCK_SCALAR_HEADER_PATTERN */ |
||
46 | const BLOCK_SCALAR_HEADER_PATTERN = |
||
47 | "(?P<separator>\\||>)(?P<modifiers>\\+|\\-|\\d+|\\+\\d+|\\-\\d+|\\d+\\+|\\d+\\-)?(?P<comments> +#.*)?"; |
||
48 | |||
49 | |||
50 | /** @type int $offset */ |
||
51 | protected $offset = 0; |
||
52 | /** @type array $lines */ |
||
53 | protected $lines = []; |
||
54 | /** @type int $currentLineNb */ |
||
55 | protected $currentLineNb = -1; |
||
56 | /** @type string $currentLine */ |
||
57 | protected $currentLine = ""; |
||
58 | /** @type array $refs */ |
||
59 | protected $refs = []; |
||
60 | |||
61 | |||
62 | /** |
||
63 | * Constructor |
||
64 | * |
||
65 | * @param int $offset The offset of YAML document (used for line numbers in error messages) |
||
66 | * |
||
67 | * @return Parser |
||
|
|||
68 | */ |
||
69 | public function __construct($offset = 0) |
||
73 | |||
74 | /** |
||
75 | * Parses a YAML string to a PHP value |
||
76 | * |
||
77 | * @param string $value A YAML string |
||
78 | * |
||
79 | * @throws ParseException If the YAML is not valid |
||
80 | * @return mixed A PHP value |
||
81 | */ |
||
82 | public function parse($value) |
||
332 | |||
333 | /** |
||
334 | * Returns the current line number (takes the offset into account) |
||
335 | * |
||
336 | * @return int The current line number |
||
337 | */ |
||
338 | protected function getRealCurrentLineNb() |
||
342 | |||
343 | /** |
||
344 | * Returns the current line indentation |
||
345 | * |
||
346 | * @return int The current line indentation |
||
347 | */ |
||
348 | protected function getCurrentLineIndentation() |
||
352 | |||
/**
 * Returns the next embed block of YAML
 *
 * Moves to the line after the current one and collects it, together with every
 * following line that is indented at least as far as the block, into a single
 * string from which the block indentation has been stripped.
 *
 * @param int|null $indentation The indent level at which the block is to be read, or null for default
 * @param bool     $inSequence  True if the enclosing data structure is a sequence
 *
 * @throws ParseException When indentation problem are detected
 * @return string|null A YAML string, or null when there is no next line or the
 *                     next line does not belong to an embedded block
 */
protected function getNextEmbedBlock($indentation = null, $inSequence = false)
{
    $oldLineIndentation = $this->getCurrentLineIndentation();
    $blockScalarIndentations = [];

    if ($this->isBlockScalarHeader()) {
        $blockScalarIndentations[] = $this->getCurrentLineIndentation();
    }

    if (!$this->moveToNextLine()) {
        // no further line: there is nothing to embed
        return null;
    }

    if ($indentation === null) {
        // default: the block indentation is the indentation of its first line
        $newIndent = $this->getCurrentLineIndentation();

        $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();

        if (!$this->isCurrentLineEmpty() && $newIndent === 0 && !$unindentedEmbedBlock) {
            throw new ParseException("Indentation problem.", $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    } else {
        $newIndent = $indentation;
    }

    $data = [];
    if ($this->getCurrentLineIndentation() >= $newIndent) {
        $data[] = substr($this->currentLine, $newIndent);
    } else {
        // the next line is less indented than the block: give the line back
        $this->moveToPreviousLine();

        return null;
    }

    if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && $data[0][0] === "-") {
        // the previous line contained a dash but no item content, this line is a sequence item
        // with the same indentation and therefore no nested list or mapping
        $this->moveToPreviousLine();

        return null;
    }

    $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

    if (empty($blockScalarIndentations) && $this->isBlockScalarHeader()) {
        $blockScalarIndentations[] = $this->getCurrentLineIndentation();
    }

    $previousLineIndentation = $this->getCurrentLineIndentation();

    while ($this->moveToNextLine()) {
        $indent = $this->getCurrentLineIndentation();

        // terminate all block scalars that are more indented than the current line
        if (!empty($blockScalarIndentations) && $indent < $previousLineIndentation && rtrim($this->currentLine) !== "") {
            foreach ($blockScalarIndentations as $key => $blockScalarIndentation) {
                // $indent already holds the indentation of the current line
                if ($blockScalarIndentation >= $indent) {
                    unset($blockScalarIndentations[$key]);
                }
            }
        }

        if (empty($blockScalarIndentations) && !$this->isCurrentLineComment() && $this->isBlockScalarHeader()) {
            $blockScalarIndentations[] = $this->getCurrentLineIndentation();
        }

        $previousLineIndentation = $indent;

        // an un-indented collection ends at the first line on the same level that is not an item
        if ($isItUnindentedCollection && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
            $this->moveToPreviousLine();
            break;
        }

        if ($this->isCurrentLineBlank()) {
            $data[] = substr($this->currentLine, $newIndent);
            continue;
        }

        // we ignore "comment" lines only when we are not inside a scalar block
        if (empty($blockScalarIndentations) && $this->isCurrentLineComment()) {
            continue;
        }

        if ($indent >= $newIndent) {
            $data[] = substr($this->currentLine, $newIndent);
        } elseif ($indent === 0) {
            // back at the top level: the block is finished, give the line back
            $this->moveToPreviousLine();

            break;
        } else {
            throw new ParseException("Indentation problem.", $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    }

    return implode("\n", $data);
}
||
457 | |||
/**
 * Advances the parser by one line
 *
 * When a further line exists, the line counter is incremented and the current
 * line is replaced by that line; otherwise the parser state is left untouched.
 *
 * @return bool True if the parser moved, false if it already was on the last line
 */
protected function moveToNextLine()
{
    $lastLineNb = count($this->lines) - 1;
    $hasNextLine = $this->currentLineNb < $lastLineNb;

    if ($hasNextLine) {
        $this->currentLineNb += 1;
        $this->currentLine = $this->lines[$this->currentLineNb];
    }

    return $hasNextLine;
}
||
473 | |||
474 | /** |
||
475 | * Moves the parser to the previous line |
||
476 | * |
||
477 | * @return void |
||
478 | */ |
||
479 | protected function moveToPreviousLine() |
||
483 | |||
/**
 * Parses a YAML value
 *
 * Handles, in this order: an alias ("*name", resolved against the known
 * references), a block scalar header (delegated to parseBlockScalar()) and
 * finally any other value (delegated to Inline::parse()).
 *
 * @param string $value   A YAML value
 * @param string $context The parser context (either sequence or mapping)
 *
 * @throws ParseException When reference does not exist, or when an unquoted mapping value contains ": "
 * @return mixed A PHP value
 */
protected function parseValue($value, $context)
{
    if (strpos($value, "*") === 0) {
        // alias: drop the leading "*" and a trailing comment, however much
        // whitespace separates the alias name from the "#"
        $pos = strpos($value, "#");
        if ($pos !== false) {
            $value = rtrim(substr($value, 1, $pos - 1));
        } else {
            $value = substr($value, 1);
        }

        if (!array_key_exists($value, $this->refs)) {
            // same argument order as the other ParseException calls in this class: message, line number, snippet
            throw new ParseException(
                sprintf("Reference \"%s\" does not exist.", $value),
                $this->getRealCurrentLineNb() + 1,
                $this->currentLine
            );
        }

        return $this->refs[$value];
    }

    if (preg_match("/^" . self::BLOCK_SCALAR_HEADER_PATTERN . "$/", $value, $matches)) {
        $modifiers = isset($matches["modifiers"]) ? $matches["modifiers"] : "";

        return $this->parseBlockScalar(
            $matches["separator"],
            // chomping indicator: the modifiers without their digits
            preg_replace("#\\d+#", "", $modifiers),
            // indentation indicator: the digits only (0 when there are none); unlike abs(),
            // this does not choke on non-numeric modifiers such as "" or "+"
            (int)preg_replace("#\\D+#", "", $modifiers)
        );
    }

    try {
        $parsedValue = Inline::parse($value, $this->refs);

        // only a plain (unquoted, non-flow, untagged) scalar that parsed to a string can hold a stray ": "
        $isPlainScalar = $value !== "" && !in_array($value[0], ["\"", "'", "[", "{", "!"], true);

        if ($context === "mapping" && $isPlainScalar && is_string($parsedValue) && strpos($parsedValue, ": ") !== false) {
            throw new ParseException("A colon cannot be used in an unquoted mapping value.");
        }

        return $parsedValue;
    } catch (ParseException $e) {
        // enrich the exception with the position of the offending line
        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
        $e->setSnippet($this->currentLine);

        throw $e;
    }
}
||
534 | |||
/**
 * Parses a block scalar
 *
 * Reads the lines following the block scalar header, strips the block
 * indentation from them, then folds them (for the ">" style) and applies the
 * chomping rule to the trailing newlines.
 *
 * @param string $style       The style indicator that was used to begin this block scalar (| or >)
 * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
 * @param int    $indentation The indentation indicator that was used to begin this block scalar
 *
 * @return string The text value
 */
protected function parseBlockScalar($style, $chomping = "", $indentation = 0)
{
    $notEOF = $this->moveToNextLine();
    if (!$notEOF) {
        return "";
    }

    $isCurrentLineBlank = $this->isCurrentLineBlank();
    $blockLines = [];

    // leading blank lines are consumed before determining indentation
    while ($notEOF && $isCurrentLineBlank) {
        // newline only if not EOF
        if ($notEOF = $this->moveToNextLine()) {
            $blockLines[] = "";
            $isCurrentLineBlank = $this->isCurrentLineBlank();
        }
    }

    // determine indentation if not specified
    if ($indentation === 0) {
        if (preg_match("/^ +/", $this->currentLine, $matches)) {
            $indentation = strlen($matches[0]);
        }
    }

    if ($indentation > 0) {
        $pattern = sprintf("/^ {%d}(.*)$/", $indentation);

        while ($notEOF && ($isCurrentLineBlank || preg_match($pattern, $this->currentLine, $matches))) {
            if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
                // blank line longer than the indentation: keep the surplus whitespace
                $blockLines[] = substr($this->currentLine, $indentation);
            } elseif ($isCurrentLineBlank) {
                $blockLines[] = "";
            } else {
                $blockLines[] = $matches[1];
            }

            // newline only if not EOF
            if ($notEOF = $this->moveToNextLine()) {
                $isCurrentLineBlank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($notEOF) {
        $blockLines[] = "";
    }

    if ($notEOF) {
        // the line that ended the block is not part of it: give it back
        $blockLines[] = "";
        $this->moveToPreviousLine();
    }

    // folded style
    if ($style === ">") {
        $text = "";
        $previousLineIndented = false;
        $previousLineBlank = false;

        foreach ($blockLines as $i => $line) {
            if ($line === "") {
                $text .= "\n";
                $previousLineIndented = false;
                $previousLineBlank = true;

                continue;
            }

            $lineIndented = $line[0] === " ";

            if ($lineIndented || $previousLineIndented) {
                // more-indented lines, and the line right after them, are not folded
                $text .= "\n" . $line;
            } elseif ($previousLineBlank || $i === 0) {
                $text .= $line;
            } else {
                // regular line break inside a paragraph: fold into a single space
                $text .= " " . $line;
            }

            $previousLineIndented = $lineIndented;
            $previousLineBlank = false;
        }
    } else {
        $text = implode("\n", $blockLines);
    }

    // deal with trailing newlines
    if ($chomping === "") {
        $text = preg_replace("/\\n+$/", "\n", $text);
    } elseif ($chomping === "-") {
        $text = preg_replace("/\\n+$/", "", $text);
    }

    return $text;
}
||
638 | |||
/**
 * Tells whether the next non-empty line is indented deeper than the current one
 *
 * NOTE(review): every empty line skipped on the way is consumed, the parser only
 * steps back by a single line before returning, and it does not step back at all
 * when the end of the input is reached.
 *
 * @return bool Returns true if the next line is indented, false otherwise
 */
protected function isNextLineIndented()
{
    $baseIndentation = $this->getCurrentLineIndentation();

    // advance until a line is found that isCurrentLineEmpty() does not report as empty
    do {
        if (!$this->moveToNextLine()) {
            return false;
        }
    } while ($this->isCurrentLineEmpty());

    $isDeeper = $this->getCurrentLineIndentation() > $baseIndentation;

    $this->moveToPreviousLine();

    return $isDeeper;
}
||
666 | |||
667 | /** |
||
668 | * Returns true if the current line is blank or if it is a comment line |
||
669 | * |
||
670 | * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise |
||
671 | */ |
||
672 | protected function isCurrentLineEmpty() |
||
676 | |||
677 | /** |
||
678 | * Returns true if the current line is blank |
||
679 | * |
||
680 | * @return bool Returns true if the current line is blank, false otherwise |
||
681 | */ |
||
682 | protected function isCurrentLineBlank() |
||
686 | |||
/**
 * Tells whether the current line is a comment line
 *
 * A line counts as a comment when its first character after any leading
 * spaces is "#".
 *
 * @return bool Returns true if the current line is a comment line, false otherwise
 */
protected function isCurrentLineComment()
{
    // only leading spaces are stripped; comparing a single character avoids scanning the line
    $withoutLeadingSpaces = ltrim($this->currentLine, " ");

    return strncmp($withoutLeadingSpaces, "#", 1) === 0;
}
||
699 | |||
/**
 * Cleanups a YAML string to be parsed
 *
 * Normalizes line endings to "\n", then removes the %YAML header, the leading
 * comment lines and the document start marker (---). When a start marker was
 * removed, a trailing document end marker (...) is removed as well. The offset
 * is increased by the number of lines removed from the top of the input.
 *
 * @param string $value The input YAML string
 *
 * @return string A cleaned up YAML string
 */
protected function cleanup($value)
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);

    // strip YAML header
    $count = 0;
    $withoutHeader = preg_replace("#^\\%YAML[: ][\\d\\.]+.*\\n#u", "", $value, -1, $count);
    if ($withoutHeader !== null) {
        // preg_replace() yields null on error (e.g. malformed UTF-8 with the "u" modifier);
        // in that case the value is kept as it is
        $value = $withoutHeader;
        $this->offset += $count;
    }

    // remove leading comments
    $trimmedValue = preg_replace("#^(\\#.*?\\n)+#s", "", $value, -1, $count);
    if ($count == 1) {
        // items have been removed, update the offset
        $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
        $value = $trimmedValue;
    }

    // remove start of the document marker (---)
    $trimmedValue = preg_replace("#^\\-\\-\\-.*?\\n#s", "", $value, -1, $count);
    if ($count == 1) {
        // items have been removed, update the offset
        $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
        $value = $trimmedValue;

        // remove end of the document marker (...); the marker has to start a line,
        // so that an ellipsis ending the last scalar ("foo: wait...") is left alone
        $value = preg_replace("#^\\.\\.\\.\\s*\\z#m", "", $value);
    }

    return $value;
}
||
737 | |||
/**
 * Tells whether the next non-empty line starts an un-indented collection
 *
 * That is the case when the line has the same indentation as the current line
 * and is an un-indented collection item.
 *
 * NOTE(review): every empty line skipped on the way is consumed, the parser only
 * steps back by a single line before returning, and it does not step back at all
 * when the end of the input is reached.
 *
 * @return bool Returns true if the next line starts unindented collection, false otherwise
 */
protected function isNextLineUnIndentedCollection()
{
    $baseIndentation = $this->getCurrentLineIndentation();

    // advance until a line is found that isCurrentLineEmpty() does not report as empty
    do {
        if (!$this->moveToNextLine()) {
            return false;
        }
    } while ($this->isCurrentLineEmpty());

    $startsCollection = $this->getCurrentLineIndentation() == $baseIndentation
        && $this->isStringUnIndentedCollectionItem();

    $this->moveToPreviousLine();

    return $startsCollection;
}
||
766 | |||
767 | /** |
||
768 | * Returns true if the string is un-indented collection item |
||
769 | * |
||
770 | * @return bool Returns true if the string is un-indented collection item, false otherwise |
||
771 | */ |
||
772 | protected function isStringUnIndentedCollectionItem() |
||
776 | |||
777 | /** |
||
778 | * Tests whether or not the current line is the header of a block scalar. |
||
779 | * |
||
780 | * @return bool |
||
781 | */ |
||
782 | protected function isBlockScalarHeader() |
||
786 | } |
||
787 |
Adding a `@return` annotation to a constructor is not recommended, since a constructor does not have a meaningful return value. Please refer to the PHP core documentation on constructors.