Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 21 | class Parser |
||
| 22 | { |
||
| 23 | const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?'; |
||
| 24 | // BC - wrongly named |
||
| 25 | const FOLDED_SCALAR_PATTERN = self::BLOCK_SCALAR_HEADER_PATTERN; |
||
| 26 | |||
| 27 | private $offset = 0; |
||
| 28 | private $totalNumberOfLines; |
||
| 29 | private $lines = array(); |
||
| 30 | private $currentLineNb = -1; |
||
| 31 | private $currentLine = ''; |
||
| 32 | private $refs = array(); |
||
| 33 | private $skippedLineNumbers = array(); |
||
| 34 | private $locallySkippedLineNumbers = array(); |
||
| 35 | |||
| 36 | /** |
||
| 37 | * Constructor. |
||
| 38 | * |
||
| 39 | * @param int $offset The offset of YAML document (used for line numbers in error messages) |
||
| 40 | * @param int|null $totalNumberOfLines The overall number of lines being parsed |
||
| 41 | * @param int[] $skippedLineNumbers Number of comment lines that have been skipped by the parser |
||
| 42 | */ |
||
| 43 | public function __construct($offset = 0, $totalNumberOfLines = null, array $skippedLineNumbers = array()) |
||
| 49 | |||
| 50 | /** |
||
| 51 | * Parses a YAML string to a PHP value. |
||
| 52 | * |
||
| 53 | * @param string $value A YAML string |
||
| 54 | * @param bool $exceptionOnInvalidType true if an exception must be thrown on invalid types (a PHP resource or object), false otherwise |
||
| 55 | * @param bool $objectSupport true if object support is enabled, false otherwise |
||
| 56 | * @param bool $objectForMap true if maps should return a stdClass instead of array() |
||
| 57 | * |
||
| 58 | * @return mixed A PHP value |
||
| 59 | * |
||
| 60 | * @throws ParseException If the YAML is not valid |
||
| 61 | */ |
||
| 62 | public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) |
||
| 101 | |||
| 102 | private function doParse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) |
||
| 302 | |||
/**
 * Parses an embedded block of YAML with a dedicated child parser.
 *
 * The child parser is given this parser's skipped line numbers plus every
 * locally skipped line number that is not located before $offset, and it
 * shares this parser's reference table by reference.
 *
 * @param int    $offset                 The offset handed to the child parser
 * @param string $yaml                   The YAML block to parse
 * @param bool   $exceptionOnInvalidType Forwarded unchanged to doParse()
 * @param bool   $objectSupport          Forwarded unchanged to doParse()
 * @param bool   $objectForMap           Forwarded unchanged to doParse()
 *
 * @return mixed Whatever the child parser's doParse() returns
 */
private function parseBlock($offset, $yaml, $exceptionOnInvalidType, $objectSupport, $objectForMap)
{
    $skipped = $this->skippedLineNumbers;

    foreach ($this->locallySkippedLineNumbers as $localLineNumber) {
        // only lines at or after the block offset are relevant for the child parser
        if ($localLineNumber >= $offset) {
            $skipped[] = $localLineNumber;
        }
    }

    $blockParser = new self($offset, $this->totalNumberOfLines, $skipped);
    // references resolved inside the block must be visible to (and from) the parent
    $blockParser->refs = &$this->refs;

    return $blockParser->doParse($yaml, $exceptionOnInvalidType, $objectSupport, $objectForMap);
}
||
| 320 | |||
| 321 | /** |
||
| 322 | * Returns the current line number (takes the offset into account). |
||
| 323 | * |
||
| 324 | * @return int The current line number |
||
| 325 | */ |
||
| 326 | private function getRealCurrentLineNb() |
||
| 340 | |||
| 341 | /** |
||
| 342 | * Returns the current line indentation. |
||
| 343 | * |
||
| 344 | * @return int The current line indentation |
||
| 345 | */ |
||
| 346 | private function getCurrentLineIndentation() |
||
| 350 | |||
| 351 | /** |
||
| 352 | * Returns the next embed block of YAML. |
||
| 353 | * |
||
| 354 | * @param int $indentation The indent level at which the block is to be read, or null for default |
||
| 355 | * @param bool $inSequence True if the enclosing data structure is a sequence |
||
| 356 | * |
||
| 357 | * @return string A YAML string |
||
| 358 | * |
||
| 359 | * @throws ParseException When indentation problems are detected |
||
| 360 | */ |
||
| 361 | private function getNextEmbedBlock($indentation = null, $inSequence = false) |
||
| 465 | |||
| 466 | /** |
||
| 467 | * Moves the parser to the next line. |
||
| 468 | * |
||
| 469 | * @return bool |
||
| 470 | */ |
||
| 471 | View Code Duplication | private function moveToNextLine() |
|
| 481 | |||
| 482 | /** |
||
| 483 | * Moves the parser to the previous line. |
||
| 484 | * |
||
| 485 | * @return bool |
||
| 486 | */ |
||
| 487 | View Code Duplication | private function moveToPreviousLine() |
|
| 497 | |||
| 498 | /** |
||
| 499 | * Parses a YAML value. |
||
| 500 | * |
||
| 501 | * @param string $value A YAML value |
||
| 502 | * @param bool $exceptionOnInvalidType True if an exception must be thrown on invalid types false otherwise |
||
| 503 | * @param bool $objectSupport True if object support is enabled, false otherwise |
||
| 504 | * @param bool $objectForMap true if maps should return a stdClass instead of array() |
||
| 505 | * @param string $context The parser context (either sequence or mapping) |
||
| 506 | * |
||
| 507 | * @return mixed A PHP value |
||
| 508 | * |
||
| 509 | * @throws ParseException When reference does not exist |
||
| 510 | */ |
||
| 511 | private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $objectForMap, $context) |
||
| 551 | |||
/**
 * Parses a block scalar.
 *
 * Collects the lines that follow the block scalar header, removes the
 * block indentation from them, joins them according to the style
 * (literal or folded) and finally applies the chomping rule to the
 * trailing newlines.
 *
 * @param string $style       The style indicator that was used to begin this block scalar (| or >)
 * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
 * @param int    $indentation The indentation indicator that was used to begin this block scalar
 *
 * @return string The text value
 */
private function parseBlockScalar($style, $chomping = '', $indentation = 0)
{
    $hasLine = $this->moveToNextLine();
    if (!$hasLine) {
        return '';
    }

    $lineIsBlank = $this->isCurrentLineBlank();
    $collected = array();

    // swallow leading blank lines before the indentation is determined
    while ($hasLine && $lineIsBlank) {
        $hasLine = $this->moveToNextLine();
        if ($hasLine) {
            // a newline is only recorded when another line follows
            $collected[] = '';
            $lineIsBlank = $this->isCurrentLineBlank();
        }
    }

    // no explicit indentation given: derive it from the leading spaces of the current line
    if (0 === $indentation && self::preg_match('/^ +/', $this->currentLine, $leadingSpaces)) {
        $indentation = strlen($leadingSpaces[0]);
    }

    if ($indentation > 0) {
        $linePattern = sprintf('/^ {%d}(.*)$/', $indentation);

        while ($hasLine) {
            if ($lineIsBlank) {
                // blank lines longer than the indentation keep their surplus whitespace
                $collected[] = strlen($this->currentLine) > $indentation
                    ? substr($this->currentLine, $indentation)
                    : '';
            } elseif (self::preg_match($linePattern, $this->currentLine, $captured)) {
                $collected[] = $captured[1];
            } else {
                // the first non-blank line that is not indented enough ends the block
                break;
            }

            $hasLine = $this->moveToNextLine();
            if ($hasLine) {
                $lineIsBlank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($hasLine) {
        $collected[] = '';
    }

    if ($hasLine) {
        $collected[] = '';
        // the line that terminated the block is stepped back over
        $this->moveToPreviousLine();
    } elseif (!$this->isCurrentLineLastLineInDocument()) {
        $collected[] = '';
    }

    if ('>' !== $style) {
        // literal style: lines are kept as they are
        $text = implode("\n", $collected);
    } else {
        // folded style
        $text = '';
        $previousWasIndented = false;
        $previousWasBlank = false;

        foreach ($collected as $index => $line) {
            if ('' === $line) {
                $text .= "\n";
                $previousWasIndented = false;
                $previousWasBlank = true;

                continue;
            }

            $isIndented = ' ' === $line[0];

            if ($isIndented || $previousWasIndented) {
                // more-indented lines (and the line right after them) are not folded
                $glue = "\n";
            } elseif ($previousWasBlank || 0 === $index) {
                $glue = '';
            } else {
                // two adjacent regular lines are folded into one, joined by a space
                $glue = ' ';
            }

            $text .= $glue.$line;
            $previousWasIndented = $isIndented;
            $previousWasBlank = false;
        }
    }

    // deal with trailing newlines
    if ('-' === $chomping) {
        return preg_replace('/\n+$/', '', $text);
    }

    if ('' === $chomping) {
        return preg_replace('/\n+$/', "\n", $text);
    }

    return $text;
}
||
| 662 | |||
| 663 | /** |
||
| 664 | * Returns true if the next line is indented. |
||
| 665 | * |
||
| 666 | * @return bool Returns true if the next line is indented, false otherwise |
||
| 667 | */ |
||
| 668 | private function isNextLineIndented() |
||
| 687 | |||
| 688 | /** |
||
| 689 | * Returns true if the current line is blank or if it is a comment line. |
||
| 690 | * |
||
| 691 | * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise |
||
| 692 | */ |
||
| 693 | private function isCurrentLineEmpty() |
||
| 697 | |||
| 698 | /** |
||
| 699 | * Returns true if the current line is blank. |
||
| 700 | * |
||
| 701 | * @return bool Returns true if the current line is blank, false otherwise |
||
| 702 | */ |
||
| 703 | private function isCurrentLineBlank() |
||
| 707 | |||
| 708 | /** |
||
| 709 | * Returns true if the current line is a comment line. |
||
| 710 | * |
||
| 711 | * @return bool Returns true if the current line is a comment line, false otherwise |
||
| 712 | */ |
||
| 713 | private function isCurrentLineComment() |
||
| 720 | |||
| 721 | private function isCurrentLineLastLineInDocument() |
||
| 725 | |||
| 726 | /** |
||
| 727 | * Cleans up a YAML string to be parsed. |
||
| 728 | * |
||
| 729 | * @param string $value The input YAML string |
||
| 730 | * |
||
| 731 | * @return string A cleaned up YAML string |
||
| 732 | */ |
||
| 733 | private function cleanup($value) |
||
| 763 | |||
| 764 | /** |
||
| 765 | * Returns true if the next line starts an unindented collection. |
||
| 766 | * |
||
| 767 | * @return bool Returns true if the next line starts an unindented collection, false otherwise |
||
| 768 | */ |
||
| 769 | private function isNextLineUnIndentedCollection() |
||
| 788 | |||
| 789 | /** |
||
| 790 | * Returns true if the string is an un-indented collection item. |
||
| 791 | * |
||
| 792 | * @return bool Returns true if the string is an un-indented collection item, false otherwise |
||
| 793 | */ |
||
| 794 | private function isStringUnIndentedCollectionItem() |
||
| 798 | |||
| 799 | /** |
||
| 800 | * Tests whether or not the current line is the header of a block scalar. |
||
| 801 | * |
||
| 802 | * @return bool |
||
| 803 | */ |
||
| 804 | private function isBlockScalarHeader() |
||
| 808 | |||
| 809 | /** |
||
| 810 | * A local wrapper for `preg_match` which will throw a ParseException if there |
||
| 811 | * is an internal error in the PCRE engine. |
||
| 812 | * |
||
| 813 | * This avoids us needing to check for "false" every time PCRE is used |
||
| 814 | * in the YAML engine |
||
| 815 | * |
||
| 816 | * @throws ParseException on a PCRE internal error |
||
| 817 | * |
||
| 818 | * @see preg_last_error() |
||
| 819 | * |
||
| 820 | * @internal |
||
| 821 | */ |
||
| 822 | public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0) |
||
| 850 | } |
||
| 851 |