Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 43 | class Parser |
||
| 44 | { |
||
/**
 * Regular-expression fragment (without delimiters or anchors) that matches a block scalar
 * header: a "|" or ">" style indicator, optional chomping/indentation modifiers, and an
 * optional trailing comment. parseValue() wraps it in "/^" ... "$/" before matching.
 *
 * @var string
 */
const BLOCK_SCALAR_HEADER_PATTERN =
    "(?P<separator>\\||>)(?P<modifiers>\\+|\\-|\\d+|\\+\\d+|\\-\\d+|\\d+\\+|\\d+\\-)?(?P<comments> +#.*)?";


/** @var int Offset of the YAML document (used for line numbers in error messages) */
protected $offset = 0;
/** @var array Lines of the document being parsed, indexed by $currentLineNb */
protected $lines = [];
/** @var int Index into $lines of the current line; starts at -1, i.e. before the first line */
protected $currentLineNb = -1;
/** @var string Text of the current line */
protected $currentLine = "";
/** @var array Values keyed by reference name; parseValue() resolves "*name" aliases against it */
protected $refs = [];
||
| 60 | |||
| 61 | |||
| 62 | /** |
||
| 63 | * Constructor |
||
| 64 | * |
||
| 65 | * @param int $offset The offset of YAML document (used for line numbers in error messages) |
||
| 66 | * |
||
| 67 | * @return Parser |
||
|
|
|||
| 68 | */ |
||
| 69 | public function __construct($offset = 0) |
||
| 73 | |||
| 74 | /** |
||
| 75 | * Parses a YAML string to a PHP value |
||
| 76 | * |
||
| 77 | * @param string $value A YAML string |
||
| 78 | * |
||
| 79 | * @throws ParseException If the YAML is not valid |
||
| 80 | * @return mixed A PHP value |
||
| 81 | */ |
||
| 82 | public function parse($value) |
||
| 332 | |||
| 333 | /** |
||
| 334 | * Returns the current line number (takes the offset into account) |
||
| 335 | * |
||
| 336 | * @return int The current line number |
||
| 337 | */ |
||
| 338 | protected function getRealCurrentLineNb() |
||
| 342 | |||
| 343 | /** |
||
| 344 | * Returns the current line indentation |
||
| 345 | * |
||
| 346 | * @return int The current line indentation |
||
| 347 | */ |
||
| 348 | protected function getCurrentLineIndentation() |
||
| 352 | |||
/**
 * Collects the block of YAML embedded under the current line
 *
 * The parser is advanced line by line; every line that belongs to the block is stripped of
 * the block indentation and the collected lines are joined with "\n".
 *
 * @param int  $indentation The indent level at which the block is to be read, or null to use
 *                          the indentation of the line that follows the current one
 * @param bool $inSequence  True if the enclosing data structure is a sequence
 *
 * @throws ParseException When an indentation problem is detected
 * @return string|null A YAML string, or null when there is no following line or the following
 *                     line does not belong to an embedded block
 */
protected function getNextEmbedBlock($indentation = null, $inSequence = false)
{
    $parentIndent = $this->getCurrentLineIndentation();

    // indentations of the block scalar headers that are currently open
    $openScalarIndents = [];
    if ($this->isBlockScalarHeader()) {
        $openScalarIndents[] = $this->getCurrentLineIndentation();
    }

    if (!$this->moveToNextLine()) {
        return;
    }

    if ($indentation !== null) {
        $blockIndent = $indentation;
    } else {
        // no explicit level requested: the first line of the block defines it
        $blockIndent = $this->getCurrentLineIndentation();
        $startsUnindentedItem = $this->isStringUnIndentedCollectionItem();

        if (!$this->isCurrentLineEmpty() && $blockIndent === 0 && !$startsUnindentedItem) {
            throw new ParseException("Indentation problem.", $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }
    }

    // the following line is less indented than the block: there is nothing to embed
    if (!($this->getCurrentLineIndentation() >= $blockIndent)) {
        $this->moveToPreviousLine();

        return;
    }

    $data = [substr($this->currentLine, $blockIndent)];

    if ($inSequence && $parentIndent === $blockIndent && isset($data[0][0]) && $data[0][0] === "-") {
        // the previous line contained a dash but no item content, this line is a sequence item
        // with the same indentation and therefore no nested list or mapping
        $this->moveToPreviousLine();

        return;
    }

    $isUnindentedCollection = $this->isStringUnIndentedCollectionItem();

    if (empty($openScalarIndents) && $this->isBlockScalarHeader()) {
        $openScalarIndents[] = $this->getCurrentLineIndentation();
    }

    $lastIndent = $this->getCurrentLineIndentation();

    while ($this->moveToNextLine()) {
        $indent = $this->getCurrentLineIndentation();

        // terminate all block scalars that are more indented than the current line
        if (!empty($openScalarIndents) && $indent < $lastIndent && rtrim($this->currentLine) !== "") {
            foreach ($openScalarIndents as $index => $scalarIndent) {
                if ($scalarIndent >= $this->getCurrentLineIndentation()) {
                    unset($openScalarIndents[$index]);
                }
            }
        }

        if (empty($openScalarIndents) && !$this->isCurrentLineComment() && $this->isBlockScalarHeader()) {
            $openScalarIndents[] = $this->getCurrentLineIndentation();
        }

        $lastIndent = $indent;

        // an un-indented collection ends at the first line of the same level that is not an item
        if ($isUnindentedCollection && !$this->isStringUnIndentedCollectionItem() && $blockIndent === $indent) {
            $this->moveToPreviousLine();
            break;
        }

        // blank lines are kept as part of the block
        if ($this->isCurrentLineBlank()) {
            $data[] = substr($this->currentLine, $blockIndent);
            continue;
        }

        // we ignore "comment" lines only when we are not inside a scalar block
        if (empty($openScalarIndents) && $this->isCurrentLineComment()) {
            continue;
        }

        if ($indent >= $blockIndent) {
            $data[] = substr($this->currentLine, $blockIndent);
            continue;
        }

        // a line that is less indented than the block but not at column 0 is an error
        if ($indent !== 0) {
            throw new ParseException("Indentation problem.", $this->getRealCurrentLineNb() + 1, $this->currentLine);
        }

        // a line at column 0 ends the block; hand it back to the caller
        $this->moveToPreviousLine();

        break;
    }

    return implode("\n", $data);
}
||
| 457 | |||
/**
 * Moves the parser to the next line
 *
 * Advances $currentLineNb by one and loads the matching entry of $lines into $currentLine.
 * Nothing is changed when the parser is already on the last line.
 *
 * @return bool True if the parser moved, false if there is no further line
 */
protected function moveToNextLine()
{
    $lastIndex = count($this->lines) - 1;

    if ($this->currentLineNb < $lastIndex) {
        $this->currentLineNb++;
        $this->currentLine = $this->lines[$this->currentLineNb];

        return true;
    }

    return false;
}
||
| 473 | |||
| 474 | /** |
||
| 475 | * Moves the parser to the previous line |
||
| 476 | * |
||
| 477 | * @return void |
||
| 478 | */ |
||
| 479 | protected function moveToPreviousLine() |
||
| 483 | |||
/**
 * Parses a YAML value
 *
 * Three cases are handled, in this order: an alias ("*name"), which is resolved against
 * $this->refs; a block scalar header (| or >), which is delegated to parseBlockScalar();
 * anything else, which is handed to Inline::parse().
 *
 * @param string $value   A YAML value
 * @param string $context The parser context (either sequence or mapping)
 *
 * @throws ParseException When reference does not exist, or when an unquoted mapping value
 *                        contains a colon; a ParseException raised while parsing the inline
 *                        value is rethrown with the current line number and snippet attached
 * @return mixed A PHP value
 */
protected function parseValue($value, $context)
{
    if (strpos($value, "*") === 0) {
        // keep only the alias name: drop the leading "*" and, when a comment follows,
        // everything from the character preceding "#" onwards
        if (($pos = strpos($value, "#")) !== false) {
            $value = substr($value, 1, $pos - 2);
        } else {
            $value = substr($value, 1);
        }

        if (!array_key_exists($value, $this->refs)) {
            // line number first, snippet second: same argument order as the other
            // ParseException instances created in this class
            throw new ParseException(
                sprintf("Reference \"%s\" does not exist.", $value),
                $this->getRealCurrentLineNb() + 1,
                $this->currentLine
            );
        }

        return $this->refs[$value];
    }

    if (preg_match("/^" . self::BLOCK_SCALAR_HEADER_PATTERN . "$/", $value, $matches)) {
        $modifiers = isset($matches["modifiers"]) ? $matches["modifiers"] : "";

        return $this->parseBlockScalar(
            $matches["separator"],
            // the chomping indicator is what remains once the digits are removed
            preg_replace("#\\d+#", "", $modifiers),
            // cast before abs(): $modifiers may be "", "+" or "-", which are not numeric
            // strings and make abs() throw a TypeError on PHP 8
            (int)abs((int)$modifiers)
        );
    }

    try {
        $parsedValue = Inline::parse($value, $this->refs);

        // only a string result can contain ": "; the is_string() guard keeps non-string
        // results (null, int, bool, ...) away from strpos()
        if ($context === "mapping"
            && $value[0] !== "\""
            && $value[0] !== "'"
            && $value[0] !== "["
            && $value[0] !== "{"
            && $value[0] !== "!"
            && is_string($parsedValue)
            && strpos($parsedValue, ": ") !== false
        ) {
            throw new ParseException("A colon cannot be used in an unquoted mapping value.");
        }

        return $parsedValue;
    } catch (ParseException $e) {
        // attach the position of the offending line before passing the exception on
        $e->setParsedLine($this->getRealCurrentLineNb() + 1);
        $e->setSnippet($this->currentLine);

        throw $e;
    }
}
||
| 534 | |||
/**
 * Parses a block scalar
 *
 * Reads the lines that follow the block scalar header, removes the block indentation,
 * applies folding for the ">" style and finally applies the chomping indicator to the
 * trailing newlines.
 *
 * @param string $style       The style indicator that was used to begin this block scalar (| or >)
 * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
 * @param int    $indentation The indentation indicator that was used to begin this block scalar
 *
 * @return string The text value
 */
protected function parseBlockScalar($style, $chomping = "", $indentation = 0)
{
    $hasLine = $this->moveToNextLine();
    if (!$hasLine) {
        return "";
    }

    $lineIsBlank = $this->isCurrentLineBlank();
    $collected = [];

    // leading blank lines are consumed before determining indentation;
    // an empty entry is recorded for each one that is followed by another line
    while ($hasLine && $lineIsBlank) {
        $hasLine = $this->moveToNextLine();
        if ($hasLine) {
            $collected[] = "";
            $lineIsBlank = $this->isCurrentLineBlank();
        }
    }

    // determine indentation if not specified: use the leading spaces of the current line
    if ($indentation === 0 && preg_match("/^ +/", $this->currentLine, $matches)) {
        $indentation = strlen($matches[0]);
    }

    if ($indentation > 0) {
        $pattern = sprintf("/^ {%d}(.*)$/", $indentation);

        while ($hasLine && ($lineIsBlank || preg_match($pattern, $this->currentLine, $matches))) {
            if (!$lineIsBlank) {
                // regular content line: keep what follows the block indentation
                $collected[] = $matches[1];
            } elseif (strlen($this->currentLine) > $indentation) {
                // blank line longer than the indentation: keep the surplus whitespace
                $collected[] = substr($this->currentLine, $indentation);
            } else {
                $collected[] = "";
            }

            // newline only if not EOF
            $hasLine = $this->moveToNextLine();
            if ($hasLine) {
                $lineIsBlank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($hasLine) {
        $collected[] = "";
    }

    if ($hasLine) {
        // the current line is not part of the scalar: step back so the caller sees it again
        $collected[] = "";
        $this->moveToPreviousLine();
    }

    if ($style !== ">") {
        $text = implode("\n", $collected);
    } else {
        // folded style
        $text = "";
        $previousLineIndented = false;
        $previousLineBlank = false;

        foreach ($collected as $i => $line) {
            if ($line === "") {
                $text .= "\n";
                $previousLineIndented = false;
                $previousLineBlank = true;
                continue;
            }

            $isIndented = $line[0] === " ";

            if ($isIndented || $previousLineIndented) {
                // indented lines, and the line right after them, keep their line break
                $glue = "\n";
            } elseif ($previousLineBlank || 0 === $i) {
                $glue = "";
            } else {
                // two consecutive plain lines are folded into one, separated by a space
                $glue = " ";
            }

            $text .= $glue . $line;
            $previousLineIndented = $isIndented;
            $previousLineBlank = false;
        }
    }

    // deal with trailing newlines
    if ($chomping === "") {
        // no indicator: collapse the trailing newlines into a single one
        return preg_replace("/\\n+$/", "\n", $text);
    }

    if ($chomping === "-") {
        // "-": remove all trailing newlines
        return preg_replace("/\\n+$/", "", $text);
    }

    // any other indicator: the text is returned as collected
    return $text;
}
||
| 638 | |||
/**
 * Returns true if the next line is indented
 *
 * Lines for which isCurrentLineEmpty() is true are skipped before the comparison. When a
 * line is found, the parser steps back exactly one line before returning; when the end of
 * the document is reached, it returns without stepping back.
 *
 * @return bool Returns true if the next line is indented, false otherwise
 */
protected function isNextLineIndented()
{
    $startIndent = $this->getCurrentLineIndentation();

    // advance until a non-empty line is found or the document ends
    do {
        $reachedEnd = !$this->moveToNextLine();
    } while (!$reachedEnd && $this->isCurrentLineEmpty());

    if ($reachedEnd) {
        return false;
    }

    $isDeeper = $this->getCurrentLineIndentation() > $startIndent;

    $this->moveToPreviousLine();

    return $isDeeper;
}
||
| 666 | |||
| 667 | /** |
||
| 668 | * Returns true if the current line is blank or if it is a comment line |
||
| 669 | * |
||
| 670 | * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise |
||
| 671 | */ |
||
| 672 | protected function isCurrentLineEmpty() |
||
| 676 | |||
| 677 | /** |
||
| 678 | * Returns true if the current line is blank |
||
| 679 | * |
||
| 680 | * @return bool Returns true if the current line is blank, false otherwise |
||
| 681 | */ |
||
| 682 | protected function isCurrentLineBlank() |
||
| 686 | |||
/**
 * Returns true if the current line is a comment line
 *
 * A comment line is one whose first character, once leading spaces are removed, is "#".
 *
 * @return bool Returns true if the current line is a comment line, false otherwise
 */
protected function isCurrentLineComment()
{
    // checking explicitly the first char of the trim is faster than loops or strpos
    $stripped = ltrim($this->currentLine, " ");

    if ($stripped === "") {
        return false;
    }

    return $stripped[0] === "#";
}
||
| 699 | |||
/**
 * Cleanups a YAML string to be parsed
 *
 * Normalizes line endings to "\n", then strips a leading %YAML header, leading comment
 * lines and the start-of-document marker (---). The end-of-document marker (...) is only
 * removed when a start marker was found. $this->offset is increased for what is removed
 * so that reported line numbers still refer to the original input.
 *
 * @param string $value The input YAML string
 *
 * @return string A cleaned up YAML string
 */
protected function cleanup($value)
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);

    // strip YAML header
    $headerCount = 0;
    $value = preg_replace("#^\\%YAML[: ][\\d\\.]+.*\\n#u", "", $value, -1, $headerCount);
    $this->offset += $headerCount;

    // remove leading comments
    $matchCount = 0;
    $withoutComments = preg_replace("#^(\\#.*?\\n)+#s", "", $value, -1, $matchCount);
    if ($matchCount == 1) {
        // items have been removed, update the offset by the number of lines dropped
        $this->offset += substr_count($value, "\n") - substr_count($withoutComments, "\n");
        $value = $withoutComments;
    }

    // remove start of the document marker (---)
    $withoutMarker = preg_replace("#^\\-\\-\\-.*?\\n#s", "", $value, -1, $matchCount);
    if ($matchCount != 1) {
        // no start marker: the end marker is left untouched as well
        return $value;
    }

    // items have been removed, update the offset by the number of lines dropped
    $this->offset += substr_count($value, "\n") - substr_count($withoutMarker, "\n");

    // remove end of the document marker (...)
    return preg_replace("#\\.\\.\\.\\s*$#", "", $withoutMarker);
}
||
| 737 | |||
/**
 * Returns true if the next line starts unindented collection
 *
 * Lines for which isCurrentLineEmpty() is true are skipped. The next line qualifies when it
 * has the same indentation as the current one and isStringUnIndentedCollectionItem() is true
 * for it. When a line is found, the parser steps back exactly one line before returning;
 * when the end of the document is reached, it returns without stepping back.
 *
 * @return bool Returns true if the next line starts unindented collection, false otherwise
 */
protected function isNextLineUnIndentedCollection()
{
    $startIndent = $this->getCurrentLineIndentation();

    // advance until a non-empty line is found or the document ends
    do {
        $hasLine = $this->moveToNextLine();
    } while ($hasLine && $this->isCurrentLineEmpty());

    if ($hasLine === false) {
        return false;
    }

    $startsCollection = $this->getCurrentLineIndentation() == $startIndent
        && $this->isStringUnIndentedCollectionItem();

    $this->moveToPreviousLine();

    return $startsCollection;
}
||
| 766 | |||
| 767 | /** |
||
| 768 | * Returns true if the string is un-indented collection item |
||
| 769 | * |
||
| 770 | * @return bool Returns true if the string is un-indented collection item, false otherwise |
||
| 771 | */ |
||
| 772 | protected function isStringUnIndentedCollectionItem() |
||
| 776 | |||
| 777 | /** |
||
| 778 | * Tests whether or not the current line is the header of a block scalar. |
||
| 779 | * |
||
| 780 | * @return bool |
||
| 781 | */ |
||
| 782 | protected function isBlockScalarHeader() |
||
| 786 | } |
||
| 787 |
Adding a `@return` annotation to a constructor is not recommended, since a constructor does not have a meaningful return value. Please refer to the PHP core documentation on constructors.