Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Parser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class Parser |
||
22 | { |
||
23 | const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?'; |
||
24 | // BC - wrongly named |
||
25 | const FOLDED_SCALAR_PATTERN = self::BLOCK_SCALAR_HEADER_PATTERN; |
||
26 | |||
27 | private $offset = 0; |
||
28 | private $totalNumberOfLines; |
||
29 | private $lines = array(); |
||
30 | private $currentLineNb = -1; |
||
31 | private $currentLine = ''; |
||
32 | private $refs = array(); |
||
33 | private $skippedLineNumbers = array(); |
||
34 | private $locallySkippedLineNumbers = array(); |
||
35 | |||
36 | /** |
||
37 | * Constructor. |
||
38 | * |
||
39 | * @param int $offset The offset of YAML document (used for line numbers in error messages) |
||
40 | * @param int|null $totalNumberOfLines The overall number of lines being parsed |
||
41 | * @param int[] $skippedLineNumbers Number of comment lines that have been skipped by the parser |
||
42 | */ |
||
43 | public function __construct($offset = 0, $totalNumberOfLines = null, array $skippedLineNumbers = array()) |
||
49 | |||
50 | /** |
||
51 | * Parses a YAML string to a PHP value. |
||
52 | * |
||
53 | * @param string $value A YAML string |
||
54 | * @param bool $exceptionOnInvalidType true if an exception must be thrown on invalid types (a PHP resource or object), false otherwise |
||
55 | * @param bool $objectSupport true if object support is enabled, false otherwise |
||
56 | * @param bool $objectForMap true if maps should return a stdClass instead of array() |
||
57 | * |
||
58 | * @return mixed A PHP value |
||
59 | * |
||
60 | * @throws ParseException If the YAML is not valid |
||
61 | */ |
||
62 | public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) |
||
276 | |||
/**
 * Parses a nested block of YAML using a fresh parser instance.
 *
 * The child parser is created with the given offset, this parser's total
 * line count and the combined list of skipped line numbers, and it shares
 * this parser's reference table by reference.
 *
 * @param int    $offset                 Passed to the child parser as its offset
 * @param string $yaml                   The YAML text of the nested block
 * @param bool   $exceptionOnInvalidType Forwarded unchanged to parse()
 * @param bool   $objectSupport          Forwarded unchanged to parse()
 * @param bool   $objectForMap           Forwarded unchanged to parse()
 *
 * @return mixed Whatever the child parser's parse() call returns
 */
private function parseBlock($offset, $yaml, $exceptionOnInvalidType, $objectSupport, $objectForMap)
{
    // Start from the globally skipped lines and add only the locally
    // skipped ones that are not located before the block's offset.
    $skipped = $this->skippedLineNumbers;
    foreach ($this->locallySkippedLineNumbers as $lineNumber) {
        if ($lineNumber >= $offset) {
            $skipped[] = $lineNumber;
        }
    }

    $child = new self($offset, $this->totalNumberOfLines, $skipped);
    // Bind by reference so references registered by the child are visible here too.
    $child->refs = &$this->refs;

    return $child->parse($yaml, $exceptionOnInvalidType, $objectSupport, $objectForMap);
}
||
294 | |||
295 | /** |
||
296 | * Returns the current line number (takes the offset into account). |
||
297 | * |
||
298 | * @return int The current line number |
||
299 | */ |
||
300 | private function getRealCurrentLineNb() |
||
314 | |||
315 | /** |
||
316 | * Returns the current line indentation. |
||
317 | * |
||
318 | * @return int The current line indentation |
||
319 | */ |
||
320 | private function getCurrentLineIndentation() |
||
324 | |||
325 | /** |
||
326 | * Returns the next embed block of YAML. |
||
327 | * |
||
328 | * @param int $indentation The indent level at which the block is to be read, or null for default |
||
329 | * @param bool $inSequence True if the enclosing data structure is a sequence |
||
330 | * |
||
331 | * @return string A YAML string |
||
332 | * |
||
333 | * @throws ParseException When indentation problems are detected |
||
334 | */ |
||
335 | private function getNextEmbedBlock($indentation = null, $inSequence = false) |
||
439 | |||
440 | /** |
||
441 | * Moves the parser to the next line. |
||
442 | * |
||
443 | * @return bool |
||
444 | */ |
||
445 | View Code Duplication | private function moveToNextLine() |
|
455 | |||
456 | /** |
||
457 | * Moves the parser to the previous line. |
||
458 | * |
||
459 | * @return bool |
||
460 | */ |
||
461 | View Code Duplication | private function moveToPreviousLine() |
|
471 | |||
472 | /** |
||
473 | * Parses a YAML value. |
||
474 | * |
||
475 | * @param string $value A YAML value |
||
476 | * @param bool $exceptionOnInvalidType True if an exception must be thrown on invalid types false otherwise |
||
477 | * @param bool $objectSupport True if object support is enabled, false otherwise |
||
478 | * @param bool $objectForMap true if maps should return a stdClass instead of array() |
||
479 | * @param string $context The parser context (either sequence or mapping) |
||
480 | * |
||
481 | * @return mixed A PHP value |
||
482 | * |
||
483 | * @throws ParseException When reference does not exist |
||
484 | */ |
||
485 | private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $objectForMap, $context) |
||
525 | |||
/**
 * Reads a block scalar (literal "|" or folded ">") starting on the line
 * after the current one and returns its text.
 *
 * The method advances the parser line by line while collecting the block's
 * content, then steps back one line if it stopped on a line that does not
 * belong to the block.
 *
 * @param string $style       The style indicator that was used to begin this block scalar (| or >)
 * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -);
 *                            an empty string collapses trailing newlines into a single one
 * @param int    $indentation The indentation indicator that was used to begin this block scalar;
 *                            0 means "detect from the first non-blank line"
 *
 * @return string The text value
 *
 * @throws ParseException Raised by self::preg_match() on a PCRE internal error
 */
private function parseBlockScalar($style, $chomping = '', $indentation = 0)
{
    $hasLine = $this->moveToNextLine();
    if (!$hasLine) {
        // Nothing follows the block scalar header.
        return '';
    }

    $collected = array();
    $blank = $this->isCurrentLineBlank();

    // Consume leading blank lines before looking at indentation; a blank
    // line only contributes a newline when another line follows it.
    while ($hasLine && $blank) {
        $hasLine = $this->moveToNextLine();
        if ($hasLine) {
            $collected[] = '';
            $blank = $this->isCurrentLineBlank();
        }
    }

    // Without an explicit indentation indicator, use the leading spaces of the current line.
    if (0 === $indentation && self::preg_match('/^ +/', $this->currentLine, $leading)) {
        $indentation = strlen($leading[0]);
    }

    if ($indentation > 0) {
        $linePattern = sprintf('/^ {%d}(.*)$/', $indentation);

        while ($hasLine) {
            if ($blank) {
                // Blank lines longer than the indentation keep their extra whitespace.
                $collected[] = strlen($this->currentLine) > $indentation
                    ? substr($this->currentLine, $indentation)
                    : '';
            } elseif (self::preg_match($linePattern, $this->currentLine, $captured)) {
                $collected[] = $captured[1];
            } else {
                // A non-blank line with less indentation ends the block.
                break;
            }

            $hasLine = $this->moveToNextLine();
            if ($hasLine) {
                $blank = $this->isCurrentLineBlank();
            }
        }
    } elseif ($hasLine) {
        $collected[] = '';
    }

    if ($hasLine) {
        // Stopped on a line that is not part of the block: close the block
        // with a newline and hand that line back to the caller.
        $collected[] = '';
        $this->moveToPreviousLine();
    } elseif (!$this->isCurrentLineLastLineInDocument()) {
        $collected[] = '';
    }

    if ('>' === $style) {
        // Folded style: adjacent plain lines are joined with a space, while
        // blank lines and lines starting with a space keep their line breaks.
        $text = '';
        $prevIndented = false;
        $prevBlank = false;

        foreach ($collected as $index => $line) {
            $isBlank = '' === $line;
            $isIndented = !$isBlank && ' ' === $line[0];

            if ($isBlank) {
                $text .= "\n";
            } elseif ($isIndented || $prevIndented) {
                $text .= "\n".$line;
            } elseif ($prevBlank || 0 === $index) {
                $text .= $line;
            } else {
                $text .= ' '.$line;
            }

            $prevIndented = $isIndented;
            $prevBlank = $isBlank;
        }
    } else {
        // Literal style: line breaks are kept as they are.
        $text = implode("\n", $collected);
    }

    // Trailing newlines: default chomping keeps exactly one, "-" strips
    // them all, any other indicator leaves the text untouched.
    if ('' === $chomping) {
        return preg_replace('/\n+$/', "\n", $text);
    }

    if ('-' === $chomping) {
        return preg_replace('/\n+$/', '', $text);
    }

    return $text;
}
||
636 | |||
637 | /** |
||
638 | * Returns true if the next line is indented. |
||
639 | * |
||
640 | * @return bool Returns true if the next line is indented, false otherwise |
||
641 | */ |
||
642 | private function isNextLineIndented() |
||
661 | |||
662 | /** |
||
663 | * Returns true if the current line is blank or if it is a comment line. |
||
664 | * |
||
665 | * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise |
||
666 | */ |
||
667 | private function isCurrentLineEmpty() |
||
671 | |||
672 | /** |
||
673 | * Returns true if the current line is blank. |
||
674 | * |
||
675 | * @return bool Returns true if the current line is blank, false otherwise |
||
676 | */ |
||
677 | private function isCurrentLineBlank() |
||
681 | |||
682 | /** |
||
683 | * Returns true if the current line is a comment line. |
||
684 | * |
||
685 | * @return bool Returns true if the current line is a comment line, false otherwise |
||
686 | */ |
||
687 | private function isCurrentLineComment() |
||
694 | |||
695 | private function isCurrentLineLastLineInDocument() |
||
699 | |||
700 | /** |
||
701 | * Cleans up a YAML string to be parsed. |
||
702 | * |
||
703 | * @param string $value The input YAML string |
||
704 | * |
||
705 | * @return string A cleaned up YAML string |
||
706 | */ |
||
707 | private function cleanup($value) |
||
737 | |||
738 | /** |
||
739 | * Returns true if the next line starts unindented collection. |
||
740 | * |
||
741 | * @return bool Returns true if the next line starts unindented collection, false otherwise |
||
742 | */ |
||
743 | private function isNextLineUnIndentedCollection() |
||
762 | |||
763 | /** |
||
764 | * Returns true if the string is un-indented collection item. |
||
765 | * |
||
766 | * @return bool Returns true if the string is un-indented collection item, false otherwise |
||
767 | */ |
||
768 | private function isStringUnIndentedCollectionItem() |
||
772 | |||
773 | /** |
||
774 | * Tests whether or not the current line is the header of a block scalar. |
||
775 | * |
||
776 | * @return bool |
||
777 | */ |
||
778 | private function isBlockScalarHeader() |
||
782 | |||
783 | /** |
||
784 | * A local wrapper for `preg_match` which will throw a ParseException if there |
||
785 | * is an internal error in the PCRE engine. |
||
786 | * |
||
787 | * This avoids us needing to check for "false" every time PCRE is used |
||
788 | * in the YAML engine |
||
789 | * |
||
790 | * @throws ParseException on a PCRE internal error |
||
791 | * |
||
792 | * @see preg_last_error() |
||
793 | * |
||
794 | * @internal |
||
795 | */ |
||
796 | public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0) |
||
824 | } |
||
825 |
In PHP, under loose comparison (like `==`, or `!=`, or `switch` conditions), values of different types might be equal.

For `string` values, the empty string `''` is a special case, in particular the following results might be unexpected: