Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDiff, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 7 | class HtmlDiff extends AbstractDiff |
||
| 8 | { |
||
| 9 | protected $wordIndices; |
||
| 10 | protected $oldTables; |
||
| 11 | protected $newTables; |
||
| 12 | protected $insertSpaceInReplace = false; |
||
| 13 | protected $newIsolatedDiffTags; |
||
| 14 | protected $oldIsolatedDiffTags; |
||
| 15 | protected $isolatedDiffTags = array ( |
||
| 16 | 'ol' => '[[REPLACE_ORDERED_LIST]]', |
||
| 17 | 'ul' => '[[REPLACE_UNORDERED_LIST]]', |
||
| 18 | 'sub' => '[[REPLACE_SUB_SCRIPT]]', |
||
| 19 | 'sup' => '[[REPLACE_SUPER_SCRIPT]]', |
||
| 20 | 'dl' => '[[REPLACE_DEFINITION_LIST]]', |
||
| 21 | 'table' => '[[REPLACE_TABLE]]', |
||
| 22 | 'strong' => '[[REPLACE_STRONG]]', |
||
| 23 | 'b' => '[[REPLACE_B]]', |
||
| 24 | 'em' => '[[REPLACE_EM]]', |
||
| 25 | 'i' => '[[REPLACE_I]]', |
||
| 26 | 'a' => '[[REPLACE_A]]', |
||
| 27 | ); |
||
| 28 | protected $useTableDiffing = true; |
||
| 29 | |||
/**
 * Stores the given flag in the useTableDiffing property.
 *
 * @param mixed $bool value assigned as-is (no casting or validation happens here)
 *
 * @return $this the same instance, so calls can be chained
 */
public function setUseTableDiffing($bool)
{
    $this->useTableDiffing = $bool;

    return $this;
}
||
| 36 | |||
/**
 * Stores the given flag in the insertSpaceInReplace property.
 *
 * @param bool $boolean value assigned as-is (no casting or validation happens here)
 *
 * @return $this the same instance, so calls can be chained
 */
public function setInsertSpaceInReplace($boolean)
{
    $this->insertSpaceInReplace = $boolean;

    return $this;
}
||
| 47 | |||
/**
 * Reads back the insertSpaceInReplace property.
 *
 * @return bool the currently stored flag (false unless a setter changed it)
 */
public function getInsertSpaceInReplace()
{
    return $this->insertSpaceInReplace;
}
||
| 55 | |||
/**
 * Runs the diff from start to finish.
 *
 * Steps, in order: split the inputs into words, swap isolated diff tags for
 * placeholders, index the new words, then apply every operation returned by
 * operations().
 *
 * @return mixed the value of $this->content once all operations have run
 *               (presumably the rendered diff markup - confirm against AbstractDiff)
 */
public function build()
{
    $this->splitInputsToWords();
    $this->replaceIsolatedDiffTags();
    $this->indexNewWords();

    // operations() is evaluated once; each entry is handed to performOperation().
    foreach ($this->operations() as $operation) {
        $this->performOperation($operation);
    }

    return $this->content;
}
||
| 69 | |||
/**
 * Rebuilds $this->wordIndices: a map from each word in $this->newWords to
 * the list of positions at which it occurs.
 *
 * Words recognised by isTag() are keyed by the result of
 * stripTagAttributes(), so tags are grouped by that stripped form.
 *
 * @return void
 */
protected function indexNewWords()
{
    $this->wordIndices = array();

    foreach ($this->newWords as $position => $token) {
        $key = $this->isTag($token) ? $this->stripTagAttributes($token) : $token;

        // Appending to a missing key creates the list on first use.
        $this->wordIndices[$key][] = $position;
    }
}
|
| 84 | |||
/**
 * Replaces isolated diff tag spans in both word lists with placeholders.
 *
 * $this->oldWords and $this->newWords are modified in place (they are passed
 * by reference); the extracted spans are kept in $this->oldIsolatedDiffTags
 * and $this->newIsolatedDiffTags respectively.
 *
 * @return void
 */
protected function replaceIsolatedDiffTags()
{
    // Old side first, then new side - same order as before.
    $this->oldIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->oldWords);
    $this->newIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->newWords);
}
|
| 90 | |||
/**
 * Collapses every top-level isolated diff tag span in $words into a single
 * placeholder word and returns the words that were removed.
 *
 * A span starts at an opening isolated tag seen while no span is open and
 * ends at the closing tag that brings the nesting counter back to zero.
 * While a span is open, only tags of the most recently opened type are
 * considered for opening/closing.
 *
 * @param array $words word list, modified in place; assumed to be a list with
 *                     consecutive integer keys (the span length is computed
 *                     from key arithmetic)
 *
 * @return array map of placeholder position (in the modified $words) to the
 *               list of words that the placeholder replaced
 */
protected function createIsolatedDiffTagPlaceholders(&$words)
{
    $depth = 0;
    $spans = array();
    $spanStart = 0;
    $activeTag = null;

    // Pass 1: locate the spans without touching $words.
    foreach ($words as $position => $token) {
        $opened = $this->isOpeningIsolatedDiffTag($token, $activeTag);
        if ($opened) {
            if ($depth === 0) {
                $spanStart = $position;
            }
            $depth++;
            $activeTag = $opened;
            continue;
        }

        // The closing check is only attempted while a span is open.
        if ($depth <= 0 || !$this->isClosingIsolatedDiffTag($token, $activeTag)) {
            continue;
        }

        $depth--;
        if ($depth === 0) {
            $spans[] = array(
                'start' => $spanStart,
                'length' => $position - $spanStart + 1,
                'tagType' => $activeTag,
            );
            $activeTag = null;
        }
    }

    // Pass 2: splice each span out, front to back. Every splice shortens
    // $words by (length - 1), so later start positions are shifted by the
    // accumulated amount.
    $extracted = array();
    $shift = 0;
    foreach ($spans as $span) {
        $at = $span['start'] - $shift;
        $extracted[$at] = array_splice(
            $words,
            $at,
            $span['length'],
            $this->isolatedDiffTags[$span['tagType']]
        );
        $shift += $span['length'] - 1;
    }

    return $extracted;
}
||
| 125 | |||
/**
 * Checks whether $item contains an opening tag of one of the isolated diff tags.
 *
 * @param string      $item                   word/token to inspect
 * @param string|null $currentIsolatedDiffTag when not null, only this tag name is tried
 *
 * @return string|false the matching key of $this->isolatedDiffTags, or false when none matches
 */
protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    return $this->matchIsolatedDiffTagName($item, '<', $currentIsolatedDiffTag);
}

/**
 * Checks whether $item contains a closing tag of one of the isolated diff tags.
 *
 * @param string      $item                   word/token to inspect
 * @param string|null $currentIsolatedDiffTag when not null, only this tag name is tried
 *
 * @return string|false the matching key of $this->isolatedDiffTags, or false when none matches
 */
protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    return $this->matchIsolatedDiffTagName($item, '</', $currentIsolatedDiffTag);
}

/**
 * Shared matcher behind isOpeningIsolatedDiffTag() and isClosingIsolatedDiffTag().
 *
 * The tag name must be followed by whitespace, "/" or ">". Without that
 * boundary a short name matches any longer tag that merely starts with it
 * (e.g. "b" would match <br> or <blockquote>, "i" would match <img>).
 *
 * @param string      $item    word/token to inspect
 * @param string      $opener  literal text that precedes the tag name: "<" or "</"
 * @param string|null $onlyTag when not null, the single tag name to try;
 *                             otherwise every key of $this->isolatedDiffTags is tried in order
 *
 * @return string|false the first tag name that matches, or false
 */
private function matchIsolatedDiffTagName($item, $opener, $onlyTag)
{
    $names = $onlyTag !== null ? array($onlyTag) : array_keys($this->isolatedDiffTags);

    foreach ($names as $name) {
        // preg_quote keeps a tag name from being interpreted as pattern syntax.
        $pattern = '#' . $opener . preg_quote($name, '#') . '(?=[\s/>])[^>]*>\s*#iU';
        if (preg_match($pattern, $item)) {
            return $name;
        }
    }

    return false;
}
||
| 149 | |||
/**
 * Routes one operation to the handler that matches its action.
 *
 * 'equal', 'delete', 'insert' and 'replace' are handled; any other action
 * is ignored. Deletes are processed with the CSS class "diffdel", inserts
 * with "diffins".
 *
 * @param object $operation object exposing an `action` property
 *
 * @return void
 */
protected function performOperation($operation)
{
    $action = $operation->action;

    // Loose comparison on purpose: it mirrors how a switch statement compares.
    if ($action == 'equal') {
        $this->processEqualOperation($operation);
    } elseif ($action == 'delete') {
        $this->processDeleteOperation($operation, "diffdel");
    } elseif ($action == 'insert') {
        $this->processInsertOperation($operation, "diffins");
    } elseif ($action == 'replace') {
        $this->processReplaceOperation($operation);
    }
}
|
| 169 | |||
| 170 | 7 | protected function processReplaceOperation($operation) |
|
| 175 | |||
| 176 | 9 | View Code Duplication | protected function processInsertOperation($operation, $cssClass) |
| 177 | { |
||
| 178 | 9 | $text = array(); |
|
| 179 | 9 | foreach ($this->newWords as $pos => $s) { |
|
| 180 | 9 | if ($pos >= $operation->startInNew && $pos < $operation->endInNew) { |
|
| 181 | 9 | if (in_array($s, $this->isolatedDiffTags) && isset($this->newIsolatedDiffTags[$pos])) { |
|
| 182 | 4 | foreach ($this->newIsolatedDiffTags[$pos] as $word) { |
|
| 183 | 4 | $text[] = $word; |
|
| 184 | 4 | } |
|
| 185 | 4 | } else { |
|
| 192 | |||
| 193 | 9 | View Code Duplication | protected function processDeleteOperation($operation, $cssClass) |
| 209 | |||
| 210 | /** |
||
| 211 | * @param Operation $operation |
||
| 212 | * @param int $pos |
||
| 213 | * @param string $placeholder |
||
| 214 | * @param bool $stripWrappingTags |
||
| 215 | * |
||
| 216 | * @return string |
||
| 217 | */ |
||
| 218 | 7 | protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true) |
|
| 233 | |||
| 234 | 5 | protected function diffElements($oldText, $newText, $stripWrappingTags = true) |
|
| 255 | |||
| 256 | 4 | View Code Duplication | protected function diffList($oldText, $newText) |
| 263 | |||
| 264 | View Code Duplication | protected function diffTables($oldText, $newText) |
|
| 271 | |||
| 272 | /** |
||
| 273 | * @param string $oldText |
||
| 274 | * @param string $newText |
||
| 275 | * |
||
| 276 | * @return string |
||
| 277 | */ |
||
| 278 | 1 | protected function diffLinks($oldText, $newText) |
|
| 293 | |||
| 294 | 11 | View Code Duplication | protected function processEqualOperation($operation) |
| 310 | |||
| 311 | /** |
||
| 312 | * @param string $text |
||
| 313 | * @param string $attribute |
||
| 314 | * |
||
| 315 | * @return null|string |
||
| 316 | */ |
||
| 317 | 1 | protected function getAttributeFromTag($text, $attribute) |
|
| 326 | |||
| 327 | 7 | protected function isListPlaceholder($text) |
|
| 331 | |||
| 332 | /** |
||
| 333 | * @param string $text |
||
| 334 | * |
||
| 335 | * @return bool |
||
| 336 | */ |
||
| 337 | 5 | public function isLinkPlaceholder($text) |
|
| 341 | |||
| 342 | /** |
||
| 343 | * @param string $text |
||
| 344 | * @param array|string $types |
||
| 345 | * @param bool $strict |
||
| 346 | * |
||
| 347 | * @return bool |
||
| 348 | */ |
||
| 349 | 7 | protected function isPlaceholderType($text, $types, $strict = true) |
|
| 366 | |||
| 367 | 5 | protected function isTablePlaceholder($text) |
|
| 373 | |||
| 374 | 7 | protected function findIsolatedDiffTagsInOld($operation, $posInNew) |
|
| 380 | |||
| 381 | 9 | protected function insertTag($tag, $cssClass, &$words) |
|
| 436 | |||
| 437 | 9 | protected function checkCondition($word, $condition) |
|
| 441 | |||
| 442 | 10 | protected function wrapText($text, $tagName, $cssClass) |
|
| 446 | |||
| 447 | 9 | protected function extractConsecutiveWords(&$words, $condition) |
|
| 481 | |||
| 482 | 11 | protected function isTag($item) |
|
| 486 | |||
| 487 | 11 | protected function isOpeningTag($item) |
|
| 491 | |||
| 492 | 11 | protected function isClosingTag($item) |
|
| 496 | |||
| 497 | 11 | protected function operations() |
|
| 530 | |||
| 531 | 11 | protected function matchingBlocks() |
|
| 538 | |||
| 539 | 11 | protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks) |
|
| 552 | |||
| 553 | 8 | protected function stripTagAttributes($word) |
|
| 559 | |||
| 560 | 11 | protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew) |
|
| 615 | } |
||
| 616 |
Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.