Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDiff, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
7 | class HtmlDiff extends AbstractDiff |
||
8 | { |
||
9 | protected $wordIndices; |
||
10 | protected $oldTables; |
||
11 | protected $newTables; |
||
12 | protected $insertSpaceInReplace = false; |
||
13 | protected $newIsolatedDiffTags; |
||
14 | protected $oldIsolatedDiffTags; |
||
15 | protected $isolatedDiffTags = array ( |
||
16 | 'ol' => '[[REPLACE_ORDERED_LIST]]', |
||
17 | 'ul' => '[[REPLACE_UNORDERED_LIST]]', |
||
18 | 'sub' => '[[REPLACE_SUB_SCRIPT]]', |
||
19 | 'sup' => '[[REPLACE_SUPER_SCRIPT]]', |
||
20 | 'dl' => '[[REPLACE_DEFINITION_LIST]]', |
||
21 | 'table' => '[[REPLACE_TABLE]]', |
||
22 | 'strong' => '[[REPLACE_STRONG]]', |
||
23 | 'b' => '[[REPLACE_B]]', |
||
24 | 'em' => '[[REPLACE_EM]]', |
||
25 | 'i' => '[[REPLACE_I]]', |
||
26 | 'a' => '[[REPLACE_A]]', |
||
27 | ); |
||
28 | protected $useTableDiffing = true; |
||
29 | |||
/**
 * Store the flag kept in $this->useTableDiffing.
 *
 * @param bool $bool New value for the flag; stored as given, without casting.
 *
 * @return $this The same instance, so calls can be chained.
 */
public function setUseTableDiffing($bool)
{
    $this->useTableDiffing = $bool;

    return $this;
}
||
36 | |||
/**
 * Store the flag kept in $this->insertSpaceInReplace.
 *
 * @param bool $boolean New value for the flag; stored as given, without casting.
 *
 * @return $this The same instance, so calls can be chained.
 */
public function setInsertSpaceInReplace($boolean)
{
    $this->insertSpaceInReplace = $boolean;

    return $this;
}
||
47 | |||
/**
 * Read the flag kept in $this->insertSpaceInReplace.
 *
 * @return bool Current value of the flag (false unless it was changed through the setter).
 */
public function getInsertSpaceInReplace()
{
    return $this->insertSpaceInReplace;
}
||
55 | |||
/**
 * Run the whole diff and hand back the accumulated result.
 *
 * Steps, in order: split the inputs into words, swap isolated diff tags for
 * placeholders, index the new words, then perform every operation returned
 * by operations().
 *
 * @return mixed Whatever $this->content holds afterwards
 *               (presumably the diffed HTML string - confirm against AbstractDiff).
 */
public function build()
{
    // Preparation steps; the call order is the same as it has always been.
    $this->splitInputsToWords();
    $this->replaceIsolatedDiffTags();
    $this->indexNewWords();

    foreach ($this->operations() as $operation) {
        $this->performOperation($operation);
    }

    return $this->content;
}
||
69 | |||
/**
 * Rebuild $this->wordIndices from $this->newWords.
 *
 * The result maps every word to the list of positions at which it occurs in
 * $this->newWords. Words recognised by isTag() are passed through
 * stripTagAttributes() first, and the stripped form is used as the key.
 *
 * @return void
 */
protected function indexNewWords()
{
    $this->wordIndices = array();

    foreach ($this->newWords as $position => $token) {
        $key = $this->isTag($token) ? $this->stripTagAttributes($token) : $token;

        // Appending with [] creates the inner list on first use, so no
        // separate "first occurrence" branch is needed.
        $this->wordIndices[$key][] = $position;
    }
}
|
84 | |||
/**
 * Replace isolated diff elements in both word lists with placeholders.
 *
 * $this->oldWords and $this->newWords are passed by reference and modified
 * in place; the word runs that were taken out are kept in
 * $this->oldIsolatedDiffTags and $this->newIsolatedDiffTags respectively.
 *
 * @return void
 */
protected function replaceIsolatedDiffTags()
{
    // Old side first, then new side - same order as before.
    $this->oldIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->oldWords);
    $this->newIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->newWords);
}
|
90 | |||
/**
 * Collapse every outermost isolated diff element in $words into one placeholder word.
 *
 * First pass: walk the words and record, for each outermost element, where it
 * starts, how many words it spans and which tag opened it. While an element is
 * open, only tags of that same type are counted as nested openings/closings.
 * Second pass: splice each recorded range out of $words, leaving the tag's
 * placeholder string (from $this->isolatedDiffTags) in its place.
 *
 * @param array $words Word list; modified in place.
 *
 * @return array Removed word runs, keyed by the index their placeholder has
 *               in the modified $words.
 */
protected function createIsolatedDiffTagPlaceholders(&$words)
{
    $depth = 0;          // number of currently open tags of the active type
    $rangeStart = 0;     // index of the word that opened the outermost element
    $activeTag = null;   // tag name being tracked, null when no element is open
    $ranges = array();

    foreach ($words as $position => $token) {
        $openedTag = $this->isOpeningIsolatedDiffTag($token, $activeTag);

        if ($openedTag) {
            if ($depth === 0) {
                $rangeStart = $position;
            }
            ++$depth;
            $activeTag = $openedTag;
            continue;
        }

        if ($depth <= 0 || !$this->isClosingIsolatedDiffTag($token, $activeTag)) {
            continue;
        }

        --$depth;
        if ($depth !== 0) {
            // Only a nested element was closed; the outermost one is still open.
            continue;
        }

        $ranges[] = array(
            'start' => $rangeStart,
            'length' => $position - $rangeStart + 1,
            'tagType' => $activeTag,
        );
        $activeTag = null;
    }

    $removed = array();
    $shift = 0; // how far later indexes have moved left because of earlier splices

    foreach ($ranges as $range) {
        $length = $range['length'];
        $at = $range['start'] - $shift;
        $placeholder = $this->isolatedDiffTags[$range['tagType']];

        $removed[$at] = array_splice($words, $at, $length, $placeholder);

        // $length words were replaced by a single placeholder word.
        $shift += $length - 1;
    }

    return $removed;
}
||
125 | |||
/**
 * Tell whether a word contains the opening tag of an isolated diff element.
 *
 * @param string      $item                   Word to inspect.
 * @param string|null $currentIsolatedDiffTag When not null, only this tag name is tried;
 *                                            otherwise every key of $this->isolatedDiffTags is tried.
 *
 * @return string|false Name of the first matching tag, or false when none matches.
 */
protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    $tagNames = $currentIsolatedDiffTag !== null
        ? array($currentIsolatedDiffTag)
        : array_keys($this->isolatedDiffTags);

    foreach ($tagNames as $tagName) {
        // The name must be followed by whitespace (attributes) or directly by
        // ">". Without that boundary "b" also matched <br> and <body>, "i"
        // matched <img> and <ins>, "a" matched <abbr>, "em" matched <embed>.
        $pattern = '#<' . preg_quote($tagName, '#') . '(?:\s[^>]*)?>#i';

        if (preg_match($pattern, $item)) {
            return $tagName;
        }
    }

    return false;
}
||
137 | |||
/**
 * Tell whether a word contains the closing tag of an isolated diff element.
 *
 * @param string      $item                   Word to inspect.
 * @param string|null $currentIsolatedDiffTag When not null, only this tag name is tried;
 *                                            otherwise every key of $this->isolatedDiffTags is tried.
 *
 * @return string|false Name of the first matching tag, or false when none matches.
 */
protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    $tagNames = $currentIsolatedDiffTag !== null
        ? array($currentIsolatedDiffTag)
        : array_keys($this->isolatedDiffTags);

    foreach ($tagNames as $tagName) {
        // The name must be followed by whitespace or directly by ">". Without
        // that boundary "b" also matched </body> and </blockquote>, "a"
        // matched </abbr>, and so on.
        $pattern = '#</' . preg_quote($tagName, '#') . '(?:\s[^>]*)?>#i';

        if (preg_match($pattern, $item)) {
            return $tagName;
        }
    }

    return false;
}
||
149 | |||
/**
 * Dispatch one diff operation to the handler for its action.
 *
 * Handled actions: 'equal', 'delete', 'insert' and 'replace'. Any other
 * action is ignored.
 *
 * @param object $operation Operation to perform; its public "action" property selects the handler.
 *
 * @return void
 */
protected function performOperation($operation)
{
    $action = $operation->action;

    // Loose comparison (==) on purpose: it is what a switch statement uses.
    if ($action == 'equal') {
        $this->processEqualOperation($operation);
    } elseif ($action == 'delete') {
        $this->processDeleteOperation($operation, "diffdel");
    } elseif ($action == 'insert') {
        $this->processInsertOperation($operation, "diffins");
    } elseif ($action == 'replace') {
        $this->processReplaceOperation($operation);
    }
}
|
169 | |||
170 | 7 | protected function processReplaceOperation($operation) |
|
175 | |||
176 | 9 | View Code Duplication | protected function processInsertOperation($operation, $cssClass) |
177 | { |
||
178 | 9 | $text = array(); |
|
179 | 9 | foreach ($this->newWords as $pos => $s) { |
|
180 | 9 | if ($pos >= $operation->startInNew && $pos < $operation->endInNew) { |
|
181 | 9 | if (in_array($s, $this->isolatedDiffTags) && isset($this->newIsolatedDiffTags[$pos])) { |
|
182 | 4 | foreach ($this->newIsolatedDiffTags[$pos] as $word) { |
|
183 | 4 | $text[] = $word; |
|
184 | 4 | } |
|
185 | 4 | } else { |
|
192 | |||
193 | 9 | View Code Duplication | protected function processDeleteOperation($operation, $cssClass) |
209 | |||
210 | /** |
||
211 | * @param Operation $operation |
||
212 | * @param int $pos |
||
213 | * @param string $placeholder |
||
214 | * @param bool $stripWrappingTags |
||
215 | * |
||
216 | * @return string |
||
217 | */ |
||
218 | 7 | protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true) |
|
233 | |||
234 | 5 | protected function diffElements($oldText, $newText, $stripWrappingTags = true) |
|
255 | |||
256 | 4 | View Code Duplication | protected function diffList($oldText, $newText) |
263 | |||
264 | View Code Duplication | protected function diffTables($oldText, $newText) |
|
271 | |||
272 | /** |
||
273 | * @param string $oldText |
||
274 | * @param string $newText |
||
275 | * |
||
276 | * @return string |
||
277 | */ |
||
278 | 1 | protected function diffLinks($oldText, $newText) |
|
293 | |||
294 | 11 | View Code Duplication | protected function processEqualOperation($operation) |
310 | |||
311 | /** |
||
312 | * @param string $text |
||
313 | * @param string $attribute |
||
314 | * |
||
315 | * @return null|string |
||
316 | */ |
||
317 | 1 | protected function getAttributeFromTag($text, $attribute) |
|
326 | |||
327 | 7 | protected function isListPlaceholder($text) |
|
331 | |||
332 | /** |
||
333 | * @param string $text |
||
334 | * |
||
335 | * @return bool |
||
336 | */ |
||
337 | 5 | public function isLinkPlaceholder($text) |
|
341 | |||
342 | /** |
||
343 | * @param string $text |
||
344 | * @param array|string $types |
||
345 | * @param bool $strict |
||
346 | * |
||
347 | * @return bool |
||
348 | */ |
||
349 | 7 | protected function isPlaceholderType($text, $types, $strict = true) |
|
366 | |||
367 | 5 | protected function isTablePlaceholder($text) |
|
373 | |||
374 | 7 | protected function findIsolatedDiffTagsInOld($operation, $posInNew) |
|
380 | |||
381 | 9 | protected function insertTag($tag, $cssClass, &$words) |
|
436 | |||
437 | 9 | protected function checkCondition($word, $condition) |
|
441 | |||
442 | 10 | protected function wrapText($text, $tagName, $cssClass) |
|
446 | |||
447 | 9 | protected function extractConsecutiveWords(&$words, $condition) |
|
481 | |||
482 | 11 | protected function isTag($item) |
|
486 | |||
487 | 11 | protected function isOpeningTag($item) |
|
491 | |||
492 | 11 | protected function isClosingTag($item) |
|
496 | |||
497 | 11 | protected function operations() |
|
530 | |||
531 | 11 | protected function matchingBlocks() |
|
538 | |||
539 | 11 | protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks) |
|
552 | |||
553 | 8 | protected function stripTagAttributes($word) |
|
559 | |||
560 | 11 | protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew) |
|
615 | } |
||
616 |
Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.