Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDiff, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
7 | class HtmlDiff extends AbstractDiff |
||
8 | { |
||
9 | protected $wordIndices; |
||
10 | protected $oldTables; |
||
11 | protected $newTables; |
||
12 | protected $insertSpaceInReplace = false; |
||
13 | protected $newIsolatedDiffTags; |
||
14 | protected $oldIsolatedDiffTags; |
||
15 | protected $isolatedDiffTags = array ( |
||
16 | 'ol' => '[[REPLACE_ORDERED_LIST]]', |
||
17 | 'ul' => '[[REPLACE_UNORDERED_LIST]]', |
||
18 | 'sub' => '[[REPLACE_SUB_SCRIPT]]', |
||
19 | 'sup' => '[[REPLACE_SUPER_SCRIPT]]', |
||
20 | 'dl' => '[[REPLACE_DEFINITION_LIST]]', |
||
21 | 'table' => '[[REPLACE_TABLE]]', |
||
22 | 'strong' => '[[REPLACE_STRONG]]', |
||
23 | 'b' => '[[REPLACE_B]]', |
||
24 | 'em' => '[[REPLACE_EM]]', |
||
25 | 'i' => '[[REPLACE_I]]', |
||
26 | 'a' => '[[REPLACE_A]]', |
||
27 | ); |
||
28 | protected $useTableDiffing = true; |
||
29 | |||
/**
 * Stores the flag that diffIsolatedPlaceholder() checks before it hands
 * table placeholders to diffTables().
 *
 * @param bool $bool New value for $useTableDiffing.
 *
 * @return $this Same instance, so calls can be chained.
 */
public function setUseTableDiffing($bool)
{
    $this->useTableDiffing = $bool;

    return $this;
}
||
36 | |||
/**
 * Stores the $insertSpaceInReplace flag.
 *
 * @param bool $boolean New value for the flag.
 *
 * @return HtmlDiff Same instance, so calls can be chained.
 */
public function setInsertSpaceInReplace($boolean)
{
    $this->insertSpaceInReplace = $boolean;

    return $this;
}
||
47 | |||
/**
 * Reads back the flag written by setInsertSpaceInReplace().
 *
 * @return bool Current value of $insertSpaceInReplace (false by default).
 */
public function getInsertSpaceInReplace()
{
    return $this->insertSpaceInReplace;
}
||
55 | |||
/**
 * Runs the whole diff: prepares the word lists, then applies every
 * operation returned by operations() in order.
 *
 * @return string The accumulated $this->content.
 */
public function build()
{
    // Preparation steps; each one works on state left by the previous one.
    $this->splitInputsToWords();
    $this->replaceIsolatedDiffTags();
    $this->indexNewWords();

    foreach ($this->operations() as $operation) {
        $this->performOperation($operation);
    }

    return $this->content;
}
||
69 | |||
/**
 * Rebuilds $this->wordIndices: a map from each word of $this->newWords to
 * the list of positions at which it occurs. Words recognised by isTag()
 * are passed through stripTagAttributes() before being used as the key.
 *
 * @return void
 */
protected function indexNewWords()
{
    $this->wordIndices = array();

    foreach ($this->newWords as $position => $token) {
        $key = $this->isTag($token) ? $this->stripTagAttributes($token) : $token;

        // Appending to a missing key creates the list on first use.
        $this->wordIndices[$key][] = $position;
    }
}
|
84 | |||
/**
 * Collapses isolated tag ranges in both word lists into placeholders and
 * remembers the removed words per side.
 *
 * $this->oldWords and $this->newWords are modified in place because
 * createIsolatedDiffTagPlaceholders() takes its argument by reference.
 *
 * @return void
 */
protected function replaceIsolatedDiffTags()
{
    $removedFromOld = $this->createIsolatedDiffTagPlaceholders($this->oldWords);
    $this->oldIsolatedDiffTags = $removedFromOld;

    $removedFromNew = $this->createIsolatedDiffTagPlaceholders($this->newWords);
    $this->newIsolatedDiffTags = $removedFromNew;
}
|
90 | |||
/**
 * Replaces every outermost isolated-tag range in $words with the single
 * placeholder string configured for that tag in $this->isolatedDiffTags.
 *
 * @param array $words Word list; modified in place via array_splice().
 *
 * @return array Removed word runs, keyed by the index the placeholder
 *               occupies in the modified $words.
 */
protected function createIsolatedDiffTagPlaceholders(&$words)
{
    $depth = 0;
    $ranges = array();
    $rangeStart = 0;
    $activeTag = null;

    // Pass 1: locate the outermost ranges without touching $words.
    foreach ($words as $index => $word) {
        $opened = $this->isOpeningIsolatedDiffTag($word, $activeTag);

        if ($opened) {
            if ($depth === 0) {
                $rangeStart = $index;
            }
            $depth++;
            $activeTag = $opened;
            continue;
        }

        if ($depth <= 0 || !$this->isClosingIsolatedDiffTag($word, $activeTag)) {
            continue;
        }

        $depth--;
        if ($depth == 0) {
            $ranges[] = array(
                'start' => $rangeStart,
                'length' => $index - $rangeStart + 1,
                'tagType' => $activeTag,
            );
            $activeTag = null;
        }
    }

    // Pass 2: splice each range out. Every earlier splice shortened the
    // list by (length - 1), so later start positions shift by that much.
    $extracted = array();
    $shift = 0;
    foreach ($ranges as $range) {
        $position = $range['start'] - $shift;
        $placeholder = $this->isolatedDiffTags[$range['tagType']];
        $extracted[$position] = array_splice($words, $position, $range['length'], $placeholder);
        $shift += $range['length'] - 1;
    }

    return $extracted;
}
||
125 | |||
/**
 * Reports which isolated diff tag, if any, is opened by the given token.
 *
 * @param string      $item                   Token to inspect.
 * @param string|null $currentIsolatedDiffTag When given, only this tag name
 *                                            is tested; otherwise every key
 *                                            of $this->isolatedDiffTags is.
 *
 * @return string|false The matching tag name, or false when none matches.
 */
protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    $tagsToMatch = $currentIsolatedDiffTag !== null
        ? array($currentIsolatedDiffTag => $this->isolatedDiffTags[$currentIsolatedDiffTag])
        : $this->isolatedDiffTags;

    foreach ($tagsToMatch as $key => $value) {
        // The lookahead forces the tag name to end right after $key, so
        // "b" no longer matches <br>/<body>, "a" no longer matches <abbr>,
        // and "i" no longer matches <img>.
        $pattern = '#<' . preg_quote($key, '#') . '(?=[\s/>])[^>]*>#i';
        if (preg_match($pattern, $item)) {
            return $key;
        }
    }

    return false;
}
||
137 | |||
/**
 * Reports which isolated diff tag, if any, is closed by the given token.
 *
 * @param string      $item                   Token to inspect.
 * @param string|null $currentIsolatedDiffTag When given, only this tag name
 *                                            is tested; otherwise every key
 *                                            of $this->isolatedDiffTags is.
 *
 * @return string|false The matching tag name, or false when none matches.
 */
protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    $tagsToMatch = $currentIsolatedDiffTag !== null
        ? array($currentIsolatedDiffTag => $this->isolatedDiffTags[$currentIsolatedDiffTag])
        : $this->isolatedDiffTags;

    foreach ($tagsToMatch as $key => $value) {
        // The lookahead forces the tag name to end right after $key, so
        // "a" no longer matches </abbr> and "b" no longer matches </body>.
        $pattern = '#</' . preg_quote($key, '#') . '(?=[\s>])[^>]*>#i';
        if (preg_match($pattern, $item)) {
            return $key;
        }
    }

    return false;
}
||
149 | |||
/**
 * Dispatches one diff operation to the handler for its action.
 * Actions other than equal/delete/insert/replace are ignored.
 *
 * @param object $operation Operation whose ->action selects the handler.
 *
 * @return void
 */
protected function performOperation($operation)
{
    $action = $operation->action;

    if ($action == 'equal') {
        $this->processEqualOperation($operation);
    } elseif ($action == 'delete') {
        $this->processDeleteOperation($operation, "diffdel");
    } elseif ($action == 'insert') {
        $this->processInsertOperation($operation, "diffins");
    } elseif ($action == 'replace') {
        $this->processReplaceOperation($operation);
    }
}
|
169 | |||
170 | 7 | protected function processReplaceOperation($operation) |
|
175 | |||
/**
 * Emits the new-side words in [startInNew, endInNew) as an insertion.
 * A placeholder word with stored original words at the same position is
 * expanded back into those words first.
 *
 * @param object $operation Supplies ->startInNew and ->endInNew.
 * @param string $cssClass  CSS class forwarded to insertTag().
 *
 * @return void
 */
protected function processInsertOperation($operation, $cssClass)
{
    $collected = array();

    foreach ($this->newWords as $index => $token) {
        if ($index < $operation->startInNew || $index >= $operation->endInNew) {
            continue;
        }

        $isExpandable = in_array($token, $this->isolatedDiffTags)
            && isset($this->newIsolatedDiffTags[$index]);

        if (!$isExpandable) {
            $collected[] = $token;
            continue;
        }

        foreach ($this->newIsolatedDiffTags[$index] as $originalWord) {
            $collected[] = $originalWord;
        }
    }

    $this->insertTag("ins", $cssClass, $collected);
}
|
192 | |||
/**
 * Emits the old-side words in [startInOld, endInOld) as a deletion.
 * A placeholder word with stored original words at the same position is
 * expanded back into those words first.
 *
 * @param object $operation Supplies ->startInOld and ->endInOld.
 * @param string $cssClass  CSS class forwarded to insertTag().
 *
 * @return void
 */
protected function processDeleteOperation($operation, $cssClass)
{
    $collected = array();

    foreach ($this->oldWords as $index => $token) {
        if ($index < $operation->startInOld || $index >= $operation->endInOld) {
            continue;
        }

        $isExpandable = in_array($token, $this->isolatedDiffTags)
            && isset($this->oldIsolatedDiffTags[$index]);

        if (!$isExpandable) {
            $collected[] = $token;
            continue;
        }

        foreach ($this->oldIsolatedDiffTags[$index] as $originalWord) {
            $collected[] = $originalWord;
        }
    }

    $this->insertTag("del", $cssClass, $collected);
}
|
209 | |||
/**
 * Diffs the content hidden behind one placeholder, picking the diff
 * routine from the placeholder type: lists, then tables (only while
 * $useTableDiffing is on), then links, otherwise a plain element diff.
 *
 * @param Operation $operation         Operation the placeholder belongs to.
 * @param int       $pos               Placeholder position in the new words.
 * @param string    $placeholder       The placeholder string itself.
 * @param bool      $stripWrappingTags Forwarded to diffElements() on the
 *                                     fallback path only.
 *
 * @return string
 */
protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true)
{
    $before = implode("", $this->findIsolatedDiffTagsInOld($operation, $pos));
    $after = implode("", $this->newIsolatedDiffTags[$pos]);

    if ($this->isListPlaceholder($placeholder)) {
        return $this->diffList($before, $after);
    }

    if ($this->useTableDiffing && $this->isTablePlaceholder($placeholder)) {
        return $this->diffTables($before, $after);
    }

    if ($this->isLinkPlaceholder($placeholder)) {
        return $this->diffLinks($before, $after);
    }

    return $this->diffElements($before, $after, $stripWrappingTags);
}
||
233 | |||
/**
 * Diffs two HTML fragments with a fresh HtmlDiff instance.
 *
 * With $stripWrappingTags on, a tag at the very start and a closing tag
 * at the very end are removed from both inputs; the ones found in
 * $newText are put back around the result.
 *
 * @param string $oldText           Old fragment.
 * @param string $newText           New fragment.
 * @param bool   $stripWrappingTags Whether to peel off the wrapping tags.
 *
 * @return string
 */
protected function diffElements($oldText, $newText, $stripWrappingTags = true)
{
    $prefix = '';
    $suffix = '';

    if ($stripWrappingTags) {
        $wrapperPattern = '/(^<[^>]+>)|(<\/[^>]+>$)/i';
        $found = array();

        if (preg_match_all($wrapperPattern, $newText, $found)) {
            // $found[0] holds the full matches in order of appearance.
            if (isset($found[0][0])) {
                $prefix = $found[0][0];
            }
            if (isset($found[0][1])) {
                $suffix = $found[0][1];
            }
        }

        $oldText = preg_replace($wrapperPattern, '', $oldText);
        $newText = preg_replace($wrapperPattern, '', $newText);
    }

    $innerDiff = new HtmlDiff($oldText, $newText, $this->encoding, $this->specialCaseTags, $this->groupDiffs);
    $innerHtml = $innerDiff->build();

    return $prefix . $innerHtml . $suffix;
}
||
255 | |||
/**
 * Diffs two list fragments with ListDiffNew, passing on this instance's
 * encoding, special-case tags, grouping flag and match threshold.
 *
 * @param string $oldText Old list markup.
 * @param string $newText New list markup.
 *
 * @return string Whatever ListDiffNew::build() returns.
 */
protected function diffList($oldText, $newText)
{
    $listDiff = new ListDiffNew(
        $oldText,
        $newText,
        $this->encoding,
        $this->specialCaseTags,
        $this->groupDiffs
    );
    $listDiff->setMatchThreshold($this->matchThreshold);

    return $listDiff->build();
}
||
263 | |||
/**
 * Diffs two table fragments with TableDiff, passing on this instance's
 * encoding, special-case tags, grouping flag, match threshold and strategy.
 *
 * @param string $oldText Old table markup.
 * @param string $newText New table markup.
 *
 * @return string Whatever TableDiff::build() returns.
 */
protected function diffTables($oldText, $newText)
{
    $tableDiff = new TableDiff(
        $oldText,
        $newText,
        $this->encoding,
        $this->specialCaseTags,
        $this->groupDiffs
    );
    $tableDiff->setMatchThreshold($this->matchThreshold);
    $tableDiff->setStrategy($this->strategy);

    return $tableDiff->build();
}
||
272 | |||
/**
 * Diffs two link fragments. When their href values differ, the whole old
 * link is wrapped as deleted and the whole new link as inserted; when
 * they compare equal, the fragments go through diffElements().
 *
 * @param string $oldText Old link markup.
 * @param string $newText New link markup.
 *
 * @return string
 */
protected function diffLinks($oldText, $newText)
{
    $hrefBefore = $this->getAttributeFromTag($oldText, 'href');
    $hrefAfter = $this->getAttributeFromTag($newText, 'href');

    if ($hrefBefore == $hrefAfter) {
        return $this->diffElements($oldText, $newText);
    }

    $removed = $this->wrapText($oldText, 'del', 'diffmod diff-href');
    $added = $this->wrapText($newText, 'ins', 'diffmod diff-href');

    return $removed . $added;
}
||
294 | |||
/**
 * Appends the new-side words in [startInNew, endInNew) to $this->content.
 * A placeholder word with stored original words at the same position is
 * replaced by the result of diffIsolatedPlaceholder().
 *
 * @param object $operation Supplies ->startInNew and ->endInNew.
 *
 * @return void
 */
protected function processEqualOperation($operation)
{
    $pieces = array();

    foreach ($this->newWords as $index => $token) {
        if ($index < $operation->startInNew || $index >= $operation->endInNew) {
            continue;
        }

        $isPlaceholder = in_array($token, $this->isolatedDiffTags)
            && isset($this->newIsolatedDiffTags[$index]);

        $pieces[] = $isPlaceholder
            ? $this->diffIsolatedPlaceholder($operation, $index, $token)
            : $token;
    }

    // Content is appended once, after every piece has been produced.
    $this->content .= implode("", $pieces);
}
|
311 | |||
/**
 * Extracts the quoted value of one attribute from an <a ...> tag in $text.
 *
 * @param string $text      Markup to search.
 * @param string $attribute Attribute name, matched literally and
 *                          case-insensitively.
 *
 * @return null|string The value between the quotes, or null when no <a>
 *                     tag with that quoted attribute is found.
 */
protected function getAttributeFromTag($text, $attribute)
{
    // - preg_quote(): the name is matched literally, never as regex syntax.
    // - (?<![\w-]): "href" must not be the tail of a longer name such as
    //   "data-href".
    // - (.*?): stop at the first closing quote instead of swallowing later
    //   attributes (e.g. href="x" title="y" used to yield 'x" title="y').
    $pattern = sprintf(
        '/<a\s+[^>]*(?<![\w-])%s=([\'"])(.*?)\1[^>]*>/i',
        preg_quote($attribute, '/')
    );

    $matches = array();
    if (preg_match($pattern, $text, $matches)) {
        return $matches[2];
    }

    return null;
}
||
327 | |||
328 | 7 | protected function isListPlaceholder($text) |
|
332 | |||
/**
 * Tells whether $text is the placeholder configured for the 'a' tag.
 *
 * @param string $text Candidate placeholder string.
 *
 * @return bool
 */
public function isLinkPlaceholder($text)
{
    $linkType = 'a';

    return $this->isPlaceholderType($text, $linkType);
}
||
342 | |||
/**
 * Tells whether $text equals the placeholder of any of the given types.
 *
 * Each type that is a key of $this->isolatedDiffTags is translated to its
 * placeholder string; any other type is compared as given.
 *
 * @param string       $text   Candidate placeholder string.
 * @param array|string $types  One type or a list of types.
 * @param bool         $strict Passed to in_array() as its strict flag.
 *
 * @return bool
 */
protected function isPlaceholderType($text, $types, $strict = true)
{
    $typeList = is_array($types) ? $types : array($types);

    $accepted = array();
    foreach ($typeList as $type) {
        $accepted[] = isset($this->isolatedDiffTags[$type])
            ? $this->isolatedDiffTags[$type]
            : $type;
    }

    return in_array($text, $accepted, $strict);
}
||
367 | |||
/**
 * Tells whether $text is exactly the placeholder configured for 'table'.
 *
 * @param mixed $text Candidate placeholder; compared with ===.
 *
 * @return bool
 */
protected function isTablePlaceholder($text)
{
    $tablePlaceholder = $this->isolatedDiffTags['table'];

    return $text === $tablePlaceholder;
}
||
374 | |||
375 | 7 | protected function findIsolatedDiffTagsInOld($operation, $posInNew) |
|
381 | |||
/**
 * Consumes $words and appends them to $this->content as a marked change.
 *
 * Each pass pulls a leading run via extractConsecutiveWords($words, 'noTag')
 * (presumably the non-tag words; checkCondition() is not visible here) and
 * wraps it with wrapText(). It then pulls the following 'tag' run and
 * appends it as-is, except that a leading opening tag gets the "diffmod"
 * class added. Words matching the special-case opening/closing tag lists
 * additionally inject an <ins class="mod"> / </ins> marker.
 *
 * @param string $tag      Wrapper tag name, e.g. "ins" or "del".
 * @param string $cssClass CSS class handed to wrapText().
 * @param array  $words    Words to emit; emptied as they are consumed.
 *
 * @return void
 */
protected function insertTag($tag, $cssClass, &$words)
{
    while (true) {
        if (count($words) == 0) {
            break;
        }

        $nonTags = $this->extractConsecutiveWords($words, 'noTag');

        $specialCaseTagInjection = '';
        $specialCaseTagInjectionIsBefore = false;

        if (count($nonTags) != 0) {
            $text = $this->wrapText(implode("", $nonTags), $tag, $cssClass);
            $this->content .= $text;
        } else {
            // No leading run was extracted, so $words[0] is the next word to
            // classify against the special-case tag lists.
            $firstOrDefault = false;
            foreach ($this->specialCaseOpeningTags as $x) {
                if (preg_match($x, $words[0])) {
                    $firstOrDefault = $x;
                    break;
                }
            }

            if ($firstOrDefault) {
                $specialCaseTagInjection = '<ins class="mod">';
                if ($tag == "del") {
                    unset($words[0]);
                }
            } elseif (array_search($words[0], $this->specialCaseClosingTags) !== false) {
                $specialCaseTagInjection = "</ins>";
                $specialCaseTagInjectionIsBefore = true;
                if ($tag == "del") {
                    unset($words[0]);
                }
            }
        }

        // The injection is a string, so test it for emptiness directly:
        // count() on a string is a TypeError on PHP 8 and a warning on 7.2+.
        if (count($words) == 0 && $specialCaseTagInjection === '') {
            break;
        }

        if ($specialCaseTagInjectionIsBefore) {
            $this->content .= $specialCaseTagInjection
                . implode("", $this->extractConsecutiveWords($words, 'tag'));
            continue;
        }

        $workTag = $this->extractConsecutiveWords($words, 'tag');
        if (isset($workTag[0]) && $this->isOpeningTag($workTag[0]) && !$this->isClosingTag($workTag[0])) {
            if (strpos($workTag[0], 'class=')) {
                $workTag[0] = str_replace('class="', 'class="diffmod ', $workTag[0]);
                // Keep the single quote: rewriting it to a double quote left
                // the attribute with mismatched quotes (class="diffmod x').
                $workTag[0] = str_replace("class='", "class='diffmod ", $workTag[0]);
            } else {
                $workTag[0] = str_replace(">", ' class="diffmod">', $workTag[0]);
            }
        }
        $this->content .= implode("", $workTag) . $specialCaseTagInjection;
    }
}
|
437 | |||
438 | 9 | protected function checkCondition($word, $condition) |
|
442 | |||
443 | 10 | protected function wrapText($text, $tagName, $cssClass) |
|
447 | |||
448 | 9 | protected function extractConsecutiveWords(&$words, $condition) |
|
449 | { |
||
450 | 9 | $indexOfFirstTag = null; |
|
451 | 9 | $words = array_values($words); |
|
452 | 9 | foreach ($words as $i => $word) { |
|
453 | 9 | if ( !$this->checkCondition( $word, $condition ) ) { |
|
454 | 8 | $indexOfFirstTag = $i; |
|
455 | 8 | break; |
|
456 | } |
||
457 | 9 | } |
|
458 | 9 | if ($indexOfFirstTag !== null) { |
|
459 | 8 | $items = array(); |
|
460 | 8 | View Code Duplication | foreach ($words as $pos => $s) { |
461 | 8 | if ($pos >= 0 && $pos < $indexOfFirstTag) { |
|
462 | 8 | $items[] = $s; |
|
463 | 8 | } |
|
464 | 8 | } |
|
465 | 8 | if ($indexOfFirstTag > 0) { |
|
466 | 8 | array_splice( $words, 0, $indexOfFirstTag ); |
|
467 | 8 | } |
|
468 | |||
469 | 8 | return $items; |
|
470 | } else { |
||
482 | |||
483 | 11 | protected function isTag($item) |
|
487 | |||
488 | 11 | protected function isOpeningTag($item) |
|
492 | |||
493 | 11 | protected function isClosingTag($item) |
|
497 | |||
498 | 11 | protected function operations() |
|
531 | |||
532 | 11 | protected function matchingBlocks() |
|
539 | |||
540 | 11 | protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks) |
|
553 | |||
554 | 8 | protected function stripTagAttributes($word) |
|
560 | |||
561 | 11 | protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew) |
|
616 | } |
||
617 |
Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line.