Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like ListDiff often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ListDiff, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
5 | class ListDiff extends HtmlDiff |
||
6 | { |
||
7 | /** |
||
8 | * This is the minimum percentage a list item can match its counterpart in order to be considered a match. |
||
9 | * @var integer |
||
10 | */ |
||
11 | protected static $listMatchThreshold = 35; |
||
12 | |||
13 | /** @var array */ |
||
14 | protected $listWords = array(); |
||
15 | |||
16 | /** @var array */ |
||
17 | protected $listTags = array(); |
||
18 | |||
19 | /** @var array */ |
||
20 | protected $listIsolatedDiffTags = array(); |
||
21 | |||
22 | /** @var array */ |
||
23 | protected $isolatedDiffTags = array ( |
||
24 | 'ol' => '[[REPLACE_ORDERED_LIST]]', |
||
25 | 'ul' => '[[REPLACE_UNORDERED_LIST]]', |
||
26 | 'dl' => '[[REPLACE_DEFINITION_LIST]]', |
||
27 | ); |
||
28 | |||
29 | /** |
||
30 | * List (li) placeholder. |
||
31 | * @var string |
||
32 | */ |
||
33 | protected static $listPlaceHolder = "[[REPLACE_LIST_ITEM]]"; |
||
34 | |||
35 | /** |
||
36 | * Holds the type of list this is ol, ul, dl. |
||
37 | * @var string |
||
38 | */ |
||
39 | protected $listType; |
||
40 | |||
41 | /** |
||
42 | * Used to hold what type of list the old list is. |
||
43 | * @var string |
||
44 | */ |
||
45 | protected $oldListType; |
||
46 | |||
47 | /** |
||
48 | * Used to hold what type of list the new list is. |
||
49 | * @var string |
||
50 | */ |
||
51 | protected $newListType; |
||
52 | |||
53 | /** |
||
54 | * Hold the old/new content of the content of the list. |
||
55 | * @var array |
||
56 | */ |
||
57 | protected $list; |
||
58 | |||
59 | /** |
||
60 | * Contains the old/new child lists content within this list. |
||
61 | * @var array |
||
62 | */ |
||
63 | protected $childLists; |
||
64 | |||
65 | /** |
||
66 | * Contains the old/new text strings that match |
||
67 | * @var array |
||
68 | */ |
||
69 | protected $textMatches; |
||
70 | |||
71 | /** |
||
72 | * Contains the indexed start positions of each list within word string. |
||
73 | * @var array |
||
74 | */ |
||
75 | protected $listsIndex; |
||
76 | |||
77 | /** |
||
78 | * Array that holds the index of all content outside of the array. Format is array(index => content). |
||
79 | * @var array |
||
80 | */ |
||
81 | protected $contentIndex = array(); |
||
82 | |||
83 | /** |
||
84 | * Holds the order and data on each list/content block within this list. |
||
85 | * @var array |
||
86 | */ |
||
87 | protected $diffOrderIndex = array(); |
||
88 | |||
89 | /** |
||
90 | * This is the opening ol,ul,dl ist tag. |
||
91 | * @var string |
||
92 | */ |
||
93 | protected $oldParentTag; |
||
94 | |||
95 | /** |
||
96 | * This is the opening ol,ul,dl ist tag. |
||
97 | * @var string |
||
98 | */ |
||
99 | protected $newParentTag; |
||
100 | |||
101 | /** |
||
102 | * We're using the same functions as the parent in build() to get us to the point of |
||
103 | * manipulating the data within this class. |
||
104 | * |
||
105 | * @return string |
||
106 | */ |
||
107 | public function build() |
||
118 | |||
119 | /** |
||
120 | * Calls to the actual custom functions of this class, to diff list content. |
||
121 | */ |
||
122 | protected function diffListContent() |
||
150 | |||
151 | /** |
||
152 | * This function is used to populate both contentIndex and diffOrderIndex arrays for use in the diff function. |
||
153 | */ |
||
154 | protected function indexContent() |
||
209 | |||
210 | /* |
||
211 | * This function is used to remove the wrapped ul, ol, or dl characters from this list |
||
212 | * and sets the listType as ul, ol, or dl, so that we can use it later. |
||
213 | * $list is being set here as well, as an array with the old and new version of this list content. |
||
214 | */ |
||
215 | protected function formatThisListContent() |
||
231 | |||
232 | /** |
||
233 | * |
||
234 | * @param array $arrayData |
||
235 | * @param string $index |
||
236 | * @return array |
||
237 | */ |
||
238 | protected function formatList(array $arrayData, $index = 'old') |
||
262 | |||
263 | /** |
||
264 | * @param string $tag |
||
265 | * @return string |
||
266 | */ |
||
267 | protected function getAndStripTag($tag) |
||
272 | |||
273 | protected function matchAndCompareLists() |
||
294 | |||
295 | /** |
||
296 | * Creates matches for lists. |
||
297 | */ |
||
298 | protected function compareChildLists() |
||
302 | |||
303 | /** |
||
304 | * Abstracted function used to match items in an array. |
||
305 | * This is used primarily for populating lists matches. |
||
306 | * |
||
307 | * @param array $listArray |
||
308 | * @param array $resultArray |
||
309 | * @param string|null $column |
||
310 | */ |
||
311 | protected function createNewOldMatches(&$listArray, &$resultArray, $column = null) |
||
417 | |||
418 | /** |
||
419 | * This fuction is exactly like array_column. This is added for PHP versions that do not support array_column. |
||
420 | * @param array $targetArray |
||
421 | * @param mixed $key |
||
422 | * @return array |
||
423 | */ |
||
424 | protected function getArrayColumn(array $targetArray, $key) |
||
435 | |||
436 | /** |
||
437 | * Build multidimensional array holding the contents of each list node, old and new. |
||
438 | */ |
||
439 | protected function buildChildLists() |
||
444 | |||
445 | /** |
||
446 | * Diff the actual contents of the lists against their matched counterpart. |
||
447 | * Build the content of the class. |
||
448 | */ |
||
449 | protected function diff() |
||
501 | |||
502 | /** |
||
503 | * |
||
504 | * @param string $newList |
||
505 | * @param string $oldList |
||
506 | * @param array $match |
||
507 | * @param array $index |
||
508 | * @return string |
||
509 | */ |
||
510 | protected function addListElementToContent($newList, $oldList, array $match, array $index, $type) |
||
524 | |||
525 | /** |
||
526 | * |
||
527 | * @param integer $oldIndexCount |
||
528 | * @param null|integer $newPosition |
||
529 | * @return string |
||
530 | */ |
||
531 | protected function addContentElementsToContent($oldIndexCount, $newPosition = null) |
||
549 | |||
550 | /** |
||
551 | * |
||
552 | * @param array $match |
||
553 | * @param string $type |
||
554 | * @return array|string |
||
555 | */ |
||
556 | protected function getListByMatch(array $match, $type = 'new') |
||
562 | |||
563 | /** |
||
564 | * This function replaces array_column function in PHP for older versions of php. |
||
565 | * |
||
566 | * @param array $parentArray |
||
567 | * @param string $column |
||
568 | * @param mixed $value |
||
569 | * @param boolean $allMatches |
||
570 | * @return array|boolean |
||
571 | */ |
||
572 | protected function getArrayByColumnValue($parentArray, $column, $value, $allMatches = false) |
||
587 | |||
588 | /** |
||
589 | * Converts the list (li) content arrays to string. |
||
590 | * |
||
591 | * @param array $listContentArray |
||
592 | * @return string |
||
593 | */ |
||
594 | protected function convertListContentArrayToString($listContentArray) |
||
618 | |||
619 | /** |
||
620 | * Return the contents of each list node. |
||
621 | * Process any placeholders for nested lists. |
||
622 | * |
||
623 | * @param string $text |
||
624 | * @param array $matches |
||
625 | * @return string |
||
626 | */ |
||
627 | protected function processPlaceholders($text, array $matches) |
||
660 | |||
661 | /** |
||
662 | * Checks to see if a diff tag is in string. |
||
663 | * |
||
664 | * @param string $word |
||
665 | * @return string |
||
666 | */ |
||
667 | protected function checkWordForDiffTag($word) |
||
685 | |||
686 | /** |
||
687 | * Used to remove new lines. |
||
688 | * |
||
689 | * @param string $text |
||
690 | * @return string |
||
691 | */ |
||
692 | protected function stripNewLine($text) |
||
696 | |||
697 | /** |
||
698 | * Grab the list content using the listsIndex array. |
||
699 | * |
||
700 | * @param string $indexKey |
||
701 | * @param array $matches |
||
702 | * @return array |
||
703 | */ |
||
704 | protected function getListContent($indexKey = 'new', array $matches) |
||
722 | |||
723 | /** |
||
724 | * Finds the end of list within its index. |
||
725 | * |
||
726 | * @param array $index |
||
727 | * @param integer $start |
||
728 | * @return integer |
||
729 | */ |
||
730 | protected function findEndForIndex(array $index, $start) |
||
749 | |||
750 | /** |
||
751 | * indexLists |
||
752 | * |
||
753 | * Index the list, starting positions, so that we can refer back to it later. |
||
754 | * This is used to see where one list node starts and another ends. |
||
755 | */ |
||
756 | protected function indexLists() |
||
777 | |||
778 | /** |
||
779 | * Adds the opening or closing list html element, based on listType. |
||
780 | * |
||
781 | * @param boolean $opening |
||
782 | * @return string |
||
783 | */ |
||
784 | protected function addListTypeWrapper($opening = true) |
||
793 | |||
794 | /** |
||
795 | * Replace nested list with placeholders. |
||
796 | */ |
||
797 | public function replaceListIsolatedDiffTags() |
||
802 | |||
803 | /** |
||
804 | * Grab the contents of a list node. |
||
805 | * |
||
806 | * @param array $contentArray |
||
807 | * @param boolean $stripTags |
||
808 | * @return array |
||
809 | */ |
||
810 | protected function getListsContent(array $contentArray, $stripTags = true) |
||
839 | |||
840 | /** |
||
841 | * This function helps build the list content array of a list. |
||
842 | * If a list has another list within it, the inner list is replaced with the list placeholder and the inner list |
||
843 | * content becomes a child of the parent list. |
||
844 | * This goes recursively down. |
||
845 | * |
||
846 | * @param string $word |
||
847 | * @param array $array |
||
848 | * @param integer $targetDepth |
||
849 | * @param integer $thisDepth |
||
850 | * @param array $nestedCount |
||
851 | */ |
||
852 | protected function addStringToArrayByDepth($word, array &$array, $targetDepth, $thisDepth, array $nestedCount) |
||
914 | |||
915 | /** |
||
916 | * Checks if text is opening list tag. |
||
917 | * |
||
918 | * @param string $item |
||
919 | * @return boolean |
||
920 | */ |
||
921 | protected function isOpeningListTag($item) |
||
929 | |||
930 | /** |
||
931 | * Check if text is closing list tag. |
||
932 | * |
||
933 | * @param string $item |
||
934 | * @return boolean |
||
935 | */ |
||
936 | protected function isClosingListTag($item) |
||
944 | } |
||
945 |
This check looks for multiple assignments in successive lines of code. It will report an issue if the operators are not in a straight line.
To visualize
will produce issues in the first and second line, while this second example
will produce no issues.