Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like SimpleHtmlDomNode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SimpleHtmlDomNode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class SimpleHtmlDomNode |
||
22 | { |
||
23 | public $nodetype = HDOM_TYPE_TEXT; |
||
24 | |||
25 | /** |
||
26 | * @var string |
||
27 | */ |
||
28 | public $tag = 'text'; |
||
29 | |||
30 | /** |
||
31 | * @var array |
||
32 | */ |
||
33 | public $attr = array(); |
||
34 | |||
35 | /** |
||
36 | * @var SimpleHtmlDomNode[] |
||
37 | */ |
||
38 | public $children = array(); |
||
39 | |||
40 | /** |
||
41 | * @var SimpleHtmlDomNode[] |
||
42 | */ |
||
43 | public $nodes = array(); |
||
44 | |||
45 | /** |
||
46 | * @var SimpleHtmlDomNode |
||
47 | */ |
||
48 | public $parent = null; |
||
49 | |||
50 | /** |
||
51 | * The "info" array - see HDOM_INFO_... for what each element contains. |
||
52 | * |
||
53 | * @var array |
||
54 | */ |
||
55 | public $_ = array(); |
||
56 | |||
57 | /** |
||
58 | * @var int |
||
59 | */ |
||
60 | public $tag_start = 0; |
||
61 | |||
62 | /** |
||
63 | * @var SimpleHtmlDom|null |
||
64 | */ |
||
65 | private $dom = null; |
||
66 | |||
67 | /** |
||
68 | * @param $dom |
||
69 | */ |
||
70 | 6 | public function __construct($dom) |
|
75 | |||
76 | /** |
||
77 | * Returns true if $str is valid UTF-8 and false otherwise. |
||
78 | * |
||
79 | * @param mixed $str String to be tested |
||
80 | * |
||
81 | * @return boolean |
||
82 | */ |
||
83 | public static function is_utf8($str) |
||
87 | |||
88 | 1 | public function __destruct() |
|
92 | |||
93 | // clean up memory due to php5 circular references memory leak... |
||
94 | |||
95 | 1 | public function clear() |
|
104 | |||
105 | /** |
||
106 | * magic - toString |
||
107 | * |
||
108 | * @return string |
||
109 | */ |
||
110 | public function __toString() |
||
114 | |||
115 | /** |
||
116 | * get dom node's outer text (with tag) |
||
117 | * |
||
118 | * @return string |
||
119 | */ |
||
120 | 5 | public function outertext() |
|
168 | |||
169 | /** |
||
170 | * get dom node's inner html |
||
171 | * |
||
172 | * @return string |
||
173 | */ |
||
174 | 5 | public function innertext() |
|
191 | |||
192 | /** |
||
193 | * dump node's tree |
||
194 | * |
||
195 | * @param bool $show_attr |
||
196 | * @param int $deep |
||
197 | */ |
||
198 | public function dump($show_attr = true, $deep = 0) |
||
219 | |||
220 | /** |
||
221 | * Debugging function to dump a single dom node with a bunch of information about it. |
||
222 | * |
||
223 | * @param bool $echo |
||
224 | * @param $node |
||
225 | * |
||
226 | * @return string|void |
||
227 | */ |
||
228 | public function dump_node($echo = true, $node) |
||
281 | |||
282 | /** |
||
283 | * function to locate a specific ancestor tag in the path to the root. |
||
284 | * |
||
285 | * @param $tag |
||
286 | * |
||
287 | * @return \voku\helper\SimpleHtmlDomNode |
||
288 | */ |
||
289 | public function find_ancestor_tag($tag) |
||
304 | |||
305 | /** |
||
306 | * build node's text with tag |
||
307 | * |
||
308 | * @return string |
||
309 | */ |
||
310 | 5 | public function makeup() |
|
350 | |||
351 | /** |
||
352 | * magic unset |
||
353 | * |
||
354 | * @param $name |
||
355 | */ |
||
356 | 1 | public function __unset($name) |
|
362 | |||
363 | /** |
||
364 | * Function to try a few tricks to determine the displayed size of an img on the page. |
||
365 | * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types. |
||
366 | * |
||
367 | * @author John Schlick |
||
368 | * @version April 19 2012 |
||
369 | * @return array|false an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it |
||
370 | * out. |
||
371 | */ |
||
372 | public function get_display_size() |
||
447 | |||
448 | /** |
||
449 | * get all attributes |
||
450 | * |
||
451 | * @return array |
||
452 | */ |
||
453 | public function getAllAttributes() |
||
457 | |||
458 | /** |
||
459 | * get attribute |
||
460 | * |
||
461 | * @param $name |
||
462 | * |
||
463 | * @return bool|mixed|string |
||
464 | */ |
||
465 | public function getAttribute($name) |
||
470 | |||
471 | /** |
||
472 | * magic get |
||
473 | * |
||
474 | * @param $name |
||
475 | * |
||
476 | * @return bool|mixed|string |
||
477 | */ |
||
478 | 3 | public function __get($name) |
|
497 | |||
498 | /** |
||
499 | * magic set |
||
500 | * |
||
501 | * @param $name |
||
502 | * @param $value |
||
503 | * |
||
504 | * @return mixed |
||
505 | */ |
||
506 | 4 | public function __set($name, $value) |
|
532 | |||
533 | /** |
||
534 | * get dom node's plain text |
||
535 | * |
||
536 | * @return string |
||
537 | */ |
||
538 | public function text() |
||
577 | |||
578 | /** |
||
579 | * xmltext |
||
580 | * |
||
581 | * @return mixed|string |
||
582 | */ |
||
583 | public function xmltext() |
||
591 | |||
592 | /** |
||
593 | * set attribute |
||
594 | * |
||
595 | * @param $name |
||
596 | * @param $value |
||
597 | */ |
||
598 | public function setAttribute($name, $value) |
||
603 | |||
604 | /** |
||
605 | * has attribute |
||
606 | * |
||
607 | * @param $name |
||
608 | * |
||
609 | * @return bool |
||
610 | */ |
||
611 | public function hasAttribute($name) |
||
616 | |||
617 | /** |
||
618 | * magic isset |
||
619 | * |
||
620 | * @param $name |
||
621 | * |
||
622 | * @return bool |
||
623 | */ |
||
624 | public function __isset($name) |
||
638 | |||
639 | /** |
||
640 | * remove attribute |
||
641 | * |
||
642 | * @param $name |
||
643 | */ |
||
644 | public function removeAttribute($name) |
||
649 | |||
650 | /** |
||
651 | * get element by id |
||
652 | * |
||
653 | * @param $id |
||
654 | * |
||
655 | * @return array|null |
||
656 | */ |
||
657 | public function getElementById($id) |
||
661 | |||
662 | /** |
||
663 | * find elements by css selector |
||
664 | * |
||
665 | * @param $selector |
||
666 | * @param null|int $idx |
||
667 | * |
||
668 | * @return SimpleHtmlDomNode|SimpleHtmlDomNode[]|array|null |
||
669 | */ |
||
670 | 6 | public function find($selector, $idx = null) |
|
737 | |||
738 | /** |
||
739 | * parse_selector |
||
740 | * |
||
741 | * @param $selector_string |
||
742 | * |
||
743 | * @return array |
||
744 | */ |
||
745 | 6 | protected function parse_selector($selector_string) |
|
825 | |||
826 | /** |
||
827 | * seek for given conditions |
||
828 | * |
||
829 | * PaperG - added parameter to allow for case insensitive testing of the value of a selector. |
||
830 | * |
||
831 | * @param $selector |
||
832 | * @param $ret |
||
833 | */ |
||
834 | 6 | protected function seek($selector, &$ret) |
|
935 | |||
936 | /** |
||
937 | * match |
||
938 | * |
||
939 | * @param $exp |
||
940 | * @param $pattern |
||
941 | * @param $value |
||
942 | * |
||
943 | * @return bool|int |
||
944 | */ |
||
945 | 5 | protected function match($exp, $pattern, $value) |
|
966 | |||
967 | /** |
||
968 | * get elements by id |
||
969 | * |
||
970 | * @param $id |
||
971 | * @param null $idx |
||
972 | * |
||
973 | * @return array|null |
||
974 | */ |
||
975 | public function getElementsById($id, $idx = null) |
||
979 | |||
980 | /** |
||
981 | * get element by tag name |
||
982 | * |
||
983 | * @param $name |
||
984 | * |
||
985 | * @return array|null |
||
986 | */ |
||
987 | public function getElementByTagName($name) |
||
991 | |||
992 | /** |
||
993 | * get elements by tag name |
||
994 | * |
||
995 | * @param $name |
||
996 | * @param null $idx |
||
997 | * |
||
998 | * @return array|null |
||
999 | */ |
||
1000 | public function getElementsByTagName($name, $idx = null) |
||
1004 | |||
1005 | /** |
||
1006 | * parent node |
||
1007 | * |
||
1008 | * @return null |
||
1009 | */ |
||
1010 | public function parentNode() |
||
1014 | |||
1015 | /** |
||
1016 | * returns the parent of node |
||
1017 | * |
||
1018 | * If a node is passed in, it will reset the parent of the current node to that one. |
||
1019 | * |
||
1020 | * @param null $parent |
||
1021 | * |
||
1022 | * @return null |
||
1023 | */ |
||
1024 | public function parent($parent = null) |
||
1036 | |||
1037 | /** |
||
1038 | * child nodes |
||
1039 | * |
||
1040 | * @param int $idx |
||
1041 | * |
||
1042 | * @return array|null |
||
1043 | */ |
||
1044 | public function childNodes($idx = -1) |
||
1048 | |||
1049 | /** |
||
1050 | * returns children of node |
||
1051 | * |
||
1052 | * @param int $idx |
||
1053 | * |
||
1054 | * @return array|null |
||
1055 | */ |
||
1056 | public function children($idx = -1) |
||
1068 | |||
1069 | /** |
||
1070 | * first child |
||
1071 | * |
||
1072 | * @return null |
||
1073 | */ |
||
1074 | public function firstChild() |
||
1078 | |||
1079 | /** |
||
1080 | * returns the first child of node |
||
1081 | * |
||
1082 | * @return null |
||
1083 | */ |
||
1084 | public function first_child() |
||
1092 | |||
1093 | /** |
||
1094 | * last child |
||
1095 | * |
||
1096 | * @return null |
||
1097 | */ |
||
1098 | public function lastChild() |
||
1102 | |||
1103 | /** |
||
1104 | * returns the last child of node |
||
1105 | * |
||
1106 | * @return null |
||
1107 | */ |
||
1108 | public function last_child() |
||
1117 | |||
1118 | /** |
||
1119 | * next sibling |
||
1120 | * |
||
1121 | * @return null |
||
1122 | */ |
||
1123 | public function nextSibling() |
||
1127 | |||
1128 | /** |
||
1129 | * returns the next sibling of node |
||
1130 | * |
||
1131 | * @return null |
||
1132 | */ |
||
1133 | View Code Duplication | public function next_sibling() |
|
1151 | |||
1152 | /** |
||
1153 | * previous sibling |
||
1154 | * |
||
1155 | * @return null |
||
1156 | */ |
||
1157 | public function previousSibling() |
||
1161 | |||
1162 | /** |
||
1163 | * returns the previous sibling of node |
||
1164 | * |
||
1165 | * @return null|\voku\helper\SimpleHtmlDomNode |
||
1166 | */ |
||
1167 | View Code Duplication | public function prev_sibling() |
|
1185 | |||
1186 | /** |
||
1187 | * has child nodes |
||
1188 | * |
||
1189 | * @return bool |
||
1190 | */ |
||
1191 | public function hasChildNodes() |
||
1195 | |||
1196 | /** |
||
1197 | * verify that node has children |
||
1198 | * |
||
1199 | * @return bool |
||
1200 | */ |
||
1201 | public function has_child() |
||
1205 | |||
1206 | /** |
||
1207 | * node name |
||
1208 | * |
||
1209 | * @return string |
||
1210 | */ |
||
1211 | public function nodeName() |
||
1215 | |||
1216 | /** |
||
1217 | * append child |
||
1218 | * |
||
1219 | * @param SimpleHtmlDomNode $node |
||
1220 | * |
||
1221 | * @return mixed |
||
1222 | */ |
||
1223 | public function appendChild($node) |
||
1229 | |||
1230 | } |
||
1231 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.