Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like AbstractNode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use AbstractNode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
17 | abstract class AbstractNode |
||
18 | { |
||
19 | |||
20 | /** |
||
21 | * Contains the tag name/type |
||
22 | * |
||
23 | * @var \PHPHtmlParser\Dom\Tag |
||
24 | */ |
||
25 | protected $tag; |
||
26 | |||
27 | /** |
||
28 | * Contains a list of attributes on this tag. |
||
29 | * |
||
30 | * @var array |
||
31 | */ |
||
32 | protected $attr = []; |
||
33 | |||
34 | /** |
||
35 | * An array of all the children. |
||
36 | * |
||
37 | * @var array |
||
38 | */ |
||
39 | protected $children = []; |
||
40 | |||
41 | /** |
||
42 | * Contains the parent Node. |
||
43 | * |
||
44 | * @var AbstractNode |
||
45 | */ |
||
46 | protected $parent = null; |
||
47 | |||
48 | /** |
||
49 | * The unique id of the class. Given by PHP. |
||
50 | * |
||
51 | * @var string |
||
52 | */ |
||
53 | protected $id; |
||
54 | |||
55 | /** |
||
56 | * The encoding class used to encode strings. |
||
57 | * |
||
58 | * @var mixed |
||
59 | */ |
||
60 | protected $encode; |
||
61 | |||
62 | /** |
||
63 | * Creates a unique spl hash for this node. |
||
64 | */ |
||
65 | public function __construct() |
||
69 | |||
70 | /** |
||
71 | * Magic get method for attributes and certain methods. |
||
72 | * |
||
73 | * @param string $key |
||
74 | * @return mixed |
||
75 | */ |
||
76 | public function __get($key) |
||
93 | |||
94 | /** |
||
95 | * Attempts to clear out any object references. |
||
96 | */ |
||
97 | public function __destruct() |
||
104 | |||
105 | /** |
||
106 | * Simply calls the outer text method. |
||
107 | * |
||
108 | * @return string |
||
109 | */ |
||
110 | public function __toString() |
||
114 | |||
115 | /** |
||
116 | * Returns the id of this object. |
||
117 | */ |
||
118 | public function id() |
||
122 | |||
123 | /** |
||
124 | * Returns the parent of node. |
||
125 | * |
||
126 | * @return AbstractNode |
||
127 | */ |
||
128 | public function getParent() |
||
132 | |||
133 | /** |
||
134 | * Sets the parent node. |
||
135 | * |
||
136 | * @param AbstractNode $parent |
||
137 | * @return $this |
||
138 | * @throws CircularException |
||
139 | */ |
||
140 | public function setParent(AbstractNode $parent) |
||
167 | |||
168 | /** |
||
169 | * Sets the encoding class to this node and propagates it |
||
170 | * to all its children. |
||
171 | * |
||
172 | * @param Encode $encode |
||
173 | */ |
||
174 | public function propagateEncoding(Encode $encode) |
||
185 | |||
186 | /** |
||
187 | * Checks if this node has children. |
||
188 | * |
||
189 | * @return bool |
||
190 | */ |
||
191 | public function hasChildren() |
||
195 | |||
196 | /** |
||
197 | * Returns the child by id. |
||
198 | * |
||
199 | * @param int $id |
||
200 | * @return AbstractNode |
||
201 | * @throws ChildNotFoundException |
||
202 | */ |
||
203 | public function getChild($id) |
||
211 | |||
212 | /** |
||
213 | * Returns a new array of child nodes |
||
214 | * |
||
215 | * @return array |
||
216 | */ |
||
217 | public function getChildren() |
||
232 | |||
233 | /** |
||
234 | * Counts children |
||
235 | * |
||
236 | * @return int |
||
237 | */ |
||
238 | public function countChildren() |
||
242 | |||
243 | /** |
||
244 | * Adds a child node to this node and returns the id of the child for this |
||
245 | * parent. |
||
246 | * |
||
247 | * @param AbstractNode $child |
||
248 | * @return bool |
||
249 | * @throws CircularException |
||
250 | */ |
||
251 | public function addChild(AbstractNode $child) |
||
290 | |||
291 | /** |
||
292 | * Removes the child by id. |
||
293 | * |
||
294 | * @param int $id |
||
295 | * @return $this |
||
296 | */ |
||
297 | public function removeChild($id) |
||
321 | |||
322 | /** |
||
323 | * Attempts to get the next child. |
||
324 | * |
||
325 | * @param int $id |
||
326 | * @return AbstractNode |
||
327 | * @uses $this->getChild() |
||
328 | */ |
||
329 | View Code Duplication | public function nextChild($id) |
|
336 | |||
337 | /** |
||
338 | * Attempts to get the previous child. |
||
339 | * |
||
340 | * @param int $id |
||
341 | * @return AbstractNode |
||
342 | * @uses $this->getChild() |
||
343 | */ |
||
344 | View Code Duplication | public function previousChild($id) |
|
351 | |||
352 | /** |
||
353 | * Checks if the given node id is a child of the |
||
354 | * current node. |
||
355 | * |
||
356 | * @param int $id |
||
357 | * @return bool |
||
358 | */ |
||
359 | public function isChild($id) |
||
369 | |||
370 | /** |
||
371 | * Checks if the given node id is a descendant of the |
||
372 | * current node. |
||
373 | * |
||
374 | * @param int $id |
||
375 | * @return bool |
||
376 | */ |
||
377 | public function isDescendant($id) |
||
395 | |||
396 | /** |
||
397 | * Checks if the given node id is an ancestor of |
||
398 | * the current node. |
||
399 | * |
||
400 | * @param int $id |
||
401 | * @return bool |
||
402 | */ |
||
403 | public function isAncestor($id) |
||
411 | |||
412 | /** |
||
413 | * Attempts to get an ancestor node by the given id. |
||
414 | * |
||
415 | * @param int $id |
||
416 | * @return null|AbstractNode |
||
417 | */ |
||
418 | public function getAncestor($id) |
||
430 | |||
431 | /** |
||
432 | * Shortcut to return the first child. |
||
433 | * |
||
434 | * @return AbstractNode |
||
435 | * @uses $this->getChild() |
||
436 | */ |
||
437 | public function firstChild() |
||
444 | |||
445 | /** |
||
446 | * Attempts to get the last child. |
||
447 | * |
||
448 | * @return AbstractNode |
||
449 | */ |
||
450 | public function lastChild() |
||
457 | |||
458 | /** |
||
459 | * Attempts to get the next sibling. |
||
460 | * |
||
461 | * @return AbstractNode |
||
462 | * @throws ParentNotFoundException |
||
463 | */ |
||
464 | public function nextSibling() |
||
472 | |||
473 | /** |
||
474 | * Attempts to get the previous sibling |
||
475 | * |
||
476 | * @return AbstractNode |
||
477 | * @throws ParentNotFoundException |
||
478 | */ |
||
479 | public function previousSibling() |
||
487 | |||
488 | /** |
||
489 | * Gets the tag object of this node. |
||
490 | * |
||
491 | * @return Tag |
||
492 | */ |
||
493 | public function getTag() |
||
497 | |||
498 | /** |
||
499 | * A wrapper method that simply calls the getAttribute method |
||
500 | * on the tag of this node. |
||
501 | * |
||
502 | * @return array |
||
503 | */ |
||
504 | public function getAttributes() |
||
513 | |||
514 | /** |
||
515 | * A wrapper method that simply calls the getAttribute method |
||
516 | * on the tag of this node. |
||
517 | * |
||
518 | * @param string $key |
||
519 | * @return mixed |
||
520 | */ |
||
521 | public function getAttribute($key) |
||
530 | |||
531 | /** |
||
532 | * A wrapper method that simply calls the setAttribute method |
||
533 | * on the tag of this node. |
||
534 | * |
||
535 | * @param string $key |
||
536 | * @param string $value |
||
537 | * @return $this |
||
538 | */ |
||
539 | public function setAttribute($key, $value) |
||
545 | |||
546 | /** |
||
547 | * Function to locate a specific ancestor tag in the path to the root. |
||
548 | * |
||
549 | * @param string $tag |
||
550 | * @return AbstractNode |
||
551 | * @throws ParentNotFoundException |
||
552 | */ |
||
553 | public function ancestorByTag($tag) |
||
568 | |||
569 | /** |
||
570 | * Find elements by css selector |
||
571 | * |
||
572 | * @param string $selector |
||
573 | * @param int $nth |
||
574 | * @return array|AbstractNode |
||
575 | */ |
||
576 | public function find($selector, $nth = null) |
||
592 | |||
593 | /** |
||
594 | * Function to try a few tricks to determine the displayed size of an img on the page. |
||
595 | * NOTE: This will ONLY work on an IMG tag. Returns FALSE on all other tag types. |
||
596 | * |
||
597 | * Future enhancement: |
||
598 | * Look in the tag to see if there is a class or id specified that has a height or width attribute to it. |
||
599 | * |
||
600 | * Far future enhancement |
||
601 | * Look at all the parent tags of this image to see if they specify a class or id that has an img selector that specifies a height or width |
||
602 | * Note that in this case, the class or id will have the img sub-selector for it to apply to the image. |
||
603 | * |
||
604 | * ridiculously far future development |
||
605 | * If the class or id is specified in a SEPARATE css file that's not on the page, go get it and do what we were just doing for the ones on the page. |
||
606 | * |
||
607 | * @author John Schlick |
||
608 | * @return array an array containing the 'height' and 'width' of the image on the page or -1 if we can't figure it out. |
||
609 | */ |
||
610 | public function get_display_size() |
||
649 | |||
650 | /** |
||
651 | * If there is a length in the style attributes use it. |
||
652 | * |
||
653 | * @param array $attributes |
||
654 | * @param int $length |
||
655 | * @param string $key |
||
656 | * @return int |
||
657 | */ |
||
658 | protected function getLength(array $attributes, $length, $key) |
||
673 | |||
674 | /** |
||
675 | * Gets the inner html of this node. |
||
676 | * |
||
677 | * @return string |
||
678 | */ |
||
679 | abstract public function innerHtml(); |
||
680 | |||
681 | /** |
||
682 | * Gets the html of this node, including it's own |
||
683 | * tag. |
||
684 | * |
||
685 | * @return string |
||
686 | */ |
||
687 | abstract public function outerHtml(); |
||
688 | |||
689 | /** |
||
690 | * Gets the text of this node (if there is any text). |
||
691 | * |
||
692 | * @return string |
||
693 | */ |
||
694 | abstract public function text(); |
||
695 | |||
696 | /** |
||
697 | * Call this when something in the node tree has changed. Like a child has been added |
||
698 | * or a parent has been changed. |
||
699 | * |
||
700 | * @return void |
||
701 | */ |
||
702 | abstract protected function clear(); |
||
703 | } |
||
704 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.