Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like SimpleHtmlDom often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use SimpleHtmlDom, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
53 | class SimpleHtmlDom implements \IteratorAggregate |
||
54 | { |
||
55 | /** |
||
56 | * @var array |
||
57 | */ |
||
58 | protected static $functionAliases = [ |
||
59 | 'children' => 'childNodes', |
||
60 | 'first_child' => 'firstChild', |
||
61 | 'last_child' => 'lastChild', |
||
62 | 'next_sibling' => 'nextSibling', |
||
63 | 'prev_sibling' => 'previousSibling', |
||
64 | 'parent' => 'parentNode', |
||
65 | 'outertext' => 'html', |
||
66 | 'outerhtml' => 'html', |
||
67 | 'innertext' => 'innerHtml', |
||
68 | 'innerhtml' => 'innerHtml', |
||
69 | ]; |
||
70 | |||
71 | /** |
||
72 | * @var DOMElement|DOMNode |
||
73 | */ |
||
74 | protected $node; |
||
75 | |||
76 | /** |
||
77 | * @param DOMElement|DOMNode $node |
||
78 | */ |
||
79 | 103 | public function __construct(DOMNode $node) |
|
83 | |||
84 | /** |
||
85 | * @param string $name |
||
86 | * @param array $arguments |
||
87 | * |
||
88 | * @return SimpleHtmlDom|string|null |
||
89 | * @throws \BadMethodCallException |
||
90 | * |
||
91 | */ |
||
92 | 9 | View Code Duplication | public function __call($name, $arguments) |
102 | |||
103 | /** |
||
104 | * @param string $name |
||
105 | * |
||
106 | * @return array|string|null |
||
107 | */ |
||
108 | 47 | public function __get($name) |
|
136 | |||
137 | /** |
||
138 | * @param string $selector |
||
139 | * @param int $idx |
||
140 | * |
||
141 | * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
142 | */ |
||
143 | 12 | public function __invoke($selector, $idx = null) |
|
147 | |||
148 | /** |
||
149 | * @param string $name |
||
150 | * |
||
151 | * @return bool |
||
152 | */ |
||
153 | 1 | public function __isset($name) |
|
175 | |||
176 | /** |
||
177 | * @param string $name |
||
178 | * @param mixed $value |
||
179 | * |
||
180 | * @return null|SimpleHtmlDom |
||
181 | */ |
||
182 | 16 | public function __set($name, $value) |
|
204 | |||
205 | /** |
||
206 | * @return string |
||
207 | */ |
||
208 | 2 | public function __toString() |
|
212 | |||
213 | /** |
||
214 | * @param string $name |
||
215 | * |
||
216 | * @return void |
||
217 | */ |
||
218 | public function __unset($name) |
||
222 | |||
223 | /** |
||
224 | * Returns children of node. |
||
225 | * |
||
226 | * @param int $idx |
||
227 | * |
||
228 | * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface|null |
||
229 | */ |
||
230 | 2 | public function childNodes(int $idx = -1) |
|
240 | |||
241 | /** |
||
242 | * Find list of nodes with a CSS selector. |
||
243 | * |
||
244 | * @param string $selector |
||
245 | * @param int|null $idx |
||
246 | * |
||
247 | * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
248 | */ |
||
249 | 26 | public function find(string $selector, $idx = null) |
|
253 | |||
254 | /** |
||
255 | * Find one node with a CSS selector. |
||
256 | * |
||
257 | * @param string $selector |
||
258 | * |
||
259 | * @return SimpleHtmlDom |
||
260 | */ |
||
261 | 1 | public function findOne(string $selector): self |
|
265 | |||
266 | /** |
||
267 | * Find nodes with a CSS selector. |
||
268 | * |
||
269 | * @param string $selector |
||
270 | * |
||
271 | * @return SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
272 | */ |
||
273 | public function findMulti(string $selector) |
||
277 | |||
278 | /** |
||
279 | * Returns the first child of node. |
||
280 | * |
||
281 | * @return SimpleHtmlDom|null |
||
282 | */ |
||
283 | 4 | public function firstChild() |
|
294 | |||
295 | /** |
||
296 | * Returns an array of attributes. |
||
297 | * |
||
298 | * @return array|null |
||
299 | */ |
||
300 | 2 | public function getAllAttributes() |
|
313 | |||
314 | /** |
||
315 | * Return attribute value. |
||
316 | * |
||
317 | * @param string $name |
||
318 | * |
||
319 | * @return string |
||
320 | */ |
||
321 | 14 | public function getAttribute(string $name): string |
|
331 | |||
332 | /** |
||
333 | * Return element by #id. |
||
334 | * |
||
335 | * @param string $id |
||
336 | * |
||
337 | * @return SimpleHtmlDom |
||
338 | */ |
||
339 | 1 | public function getElementById(string $id): self |
|
343 | |||
344 | /** |
||
345 | * Returns elements by #id. |
||
346 | * |
||
347 | * @param string $id |
||
348 | * @param int|null $idx |
||
349 | * |
||
350 | * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
351 | */ |
||
352 | public function getElementsById(string $id, $idx = null) |
||
356 | |||
357 | /** |
||
358 | * Return elements by .class. |
||
359 | * |
||
360 | * @param string $class |
||
361 | * |
||
362 | * @return SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
363 | */ |
||
364 | public function getElementByClass(string $class) |
||
368 | |||
369 | /** |
||
370 | * Return element by tag name. |
||
371 | * |
||
372 | * @param string $name |
||
373 | * |
||
374 | * @return SimpleHtmlDom|SimpleHtmlDomNodeBlank |
||
375 | */ |
||
376 | public function getElementByTagName(string $name) |
||
390 | |||
391 | /** |
||
392 | * Returns elements by tag name. |
||
393 | * |
||
394 | * @param string $name |
||
395 | * @param int|null $idx |
||
396 | * |
||
397 | * @return SimpleHtmlDom|SimpleHtmlDom[]|SimpleHtmlDomNodeInterface |
||
398 | */ |
||
399 | public function getElementsByTagName(string $name, $idx = null) |
||
426 | |||
427 | /** |
||
428 | * Create a new "HtmlDomParser"-object from the current context. |
||
429 | * |
||
430 | * @return HtmlDomParser |
||
431 | */ |
||
432 | public function getHtmlDomParser(): HtmlDomParser |
||
436 | |||
437 | /** |
||
438 | * Retrieve an external iterator. |
||
439 | * |
||
440 | * @see http://php.net/manual/en/iteratoraggregate.getiterator.php |
||
441 | * |
||
442 | * @return SimpleHtmlDomNode |
||
443 | * <p> |
||
444 | * An instance of an object implementing <b>Iterator</b> or |
||
445 | * <b>Traversable</b> |
||
446 | * </p> |
||
447 | */ |
||
448 | public function getIterator(): SimpleHtmlDomNode |
||
459 | |||
460 | /** |
||
461 | * @return DOMNode |
||
462 | */ |
||
463 | public function getNode(): DOMNode |
||
467 | |||
468 | /** |
||
469 | * Determine if an attribute exists on the element. |
||
470 | * |
||
471 | * @param string $name |
||
472 | * |
||
473 | * @return bool |
||
474 | */ |
||
475 | public function hasAttribute(string $name): bool |
||
483 | |||
484 | /** |
||
485 | * Get dom node's outer html. |
||
486 | * |
||
487 | * @param bool $multiDecodeNewHtmlEntity |
||
488 | * |
||
489 | * @return string |
||
490 | */ |
||
491 | public function html(bool $multiDecodeNewHtmlEntity = false): string |
||
495 | |||
496 | /** |
||
497 | * Get dom node's inner html. |
||
498 | * |
||
499 | * @param bool $multiDecodeNewHtmlEntity |
||
500 | * |
||
501 | * @return string |
||
502 | */ |
||
503 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string |
||
507 | |||
508 | /** |
||
509 | * Returns the last child of node. |
||
510 | * |
||
511 | * @return SimpleHtmlDom|null |
||
512 | */ |
||
513 | public function lastChild() |
||
524 | |||
525 | /** |
||
526 | * Returns the next sibling of node. |
||
527 | * |
||
528 | * @return SimpleHtmlDom|null |
||
529 | */ |
||
530 | public function nextSibling() |
||
541 | |||
542 | /** |
||
543 | * Returns the parent of node. |
||
544 | * |
||
545 | * @return SimpleHtmlDom |
||
546 | */ |
||
547 | public function parentNode(): self |
||
551 | |||
552 | /** |
||
553 | * Nodes can get partially destroyed in which they're still an |
||
554 | * actual DOM node (such as \DOMElement) but almost their entire |
||
555 | * body is gone, including the `nodeType` attribute. |
||
556 | * |
||
557 | * @return bool true if node has been destroyed |
||
558 | */ |
||
559 | public function isRemoved(): bool |
||
563 | |||
564 | /** |
||
565 | * Returns the previous sibling of node. |
||
566 | * |
||
567 | * @return SimpleHtmlDom|null |
||
568 | */ |
||
569 | public function previousSibling() |
||
580 | |||
581 | /** |
||
582 | * Replace child node. |
||
583 | * |
||
584 | * @param string $string |
||
585 | * |
||
586 | * @return SimpleHtmlDom |
||
587 | * |
||
588 | */ |
||
589 | protected function replaceChildWithString(string $string): self |
||
624 | |||
625 | /** |
||
626 | * Replace this node with text |
||
627 | * |
||
628 | * @param string $string |
||
629 | * |
||
630 | * @return SimpleHtmlDom |
||
631 | */ |
||
632 | protected function replaceTextWithString($string): self |
||
650 | |||
651 | /** |
||
652 | * Replace this node. |
||
653 | * |
||
654 | * @param string $string |
||
655 | * |
||
656 | * @return SimpleHtmlDom |
||
657 | * |
||
658 | */ |
||
659 | protected function replaceNodeWithString(string $string): self |
||
709 | |||
710 | /** |
||
711 | * Normalize the given input for comparison. |
||
712 | * |
||
713 | * @param HtmlDomParser|string $input |
||
714 | * |
||
715 | * @return string |
||
716 | */ |
||
717 | private function normalizeStringForComparision($input): string |
||
753 | |||
754 | /** |
||
755 | * @param HtmlDomParser $newDocument |
||
756 | * @param bool $removeExtraHeadTag |
||
757 | * |
||
758 | * @return HtmlDomParser |
||
759 | */ |
||
760 | protected function cleanHtmlWrapper(HtmlDomParser $newDocument, $removeExtraHeadTag = false): HtmlDomParser |
||
830 | |||
831 | /** |
||
832 | * Change the name of a tag in a "DOMNode". |
||
833 | * |
||
834 | * @param DOMNode $node |
||
835 | * @param string $name |
||
836 | * |
||
837 | * @return false|DOMElement |
||
838 | * <p>DOMElement a new instance of class DOMElement or false |
||
839 | * if an error occurred.</p> |
||
840 | */ |
||
841 | protected function changeElementName(\DOMNode $node, string $name) |
||
866 | |||
867 | /** |
||
868 | * Set attribute value. |
||
869 | * |
||
870 | * @param string $name <p>The name of the html-attribute.</p> |
||
871 | * @param string|null $value <p>Set to NULL or empty string, to remove the attribute.</p> |
||
872 | * @param bool $strict <p> |
||
873 | * $value must be NULL, to remove the attribute, |
||
874 | * so that you can set an empty string as attribute-value e.g. autofocus="" |
||
875 | * </p> |
||
876 | * |
||
877 | * @return SimpleHtmlDom |
||
878 | */ |
||
879 | public function setAttribute(string $name, $value = null, bool $strict = false): self |
||
894 | |||
895 | /** |
||
896 | * @param string|string[]|null $value <p> |
||
897 | * null === get the current input value |
||
898 | * text === set a new input value |
||
899 | * </p> |
||
900 | * |
||
901 | * @return string|string[]|null |
||
902 | */ |
||
903 | public function val($value = null) |
||
976 | |||
977 | /** |
||
978 | * Remove attribute. |
||
979 | * |
||
980 | * @param string $name <p>The name of the html-attribute.</p> |
||
981 | * |
||
982 | * @return mixed |
||
983 | */ |
||
984 | public function removeAttribute(string $name) |
||
990 | |||
991 | /** |
||
992 | * Get dom node's plain text. |
||
993 | * |
||
994 | * @return string |
||
995 | */ |
||
996 | public function text(): string |
||
1000 | } |
||
1001 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.