Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlDomParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDomParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
34 | class HtmlDomParser extends AbstractDomParser |
||
35 | { |
||
36 | /** |
||
37 | * @var string[] |
||
38 | */ |
||
39 | protected static $functionAliases = [ |
||
40 | 'outertext' => 'html', |
||
41 | 'outerhtml' => 'html', |
||
42 | 'innertext' => 'innerHtml', |
||
43 | 'innerhtml' => 'innerHtml', |
||
44 | 'load' => 'loadHtml', |
||
45 | 'load_file' => 'loadHtmlFile', |
||
46 | ]; |
||
47 | |||
48 | /** |
||
49 | * @var string[] |
||
50 | */ |
||
51 | protected $templateLogicSyntaxInSpecialScriptTags = [ |
||
52 | '+', |
||
53 | '<%', |
||
54 | '{%', |
||
55 | '{{', |
||
56 | ]; |
||
57 | |||
58 | /** |
||
59 | * The properties for the special script tags are specified as an array of the following format: |
||
60 | * string script tag => regex script tag |
||
61 | * |
||
62 | * ```php |
||
63 | * protected $specialScriptTags = [ |
||
64 | * 'text/html' => 'text\/html', |
||
65 | * 'text/x-custom-template' => 'text\/x-custom-template', |
||
66 | * 'text/x-handlebars-template' => 'text\/x-handlebars-template' |
||
67 | * ] |
||
68 | * ``` |
||
69 | * |
||
70 | * @var string[] |
||
71 | */ |
||
72 | protected $specialScriptTags = [ |
||
73 | 'text/html' => 'text\/html', |
||
74 | 'text/x-custom-template' => 'text\/x-custom-template', |
||
75 | 'text/x-handlebars-template' => 'text\/x-handlebars-template' |
||
76 | ]; |
||
77 | |||
78 | /** |
||
79 | * @var bool |
||
80 | */ |
||
81 | protected $isDOMDocumentCreatedWithoutHtml = false; |
||
82 | |||
83 | /** |
||
84 | * @var bool |
||
85 | */ |
||
86 | protected $isDOMDocumentCreatedWithoutWrapper = false; |
||
87 | |||
88 | /** |
||
89 | * @var bool |
||
90 | */ |
||
91 | protected $isDOMDocumentCreatedWithCommentWrapper = false; |
||
92 | |||
93 | /** |
||
94 | * @var bool |
||
95 | */ |
||
96 | protected $isDOMDocumentCreatedWithoutHeadWrapper = false; |
||
97 | |||
98 | /** |
||
99 | * @var bool |
||
100 | */ |
||
101 | protected $isDOMDocumentCreatedWithoutPTagWrapper = false; |
||
102 | |||
103 | /** |
||
104 | * @var bool |
||
105 | */ |
||
106 | protected $isDOMDocumentCreatedWithoutHtmlWrapper = false; |
||
107 | |||
108 | /** |
||
109 | * @var bool |
||
110 | */ |
||
111 | protected $isDOMDocumentCreatedWithoutBodyWrapper = false; |
||
112 | |||
113 | /** |
||
114 | * @var bool |
||
115 | */ |
||
116 | protected $isDOMDocumentCreatedWithFakeEndScript = false; |
||
117 | |||
118 | /** |
||
119 | * @var bool |
||
120 | */ |
||
121 | protected $keepBrokenHtml; |
||
122 | |||
123 | /** |
||
124 | * @param \DOMNode|SimpleHtmlDomInterface|string $element HTML code or SimpleHtmlDomInterface, \DOMNode |
||
125 | */ |
||
126 | 212 | View Code Duplication | public function __construct($element = null) |
154 | |||
155 | /** |
||
156 | * @param string $name |
||
157 | * @param array $arguments |
||
158 | * |
||
159 | * @return bool|mixed |
||
160 | */ |
||
161 | 76 | public function __call($name, $arguments) |
|
171 | |||
172 | /** |
||
173 | * @param string $name |
||
174 | * @param array $arguments |
||
175 | * |
||
176 | * @throws \BadMethodCallException |
||
177 | * @throws \RuntimeException |
||
178 | * |
||
179 | * @return HtmlDomParser |
||
180 | */ |
||
181 | 26 | View Code Duplication | public static function __callStatic($name, $arguments) |
201 | |||
202 | /** @noinspection MagicMethodsValidityInspection */ |
||
203 | |||
204 | /** |
||
205 | * @param string $name |
||
206 | * |
||
207 | * @return string|null |
||
208 | */ |
||
209 | 15 | public function __get($name) |
|
227 | |||
228 | /** |
||
229 | * @return string |
||
230 | */ |
||
231 | 19 | public function __toString() |
|
235 | |||
236 | /** |
||
237 | * Does nothing (kept only for API compatibility reasons). |
||
238 | * |
||
239 | * @return bool |
||
240 | * |
||
241 | * @deprecated |
||
242 | */ |
||
243 | 6 | public function clear(): bool |
|
247 | |||
248 | /** |
||
249 | * Create DOMDocument from HTML. |
||
250 | * |
||
251 | * @param string $html |
||
252 | * @param int|null $libXMLExtraOptions |
||
253 | * |
||
254 | * @return \DOMDocument |
||
255 | */ |
||
256 | 196 | protected function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument |
|
405 | |||
406 | /** |
||
407 | * Find list of nodes with a CSS selector. |
||
408 | * |
||
409 | * @param string $selector |
||
410 | * @param int|null $idx |
||
411 | * |
||
412 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
413 | */ |
||
414 | 144 | View Code Duplication | public function find(string $selector, $idx = null) |
445 | |||
446 | /** |
||
447 | * Find nodes with a CSS selector. |
||
448 | * |
||
449 | * @param string $selector |
||
450 | * |
||
451 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
452 | */ |
||
453 | 12 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface |
|
457 | |||
458 | /** |
||
459 | * Find nodes with a CSS selector or false, if no element is found. |
||
460 | * |
||
461 | * @param string $selector |
||
462 | * |
||
463 | * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
464 | */ |
||
465 | 4 | public function findMultiOrFalse(string $selector) |
|
475 | |||
476 | /** |
||
477 | * Find one node with a CSS selector. |
||
478 | * |
||
479 | * @param string $selector |
||
480 | * |
||
481 | * @return SimpleHtmlDomInterface |
||
482 | */ |
||
483 | 32 | public function findOne(string $selector): SimpleHtmlDomInterface |
|
487 | |||
488 | /** |
||
489 | * Find one node with a CSS selector or false, if no element is found. |
||
490 | * |
||
491 | * @param string $selector |
||
492 | * |
||
493 | * @return false|SimpleHtmlDomInterface |
||
494 | */ |
||
495 | 6 | public function findOneOrFalse(string $selector) |
|
505 | |||
506 | /** |
||
507 | * @param string $content |
||
508 | * @param bool $multiDecodeNewHtmlEntity |
||
509 | * |
||
510 | * @return string |
||
511 | */ |
||
512 | 122 | public function fixHtmlOutput( |
|
618 | |||
619 | /** |
||
620 | * Return elements by ".class". |
||
621 | * |
||
622 | * @param string $class |
||
623 | * |
||
624 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
625 | */ |
||
626 | public function getElementByClass(string $class): SimpleHtmlDomNodeInterface |
||
630 | |||
631 | /** |
||
632 | * Return element by #id. |
||
633 | * |
||
634 | * @param string $id |
||
635 | * |
||
636 | * @return SimpleHtmlDomInterface |
||
637 | */ |
||
638 | 3 | public function getElementById(string $id): SimpleHtmlDomInterface |
|
642 | |||
643 | /** |
||
644 | * Return element by tag name. |
||
645 | * |
||
646 | * @param string $name |
||
647 | * |
||
648 | * @return SimpleHtmlDomInterface |
||
649 | */ |
||
650 | 1 | public function getElementByTagName(string $name): SimpleHtmlDomInterface |
|
660 | |||
661 | /** |
||
662 | * Returns elements by "#id". |
||
663 | * |
||
664 | * @param string $id |
||
665 | * @param int|null $idx |
||
666 | * |
||
667 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
668 | */ |
||
669 | public function getElementsById(string $id, $idx = null) |
||
673 | |||
674 | /** |
||
675 | * Returns elements by tag name. |
||
676 | * |
||
677 | * @param string $name |
||
678 | * @param int|null $idx |
||
679 | * |
||
680 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
681 | */ |
||
682 | 6 | public function getElementsByTagName(string $name, $idx = null) |
|
709 | |||
710 | /** |
||
711 | * Get dom node's outer html. |
||
712 | * |
||
713 | * @param bool $multiDecodeNewHtmlEntity |
||
714 | * |
||
715 | * @return string |
||
716 | */ |
||
717 | 89 | public function html(bool $multiDecodeNewHtmlEntity = false): string |
|
735 | |||
736 | /** |
||
737 | * Load HTML from string. |
||
738 | * |
||
739 | * @param string $html |
||
740 | * @param int|null $libXMLExtraOptions |
||
741 | * |
||
742 | * @return HtmlDomParser |
||
743 | */ |
||
744 | 196 | public function loadHtml(string $html, $libXMLExtraOptions = null): DomParserInterface |
|
753 | |||
754 | /** |
||
755 | * Load HTML from file. |
||
756 | * |
||
757 | * @param string $filePath |
||
758 | * @param int|null $libXMLExtraOptions |
||
759 | * |
||
760 | * @throws \RuntimeException |
||
761 | * |
||
762 | * @return HtmlDomParser |
||
763 | */ |
||
764 | 13 | View Code Duplication | public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): DomParserInterface |
794 | |||
795 | /** |
||
796 | * Get the HTML as XML or plain XML if needed. |
||
797 | * |
||
798 | * @param bool $multiDecodeNewHtmlEntity |
||
799 | * @param bool $htmlToXml |
||
800 | * @param bool $removeXmlHeader |
||
801 | * @param int $options |
||
802 | * |
||
803 | * @return string |
||
804 | */ |
||
805 | 2 | View Code Duplication | public function xml( |
830 | |||
831 | /** |
||
832 | * @param string $selector |
||
833 | * @param int|null $idx |
||
834 | * |
||
835 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
836 | */ |
||
837 | 3 | public function __invoke($selector, $idx = null) |
|
841 | |||
842 | /** |
||
843 | * @return bool |
||
844 | */ |
||
845 | 122 | public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool |
|
849 | |||
850 | /** |
||
851 | * @return bool |
||
852 | */ |
||
853 | 122 | public function getIsDOMDocumentCreatedWithoutPTagWrapper(): bool |
|
857 | |||
858 | /** |
||
859 | * @return bool |
||
860 | */ |
||
861 | 122 | public function getIsDOMDocumentCreatedWithoutHtml(): bool |
|
865 | |||
866 | /** |
||
867 | * @return bool |
||
868 | */ |
||
869 | 122 | public function getIsDOMDocumentCreatedWithoutBodyWrapper(): bool |
|
873 | |||
874 | /** |
||
875 | * @return bool |
||
876 | */ |
||
877 | 122 | public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool |
|
881 | |||
882 | /** |
||
883 | * @return bool |
||
884 | */ |
||
885 | 122 | public function getIsDOMDocumentCreatedWithoutWrapper(): bool |
|
889 | |||
890 | /** |
||
891 | * @return bool |
||
892 | */ |
||
893 | 122 | public function getIsDOMDocumentCreatedWithFakeEndScript(): bool |
|
897 | |||
898 | /** |
||
899 | * @param string $html |
||
900 | * |
||
901 | * @return string |
||
902 | */ |
||
903 | 3 | protected function keepBrokenHtml(string $html): string |
|
948 | |||
949 | /** |
||
950 | * @param string $html |
||
951 | * |
||
952 | * @return void |
||
953 | */ |
||
954 | 6 | protected function keepSpecialScriptTags(string &$html) |
|
985 | |||
986 | /** |
||
987 | * @param bool $keepBrokenHtml |
||
988 | * |
||
989 | * @return HtmlDomParser |
||
990 | */ |
||
991 | 3 | public function useKeepBrokenHtml(bool $keepBrokenHtml): DomParserInterface |
|
997 | |||
998 | /** |
||
999 | * @param string[] $templateLogicSyntaxInSpecialScriptTags |
||
1000 | * |
||
1001 | * @return HtmlDomParser |
||
1002 | */ |
||
1003 | 2 | public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): DomParserInterface |
|
1015 | |||
1016 | /** |
||
1017 | * @param string[] $specialScriptTags |
||
1018 | * |
||
1019 | * @return HtmlDomParser |
||
1020 | */ |
||
1021 | public function overwriteSpecialScriptTags(array $specialScriptTags): DomParserInterface |
||
1033 | } |
||
1034 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.