Complex classes like HtmlDomParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDomParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
59 | class HtmlDomParser extends AbstractDomParser |
||
60 | { |
||
61 | /** |
||
62 | * @var string[] |
||
63 | */ |
||
64 | protected static $functionAliases = [ |
||
65 | 'outertext' => 'html', |
||
66 | 'outerhtml' => 'html', |
||
67 | 'innertext' => 'innerHtml', |
||
68 | 'innerhtml' => 'innerHtml', |
||
69 | 'load' => 'loadHtml', |
||
70 | 'load_file' => 'loadHtmlFile', |
||
71 | ]; |
||
72 | |||
73 | /** |
||
74 | * @var string[] |
||
75 | */ |
||
76 | protected $templateLogicSyntaxInSpecialScriptTags = [ |
||
77 | '+', |
||
78 | '<%', |
||
79 | '{%', |
||
80 | '{{', |
||
81 | ]; |
||
82 | |||
83 | /** |
||
84 | * The properties specified for each special script tag is an array. |
||
85 | * |
||
86 | * ```php |
||
87 | * protected $specialScriptTags = [ |
||
88 | * 'text/html', |
||
89 | * 'text/x-custom-template', |
||
90 | * 'text/x-handlebars-template' |
||
91 | * ] |
||
92 | * ``` |
||
93 | * |
||
94 | * @var string[] |
||
95 | */ |
||
96 | protected $specialScriptTags = [ |
||
97 | 'text/html', |
||
98 | 'text/x-custom-template', |
||
99 | 'text/x-handlebars-template', |
||
100 | ]; |
||
101 | |||
102 | /** |
||
103 | * @var string[] |
||
104 | */ |
||
105 | protected $selfClosingTags = [ |
||
106 | 'area', |
||
107 | 'base', |
||
108 | 'br', |
||
109 | 'col', |
||
110 | 'command', |
||
111 | 'embed', |
||
112 | 'hr', |
||
113 | 'img', |
||
114 | 'input', |
||
115 | 'keygen', |
||
116 | 'link', |
||
117 | 'meta', |
||
118 | 'param', |
||
119 | 'source', |
||
120 | 'track', |
||
121 | 'wbr', |
||
122 | ]; |
||
123 | |||
124 | /** |
||
125 | * @var bool |
||
126 | */ |
||
127 | protected $isDOMDocumentCreatedWithoutHtml = false; |
||
128 | |||
129 | /** |
||
130 | * @var bool |
||
131 | */ |
||
132 | protected $isDOMDocumentCreatedWithoutWrapper = false; |
||
133 | |||
134 | /** |
||
135 | * @var bool |
||
136 | */ |
||
137 | protected $isDOMDocumentCreatedWithCommentWrapper = false; |
||
138 | |||
139 | /** |
||
140 | * @var bool |
||
141 | */ |
||
142 | protected $isDOMDocumentCreatedWithoutHeadWrapper = false; |
||
143 | |||
144 | /** |
||
145 | * @var bool |
||
146 | */ |
||
147 | protected $isDOMDocumentCreatedWithoutPTagWrapper = false; |
||
148 | |||
149 | /** |
||
150 | * @var bool |
||
151 | */ |
||
152 | protected $isDOMDocumentCreatedWithoutHtmlWrapper = false; |
||
153 | |||
154 | /** |
||
155 | * @var bool |
||
156 | */ |
||
157 | protected $isDOMDocumentCreatedWithoutBodyWrapper = false; |
||
158 | |||
159 | /** |
||
160 | * @var bool |
||
161 | */ |
||
162 | protected $isDOMDocumentCreatedWithFakeEndScript = false; |
||
163 | |||
164 | /** |
||
165 | * @var bool |
||
166 | */ |
||
167 | protected $keepBrokenHtml = false; |
||
168 | |||
169 | /** |
||
170 | * @param \DOMNode|SimpleHtmlDomInterface|string $element HTML code or SimpleHtmlDomInterface, \DOMNode |
||
171 | */ |
||
172 | 219 | public function __construct($element = null) |
|
200 | |||
201 | /** |
||
202 | * @param string $name |
||
203 | * @param array $arguments |
||
204 | * |
||
205 | * @return bool|mixed |
||
206 | */ |
||
207 | 79 | public function __call($name, $arguments) |
|
217 | |||
218 | /** |
||
219 | * @param string $name |
||
220 | * @param array $arguments |
||
221 | * |
||
222 | * @throws \BadMethodCallException |
||
223 | * @throws \RuntimeException |
||
224 | * |
||
225 | * @return HtmlDomParser |
||
226 | */ |
||
227 | 28 | public static function __callStatic($name, $arguments) |
|
247 | |||
248 | /** @noinspection MagicMethodsValidityInspection */ |
||
249 | |||
250 | /** |
||
251 | * @param string $name |
||
252 | * |
||
253 | * @return string|null |
||
254 | */ |
||
255 | 17 | public function __get($name) |
|
273 | |||
274 | /** |
||
275 | * @return string |
||
276 | */ |
||
277 | 20 | public function __toString() |
|
281 | |||
282 | /** |
||
283 | * does nothing (only for api-compatibility-reasons) |
||
284 | * |
||
285 | * @return bool |
||
286 | * |
||
287 | * @deprecated |
||
288 | */ |
||
289 | public function clear(): bool |
||
293 | |||
294 | /** |
||
295 | * Create DOMDocument from HTML. |
||
296 | * |
||
297 | * @param string|null $html |
||
298 | * @param int|null $libXMLExtraOptions |
||
299 | * |
||
300 | * @return DOMDocument |
||
301 | */ |
||
302 | 204 | protected function createDOMDocument(?string $html, $libXMLExtraOptions = null): DOMDocument |
|
500 | |||
501 | /** |
||
502 | * Find list of nodes with a CSS selector. |
||
503 | * |
||
504 | * @param string $selector |
||
505 | * @param int|null $idx |
||
506 | * |
||
507 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
508 | */ |
||
509 | 149 | public function find(string $selector, $idx = null) |
|
540 | |||
541 | /** |
||
542 | * Find nodes with a CSS selector. |
||
543 | * |
||
544 | * @param string $selector |
||
545 | * |
||
546 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
547 | */ |
||
548 | 12 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface |
|
552 | |||
553 | /** |
||
554 | * Find nodes with a CSS selector or false, if no element is found. |
||
555 | * |
||
556 | * @param string $selector |
||
557 | * |
||
558 | * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
559 | */ |
||
560 | 4 | public function findMultiOrFalse(string $selector) |
|
570 | |||
571 | /** |
||
572 | * Find one node with a CSS selector. |
||
573 | * |
||
574 | * @param string $selector |
||
575 | * |
||
576 | * @return SimpleHtmlDomInterface |
||
577 | */ |
||
578 | 33 | public function findOne(string $selector): SimpleHtmlDomInterface |
|
582 | |||
583 | /** |
||
584 | * Find one node with a CSS selector or false, if no element is found. |
||
585 | * |
||
586 | * @param string $selector |
||
587 | * |
||
588 | * @return false|SimpleHtmlDomInterface |
||
589 | */ |
||
590 | 6 | public function findOneOrFalse(string $selector) |
|
600 | |||
601 | /** |
||
602 | * @param string $content |
||
603 | * @param bool $multiDecodeNewHtmlEntity |
||
604 | * |
||
605 | * @return string |
||
606 | */ |
||
607 | 130 | public function fixHtmlOutput( |
|
717 | |||
718 | /** |
||
719 | * Return elements by ".class". |
||
720 | * |
||
721 | * @param string $class |
||
722 | * |
||
723 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
724 | */ |
||
725 | public function getElementByClass(string $class): SimpleHtmlDomNodeInterface |
||
729 | |||
730 | /** |
||
731 | * Return element by #id. |
||
732 | * |
||
733 | * @param string $id |
||
734 | * |
||
735 | * @return SimpleHtmlDomInterface |
||
736 | */ |
||
737 | 3 | public function getElementById(string $id): SimpleHtmlDomInterface |
|
741 | |||
742 | /** |
||
743 | * Return element by tag name. |
||
744 | * |
||
745 | * @param string $name |
||
746 | * |
||
747 | * @return SimpleHtmlDomInterface |
||
748 | */ |
||
749 | 1 | public function getElementByTagName(string $name): SimpleHtmlDomInterface |
|
759 | |||
760 | /** |
||
761 | * Returns elements by "#id". |
||
762 | * |
||
763 | * @param string $id |
||
764 | * @param int|null $idx |
||
765 | * |
||
766 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
767 | */ |
||
768 | public function getElementsById(string $id, $idx = null) |
||
772 | |||
773 | /** |
||
774 | * Returns elements by tag name. |
||
775 | * |
||
776 | * @param string $name |
||
777 | * @param int|null $idx |
||
778 | * |
||
779 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
780 | */ |
||
781 | 6 | public function getElementsByTagName(string $name, $idx = null) |
|
808 | |||
809 | /** |
||
810 | * Get dom node's outer html. |
||
811 | * |
||
812 | * @param bool $multiDecodeNewHtmlEntity |
||
813 | * |
||
814 | * @return string |
||
815 | */ |
||
816 | 96 | public function html(bool $multiDecodeNewHtmlEntity = false): string |
|
834 | |||
835 | /** |
||
836 | * Load HTML from string. |
||
837 | * |
||
838 | * @param string|null $html |
||
839 | * @param int|null $libXMLExtraOptions |
||
840 | * |
||
841 | * @return HtmlDomParser |
||
842 | */ |
||
843 | 207 | public function loadHtml($html, $libXMLExtraOptions = null): DomParserInterface |
|
852 | |||
853 | /** |
||
854 | * Load HTML from file. |
||
855 | * |
||
856 | * @param string $filePath |
||
857 | * @param int|null $libXMLExtraOptions |
||
858 | * |
||
859 | * @throws \RuntimeException |
||
860 | * |
||
861 | * @return HtmlDomParser |
||
862 | */ |
||
863 | 13 | public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): DomParserInterface |
|
893 | |||
894 | /** |
||
895 | * Get the HTML as XML or plain XML if needed. |
||
896 | * |
||
897 | * @param bool $multiDecodeNewHtmlEntity |
||
898 | * @param bool $htmlToXml |
||
899 | * @param bool $removeXmlHeader |
||
900 | * @param int $options |
||
901 | * |
||
902 | * @return string |
||
903 | */ |
||
904 | 2 | public function xml( |
|
929 | |||
930 | /** |
||
931 | * @param string $selector |
||
932 | * @param int $idx |
||
933 | * |
||
934 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface> |
||
935 | */ |
||
936 | 3 | public function __invoke($selector, $idx = null) |
|
940 | |||
941 | /** |
||
942 | * @return bool |
||
943 | */ |
||
944 | 130 | public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool |
|
948 | |||
949 | /** |
||
950 | * @return bool |
||
951 | */ |
||
952 | 130 | public function getIsDOMDocumentCreatedWithoutPTagWrapper(): bool |
|
956 | |||
957 | /** |
||
958 | * @return bool |
||
959 | */ |
||
960 | 130 | public function getIsDOMDocumentCreatedWithoutHtml(): bool |
|
964 | |||
965 | /** |
||
966 | * @return bool |
||
967 | */ |
||
968 | 130 | public function getIsDOMDocumentCreatedWithoutBodyWrapper(): bool |
|
972 | |||
973 | /** |
||
974 | * @return bool |
||
975 | */ |
||
976 | 130 | public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool |
|
980 | |||
981 | /** |
||
982 | * @return bool |
||
983 | */ |
||
984 | 130 | public function getIsDOMDocumentCreatedWithoutWrapper(): bool |
|
988 | |||
989 | /** |
||
990 | * @return bool |
||
991 | */ |
||
992 | 130 | public function getIsDOMDocumentCreatedWithFakeEndScript(): bool |
|
996 | |||
997 | /** |
||
998 | * @param string $html |
||
999 | * |
||
1000 | * @return string |
||
1001 | */ |
||
1002 | 5 | protected function keepBrokenHtml(string $html): string |
|
1047 | |||
1048 | /** |
||
1049 | * @param string $html |
||
1050 | * |
||
1051 | * @return void |
||
1052 | */ |
||
1053 | 6 | protected function keepSpecialScriptTags(string &$html) |
|
1090 | |||
1091 | /** |
||
1092 | * @param bool $keepBrokenHtml |
||
1093 | * |
||
1094 | * @return HtmlDomParser |
||
1095 | */ |
||
1096 | 5 | public function useKeepBrokenHtml(bool $keepBrokenHtml): DomParserInterface |
|
1102 | |||
1103 | /** |
||
1104 | * @param string[] $templateLogicSyntaxInSpecialScriptTags |
||
1105 | * |
||
1106 | * @return HtmlDomParser |
||
1107 | */ |
||
1108 | 2 | public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): DomParserInterface |
|
1120 | |||
1121 | /** |
||
1122 | * @param string[] $specialScriptTags |
||
1123 | * |
||
1124 | * @return HtmlDomParser |
||
1125 | */ |
||
1126 | public function overwriteSpecialScriptTags(array $specialScriptTags): DomParserInterface |
||
1138 | } |
||
1139 |
If you define a variable conditionally, it can happen that it is not defined for all execution paths.
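Let’s take a look at an example:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;

            case 'bar':
                $x = 2;
                break;
        }

        var_dump($x); // $x is potentially undefined here.
    }
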
In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.
Available Fixes
Check for existence of the variable explicitly:

    if (isset($x)) { // Only use $x when it has actually been set.
        var_dump($x);
    }

Define a default value for the variable:

    $x = ''; // Default value, assigned before the switch and overridden on certain paths.

Add a value for the missing path:

    default:
        $x = 3; // Alternatively, throw an exception for unexpected input.
        break;