Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like HtmlDomParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use HtmlDomParser, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
34 | class HtmlDomParser |
||
35 | { |
||
/**
 * Lower-cased legacy method names mapped to the methods that __call() forwards them to.
 *
 * @var array
 */
protected static $functionAliases = [
    'outertext' => 'html',
    'outerhtml' => 'html',
    'innertext' => 'innerHtml',
    'innerhtml' => 'innerHtml',
    'load'      => 'loadHtml',
    'load_file' => 'loadHtmlFile',
];

/**
 * Characters masked inside detected URLs ('orig') and their placeholder tokens ('tmp').
 * Both lists are index-aligned; see replaceToPreserveHtmlEntities().
 *
 * @var string[][]
 */
protected static $domLinkReplaceHelper = [
    'orig' => ['[', ']', '{', '}'],
    'tmp'  => [
        '____SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_LEFT____',
        '____SIMPLE_HTML_DOM__VOKU__SQUARE_BRACKET_RIGHT____',
        '____SIMPLE_HTML_DOM__VOKU__BRACKET_LEFT____',
        '____SIMPLE_HTML_DOM__VOKU__BRACKET_RIGHT____',
    ],
];

/**
 * Strings masked throughout the markup ('orig') and their placeholder tokens ('tmp').
 * Both lists are index-aligned; see replaceToPreserveHtmlEntities().
 *
 * @var array
 */
protected static $domReplaceHelper = [
    'orig' => ['&', '|', '+', '%', '@', '<html ⚡'],
    'tmp'  => [
        '____SIMPLE_HTML_DOM__VOKU__AMP____',
        '____SIMPLE_HTML_DOM__VOKU__PIPE____',
        '____SIMPLE_HTML_DOM__VOKU__PLUS____',
        '____SIMPLE_HTML_DOM__VOKU__PERCENT____',
        '____SIMPLE_HTML_DOM__VOKU__AT____',
        '<html ____SIMPLE_HTML_DOM__VOKU__GOOGLE_AMP____="true"',
    ],
];

/**
 * Tag name of the artificial element that createDOMDocument() wraps around some inputs.
 *
 * @var string
 */
protected static $domHtmlWrapperHelper = '____simple_html_dom__voku__html_wrapper____';

/**
 * Tag name that keepSpecialScriptTags() substitutes for "script" in text/html script blocks.
 *
 * @var string
 */
protected static $domHtmlSpecialScriptHelper = '____simple_html_dom__voku__html_special_sctipt____';

/**
 * Replacement table with 'tmp' / 'orig' lists, read by putReplacedBackToPreserveHtmlEntities().
 * Emptied by the constructor.
 *
 * @var array
 */
protected static $domBrokenReplaceHelper = [];

/**
 * NOTE(review): presumably assigned through set_callback(); its body is not visible here - confirm.
 *
 * @var callable
 */
protected static $callback;

/**
 * The DOM document this parser operates on.
 *
 * @var \DOMDocument
 */
protected $document;

/**
 * @var string
 */
protected $encoding = 'UTF-8';

/**
 * Set by createDOMDocument() when the input contains no "<" at all.
 *
 * @var bool
 */
protected $isDOMDocumentCreatedWithoutHtml = false;

/**
 * Set by createDOMDocument() when the input contains "<" but does not start with it
 * (ignoring leading whitespace).
 *
 * @var bool
 */
protected $isDOMDocumentCreatedWithoutWrapper = false;

/**
 * Set by createDOMDocument() when the input contains no "<head>".
 *
 * @var bool
 */
protected $isDOMDocumentCreatedWithoutHeadWrapper = false;

/**
 * Set by createDOMDocument() when the input contains no "<html".
 *
 * @var bool
 */
protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;

/**
 * Set by createDOMDocument() when the input contains "<\/script>" but no "</script>".
 *
 * @var bool
 */
protected $isDOMDocumentCreatedWithFakeEndScript = false;

/**
 * When truthy, createDOMDocument() passes the input through keepBrokenHtml() first.
 * Has no default value.
 *
 * @var bool
 */
protected $keepBrokenHtml;
||
129 | |||
/**
 * Constructor
 *
 * Creates an empty document and, when $element is given, fills it either by importing
 * the node or by parsing the value as HTML.
 *
 * @param \DOMNode|SimpleHtmlDomInterface|string $element HTML code or SimpleHtmlDomInterface, \DOMNode
 *
 * @throws \InvalidArgumentException
 */
public function __construct($element = null)
{
    $dom = new \DOMDocument('1.0', $this->getEncoding());
    $dom->preserveWhiteSpace = true;
    $dom->formatOutput = true;
    $this->document = $dom;

    // The broken-HTML replacement table is static, so every new parser starts it empty.
    self::$domBrokenReplaceHelper = [];

    // Unwrap the library's own element type down to its underlying node.
    $source = $element instanceof SimpleHtmlDomInterface ? $element->getNode() : $element;

    if ($source === null) {
        return;
    }

    if (!$source instanceof \DOMNode) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->loadHtml($source);

        return;
    }

    // Deep-import the node into this parser's own document before attaching it.
    $imported = $this->document->importNode($source, true);
    if ($imported instanceof \DOMNode) {
        /** @noinspection UnusedFunctionResultInspection */
        $this->document->appendChild($imported);
    }
}
|
168 | |||
/**
 * Forward a legacy method name (matched case-insensitively) to its current equivalent.
 *
 * @param string $name
 * @param array  $arguments
 *
 * @throws \BadMethodCallException when the name is not a known alias
 *
 * @return bool|mixed
 */
public function __call($name, $arguments)
{
    $alias = \strtolower($name);

    if (!isset(self::$functionAliases[$alias])) {
        throw new \BadMethodCallException('Method does not exist: ' . $alias);
    }

    $target = self::$functionAliases[$alias];

    return \call_user_func_array([$this, $target], $arguments);
}
||
185 | |||
/**
 * Static entry points "str_get_html" and "file_get_html": create a parser and load
 * the given HTML string or file into it.
 *
 * @param string $name
 * @param array  $arguments [0] HTML string or file path (defaults to ''), [1] extra libxml options (defaults to null)
 *
 * @throws \BadMethodCallException
 * @throws \RuntimeException
 * @throws \InvalidArgumentException
 *
 * @return HtmlDomParser
 */
public static function __callStatic($name, $arguments)
{
    $source = $arguments[0] ?? '';
    $libXMLExtraOptions = $arguments[1] ?? null;

    if ($name === 'str_get_html') {
        return (new static())->loadHtml($source, $libXMLExtraOptions);
    }

    if ($name === 'file_get_html') {
        return (new static())->loadHtmlFile($source, $libXMLExtraOptions);
    }

    throw new \BadMethodCallException('Method does not exist');
}
||
216 | |||
/** @noinspection MagicMethodsValidityInspection */

/**
 * Expose the document's markup and text as read-only virtual properties.
 * The property name is matched case-insensitively; unknown names yield null.
 *
 * @param string $name
 *
 * @return string|null
 */
public function __get($name)
{
    $property = \strtolower($name);

    if ($property === 'outerhtml' || $property === 'outertext') {
        return $this->html();
    }

    if ($property === 'innerhtml' || $property === 'innertext') {
        return $this->innerHtml();
    }

    if ($property === 'text' || $property === 'plaintext') {
        return $this->text();
    }

    return null;
}
||
242 | |||
/**
 * Calling the parser like a function is shorthand for find().
 *
 * @param string $selector
 * @param int    $idx
 *
 * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
 */
public function __invoke($selector, $idx = null)
{
    $found = $this->find($selector, $idx);

    return $found;
}
||
253 | |||
/**
 * String conversion yields the same value as html().
 *
 * @return string
 */
public function __toString()
{
    $markup = $this->html();

    return $markup;
}
||
261 | |||
/**
 * No-op kept only for API compatibility; always reports success.
 *
 * @deprecated
 *
 * @return bool always true
 */
public function clear(): bool
{
    // Intentionally does nothing.
    return true;
}
||
273 | |||
/**
 * Swap special characters for placeholder tokens.
 *
 * The strings in $domReplaceHelper are always replaced. If the markup contains URLs,
 * each URL is additionally replaced by a copy whose brackets / braces are masked with
 * the $domLinkReplaceHelper tokens; URL replacements are applied before the generic ones.
 *
 * @param string $html
 *
 * @return string
 */
public static function replaceToPreserveHtmlEntities(string $html): string
{
    $search = self::$domReplaceHelper['orig'];
    $replace = self::$domReplaceHelper['tmp'];

    // Cheap pre-check: without "http" there cannot be a URL to treat specially.
    if (\strpos($html, 'http') === false) {
        return \str_replace($search, $replace, $html);
    }

    // regEx for e.g.: [https://www.domain.de/foo.php?foobar=1&email=lars%40moelleken.org&guid=test1233312&{{foo}}#foo]
    $regExUrl = '/(\[?\bhttps?:\/\/[^\s<>]+(?:\([\w]+\)|[^[:punct:]\s]|\/|\}|\]))/i';
    $matches = [];
    \preg_match_all($regExUrl, $html, $matches);

    if (empty($matches[1])) {
        return \str_replace($search, $replace, $html);
    }

    $urls = (array) $matches[1];

    // str_replace() on an array subject masks every URL and keeps the keys aligned.
    $maskedUrls = \str_replace(
        self::$domLinkReplaceHelper['orig'],
        self::$domLinkReplaceHelper['tmp'],
        $urls
    );

    return \str_replace(
        \array_merge($urls, $search),
        \array_merge($maskedUrls, $replace),
        $html
    );
}
||
314 | |||
/**
 * Reverse the placeholder substitutions: turn the tokens back into the original
 * characters, drop the artificial wrapper tag and restore masked script tags.
 *
 * @param string $html
 *
 * @return string
 */
public static function putReplacedBackToPreserveHtmlEntities(string $html): string
{
    // Built once per request; the source tables are static and never change afterwards.
    static $replacements = null;

    if ($replacements === null) {
        $wrapperTag = self::$domHtmlWrapperHelper;
        $scriptTag = self::$domHtmlSpecialScriptHelper;

        // str_replace() pairs entries by position, so both lists keep the same order.
        $replacements = [
            'tmp' => \array_merge(
                self::$domLinkReplaceHelper['tmp'],
                self::$domReplaceHelper['tmp'],
                [
                    'html_wrapper__start'        => '<' . $wrapperTag . '>',
                    'html_wrapper__end'          => '</' . $wrapperTag . '>',
                    'html_special_script__start' => '<' . $scriptTag,
                    'html_special_script__end'   => '</' . $scriptTag . '>',
                ]
            ),
            'orig' => \array_merge(
                self::$domLinkReplaceHelper['orig'],
                self::$domReplaceHelper['orig'],
                [
                    'html_wrapper__start'        => '',
                    'html_wrapper__end'          => '',
                    'html_special_script__start' => '<script',
                    'html_special_script__end'   => '</script>',
                ]
            ),
        ];
    }

    // Per-document replacements (if any were recorded) are undone first.
    $brokenTmp = self::$domBrokenReplaceHelper['tmp'] ?? [];
    if (\count($brokenTmp) > 0) {
        $html = \str_replace($brokenTmp, self::$domBrokenReplaceHelper['orig'], $html);
    }

    return \str_replace($replacements['tmp'], $replacements['orig'], $html);
}
||
357 | |||
/**
 * Create DOMDocument from HTML.
 *
 * Steps: optionally pre-process broken HTML, record which structural parts the input
 * lacks (the isDOMDocumentCreated* flags), protect script content, mask special
 * characters, then parse - first as XML via SimpleXML and, if that fails or reports
 * errors, via DOMDocument::loadHTML().
 *
 * @param string   $html
 * @param int|null $libXMLExtraOptions extra LIBXML_* flags, OR-ed into the default options
 *
 * @return \DOMDocument
 */
private function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
{
    if ($this->keepBrokenHtml) {
        $html = $this->keepBrokenHtml(\trim($html));
    }

    if (\strpos($html, '<') === false) {
        $this->isDOMDocumentCreatedWithoutHtml = true;
    } elseif (\strpos(\ltrim($html), '<') !== 0) {
        $this->isDOMDocumentCreatedWithoutWrapper = true;
    }

    if (\strpos($html, '<html') === false) {
        $this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
    }

    /** @noinspection HtmlRequiredTitleElement */
    if (\strpos($html, '<head>') === false) {
        $this->isDOMDocumentCreatedWithoutHeadWrapper = true;
    }

    // Only escaped end tags ("<\/script>") present, no real "</script>".
    if (
        \strpos($html, '</script>') === false
        &&
        \strpos($html, '<\/script>') !== false
    ) {
        $this->isDOMDocumentCreatedWithFakeEndScript = true;
    }

    if (\strpos($html, '<script') !== false) {
        $this->html5FallbackForScriptTags($html);

        if (
            \strpos($html, 'type="text/html"') !== false
            ||
            \strpos($html, 'type=\'text/html\'') !== false
            ||
            \strpos($html, 'type=text/html') !== false
        ) {
            $this->keepSpecialScriptTags($html);
        }
    }

    // set error level (previous values are restored at the end)
    $internalErrors = \libxml_use_internal_errors(true);
    // NOTE(review): libxml_disable_entity_loader() is deprecated as of PHP 8.0; it is kept
    // here so the existing hardening is unchanged - revisit when dropping PHP 7 support.
    $disableEntityLoader = \libxml_disable_entity_loader(true);
    \libxml_clear_errors();

    $optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET;

    if (\defined('LIBXML_BIGLINES')) {
        $optionsXml |= \LIBXML_BIGLINES;
    }

    if (\defined('LIBXML_COMPACT')) {
        $optionsXml |= \LIBXML_COMPACT;
    }

    if (\defined('LIBXML_HTML_NODEFDTD')) {
        $optionsXml |= \LIBXML_HTML_NODEFDTD;
    }

    if ($libXMLExtraOptions !== null) {
        $optionsXml |= $libXMLExtraOptions;
    }

    // Give fragments a single artificial root element.
    if (
        $this->isDOMDocumentCreatedWithoutWrapper
        ||
        $this->keepBrokenHtml
    ) {
        $html = '<' . self::$domHtmlWrapperHelper . '>' . $html . '</' . self::$domHtmlWrapperHelper . '>';
    }

    $html = self::replaceToPreserveHtmlEntities($html);

    // First attempt: strict XML parsing; accepted only if libxml reported no errors.
    $documentFound = false;
    $sxe = \simplexml_load_string($html, \SimpleXMLElement::class, $optionsXml);
    if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
        $domElementTmp = \dom_import_simplexml($sxe);
        if ($domElementTmp) {
            $documentFound = true;
            $this->document = $domElementTmp->ownerDocument;
        }
    }

    if ($documentFound === false) {

        // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
        //
        // The declaration is only prepended when the markup does not already start with
        // one. (The previous check passed haystack and needle to stripos() in the wrong
        // order, so it was effectively always true and a second declaration was added.)
        // The prefix comparison is case-sensitive on purpose: only a literal "<?xml"
        // counts as an existing declaration.
        $xmlHackUsed = false;
        if (\strncmp($html, '<?xml', 5) !== 0) {
            $xmlHackUsed = true;
            $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
        }

        $this->document->loadHTML($html, $optionsXml);

        // remove the "xml-encoding" hack
        if ($xmlHackUsed) {
            foreach ($this->document->childNodes as $child) {
                if ($child->nodeType === \XML_PI_NODE) {
                    /** @noinspection UnusedFunctionResultInspection */
                    $this->document->removeChild($child);

                    // Stop immediately: only the first processing instruction is removed.
                    break;
                }
            }
        }
    }

    // set encoding
    $this->document->encoding = $this->getEncoding();

    // restore lib-xml settings
    \libxml_clear_errors();
    \libxml_use_internal_errors($internalErrors);
    \libxml_disable_entity_loader($disableEntityLoader);

    return $this->document;
}
||
486 | |||
/**
 * workaround for bug: https://bugs.php.net/bug.php?id=74628
 *
 * Escapes every "</" inside script content as "<\/" so that only the real closing
 * script tag remains unescaped. The markup is modified in place.
 *
 * @param string $html passed by reference; left untouched if the regular expression fails
 */
protected function html5FallbackForScriptTags(string &$html)
{
    // regEx for e.g.: [<script id="elements-image-2">...</script>]
    /** @noinspection HtmlDeprecatedTag */
    $regExSpecialScript = '/<(script)(?<attr>[^>]*)>(?<content>.*)<\/\1>/isU';

    $escaped = \preg_replace_callback(
        $regExSpecialScript,
        static function ($scripts) {
            return '<script' . $scripts['attr'] . '>' . \str_replace('</', '<\/', $scripts['content']) . '</script>';
        },
        $html
    );

    // preg_replace_callback() returns null on a PCRE error (e.g. backtrack limit hit on
    // very large input). Keep the original markup in that case instead of replacing the
    // caller's string with null.
    if ($escaped !== null) {
        $html = $escaped;
    }
}
|
501 | |||
/**
 * Rename script blocks of type text/html to the special placeholder tag and undo the
 * "<\/" escaping inside them. The markup is modified in place.
 *
 * @param string $html passed by reference
 */
protected function keepSpecialScriptTags(string &$html)
{
    // regEx for e.g.: [<script id="elements-image-1" type="text/html">...</script>]
    $regExSpecialScript = '/<(script) [^>]*type=(["\']){0,1}text\/html\2{0,1}([^>]*)>.*<\/\1>/isU';
    $matches = [];
    \preg_match_all($regExSpecialScript, $html, $matches);

    $placeholderTag = self::$domHtmlSpecialScriptHelper;
    $openLength = \strlen('<script');
    $closeLength = \strlen('</script>');

    foreach ($matches[0] ?? [] as $scriptBlock) {
        // Everything between the leading "<script" and the trailing "</script>".
        $inner = \substr($scriptBlock, $openLength, -$closeLength);

        // remove the html5 fallback
        $replacement = \str_replace(
            '<\/',
            '</',
            '<' . $placeholderTag . $inner . '</' . $placeholderTag . '>'
        );

        $html = \str_replace($scriptBlock, $replacement, $html);
    }
}
|
523 | |||
524 | /** |
||
525 | * @param string $html |
||
526 | * |
||
527 | * @return string |
||
528 | */ |
||
529 | 2 | protected function keepBrokenHtml(string $html): string |
|
574 | |||
575 | /** |
||
576 | * Return element by #id. |
||
577 | * |
||
578 | * @param string $id |
||
579 | * |
||
580 | * @return SimpleHtmlDomInterface |
||
581 | */ |
||
582 | 2 | public function getElementById(string $id): SimpleHtmlDomInterface |
|
586 | |||
587 | /** |
||
588 | * Returns elements by #id. |
||
589 | * |
||
590 | * @param string $id |
||
591 | * @param int|null $idx |
||
592 | * |
||
593 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface |
||
594 | */ |
||
595 | public function getElementsById(string $id, $idx = null) |
||
599 | |||
600 | /** |
||
601 | * Return elements by .class. |
||
602 | * |
||
603 | * @param string $class |
||
604 | * |
||
605 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface |
||
606 | */ |
||
607 | public function getElementByClass(string $class): SimpleHtmlDomNodeInterface |
||
611 | |||
612 | /** |
||
613 | * Return element by tag name. |
||
614 | * |
||
615 | * @param string $name |
||
616 | * |
||
617 | * @return SimpleHtmlDomInterface |
||
618 | */ |
||
619 | public function getElementByTagName(string $name): SimpleHtmlDomInterface |
||
629 | |||
630 | /** |
||
631 | * Returns elements by tag name. |
||
632 | * |
||
633 | * @param string $name |
||
634 | * @param int|null $idx |
||
635 | * |
||
636 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface |
||
637 | */ |
||
638 | View Code Duplication | public function getElementsByTagName(string $name, $idx = null) |
|
639 | { |
||
640 | 3 | $nodesList = $this->document->getElementsByTagName($name); |
|
641 | |||
665 | |||
666 | /** |
||
667 | * Find one node with a CSS selector. |
||
668 | * |
||
669 | * @param string $selector |
||
670 | * |
||
671 | * @return SimpleHtmlDomInterface |
||
672 | */ |
||
673 | public function findOne(string $selector): SimpleHtmlDomInterface |
||
677 | |||
678 | /** |
||
679 | * Find nodes with a CSS selector. |
||
680 | * |
||
681 | * @param string $selector |
||
682 | * |
||
683 | * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface |
||
684 | */ |
||
685 | public function findMulti(string $selector): SimpleHtmlDomNodeInterface |
||
689 | |||
690 | /** |
||
691 | * Find list of nodes with a CSS selector. |
||
692 | * |
||
693 | * @param string $selector |
||
694 | * @param int|null $idx |
||
695 | * |
||
696 | * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface |
||
697 | */ |
||
698 | public function find(string $selector, $idx = null) |
||
727 | |||
728 | /** |
||
729 | * @param string $content |
||
730 | * @param bool $multiDecodeNewHtmlEntity |
||
731 | * |
||
732 | * @return string |
||
733 | */ |
||
734 | public function fixHtmlOutput(string $content, bool $multiDecodeNewHtmlEntity = false): string |
||
841 | |||
842 | /** |
||
843 | * @return \DOMDocument |
||
844 | */ |
||
845 | public function getDocument(): \DOMDocument |
||
849 | |||
850 | /** |
||
851 | * Get the encoding to use. |
||
852 | * |
||
853 | * @return string |
||
854 | */ |
||
855 | private function getEncoding(): string |
||
859 | |||
860 | /** |
||
861 | * @return bool |
||
862 | */ |
||
863 | public function getIsDOMDocumentCreatedWithoutHtml(): bool |
||
867 | |||
868 | /** |
||
869 | * @return bool |
||
870 | */ |
||
871 | public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool |
||
875 | |||
876 | /** |
||
877 | * @return bool |
||
878 | */ |
||
879 | public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool |
||
883 | |||
884 | /** |
||
885 | * @return bool |
||
886 | */ |
||
887 | public function getIsDOMDocumentCreatedWithoutWrapper(): bool |
||
891 | |||
892 | /** |
||
893 | * Get dom node's outer html. |
||
894 | * |
||
895 | * @param bool $multiDecodeNewHtmlEntity |
||
896 | * |
||
897 | * @return string |
||
898 | */ |
||
899 | public function html(bool $multiDecodeNewHtmlEntity = false): string |
||
917 | |||
918 | /** |
||
919 | * @param bool $keepBrokenHtml |
||
920 | * |
||
921 | * @return HtmlDomParser |
||
922 | */ |
||
923 | public function useKeepBrokenHtml(bool $keepBrokenHtml): self |
||
929 | |||
930 | /** |
||
931 | * Get the HTML as XML. |
||
932 | * |
||
933 | * @param bool $multiDecodeNewHtmlEntity |
||
934 | * |
||
935 | * @return string |
||
936 | */ |
||
937 | public function xml(bool $multiDecodeNewHtmlEntity = false): string |
||
946 | |||
947 | /** |
||
948 | * Get dom node's inner html. |
||
949 | * |
||
950 | * @param bool $multiDecodeNewHtmlEntity |
||
951 | * |
||
952 | * @return string |
||
953 | */ |
||
954 | public function innerHtml(bool $multiDecodeNewHtmlEntity = false): string |
||
967 | |||
968 | /** |
||
969 | * Load HTML from string. |
||
970 | * |
||
971 | * @param string $html |
||
972 | * @param int|null $libXMLExtraOptions |
||
973 | * |
||
974 | * @throws \InvalidArgumentException if argument is not string |
||
975 | * |
||
976 | * @return HtmlDomParser |
||
977 | */ |
||
978 | public function loadHtml(string $html, $libXMLExtraOptions = null): self |
||
984 | |||
985 | /** |
||
986 | * Load HTML from file. |
||
987 | * |
||
988 | * @param string $filePath |
||
989 | * @param int|null $libXMLExtraOptions |
||
990 | * |
||
991 | * @throws \RuntimeException |
||
992 | * @throws \InvalidArgumentException |
||
993 | * |
||
994 | * @return HtmlDomParser |
||
995 | */ |
||
996 | public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): self |
||
1023 | |||
1024 | /** |
||
1025 | * Save the html-dom as string. |
||
1026 | * |
||
1027 | * @param string $filepath |
||
1028 | * |
||
1029 | * @return string |
||
1030 | */ |
||
1031 | public function save(string $filepath = ''): string |
||
1040 | |||
1041 | /** |
||
1042 | * @param callable $functionName |
||
1043 | */ |
||
1044 | public function set_callback($functionName) |
||
1048 | |||
1049 | /** |
||
1050 | * Get dom node's plain text. |
||
1051 | * |
||
1052 | * @param bool $multiDecodeNewHtmlEntity |
||
1053 | * |
||
1054 | * @return string |
||
1055 | */ |
||
1056 | public function text(bool $multiDecodeNewHtmlEntity = false): string |
||
1060 | |||
1061 | public function __clone() |
||
1065 | } |
||
1066 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.