Completed
Push — master ( 9f7e76...8ede3c )
by Lars
01:48
created

HtmlDomParser::findMulti()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @property-read string $outerText
9
 *                                 <p>Get dom node's outer html (alias for "outerHtml").</p>
10
 * @property-read string $outerHtml
11
 *                                 <p>Get dom node's outer html.</p>
12
 * @property-read string $innerText
13
 *                                 <p>Get dom node's inner html (alias for "innerHtml").</p>
14
 * @property-read string $innerHtml
15
 *                                 <p>Get dom node's inner html.</p>
16
 * @property-read string $plaintext
17
 *                                 <p>Get dom node's plain text.</p>
18
 *
19
 * @method string outerText()
20
 *                                 <p>Get dom node's outer html (alias for "outerHtml()").</p>
21
 * @method string outerHtml()
22
 *                                 <p>Get dom node's outer html.</p>
23
 * @method string innerText()
24
 *                                 <p>Get dom node's inner html (alias for "innerHtml()").</p>
25
 * @method HtmlDomParser load(string $html)
26
 *                                 <p>Load HTML from string.</p>
27
 * @method HtmlDomParser load_file(string $html)
28
 *                                 <p>Load HTML from file.</p>
29
 * @method static HtmlDomParser file_get_html($html, $libXMLExtraOptions = null)
30
 *                                 <p>Load HTML from file.</p>
31
 * @method static HtmlDomParser str_get_html($html, $libXMLExtraOptions = null)
32
 *                                 <p>Load HTML from string.</p>
33
 */
34
class HtmlDomParser extends AbstractDomParser
35
{
36
    /**
37
     * @var string[]
38
     */
39
    protected static $functionAliases = [
40
        'outertext' => 'html',
41
        'outerhtml' => 'html',
42
        'innertext' => 'innerHtml',
43
        'innerhtml' => 'innerHtml',
44
        'load'      => 'loadHtml',
45
        'load_file' => 'loadHtmlFile',
46
    ];
47
48
    /**
49
     * @var bool
50
     */
51
    protected $isDOMDocumentCreatedWithoutHtml = false;
52
53
    /**
54
     * @var bool
55
     */
56
    protected $isDOMDocumentCreatedWithoutWrapper = false;
57
58
    /**
59
     * @var bool
60
     */
61
    protected $isDOMDocumentCreatedWithoutHeadWrapper = false;
62
63
    /**
64
     * @var bool
65
     */
66
    protected $isDOMDocumentCreatedWithoutPTagWrapper = false;
67
68
    /**
69
     * @var bool
70
     */
71
    protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;
72
73
    /**
74
     * @var bool
75
     */
76
    protected $isDOMDocumentCreatedWithoutBodyWrapper = false;
77
78
    /**
79
     * @var bool
80
     */
81
    protected $isDOMDocumentCreatedWithFakeEndScript = false;
82
83
    /**
84
     * @var bool
85
     */
86
    protected $keepBrokenHtml;
87
88
    /**
     * Set up an empty DOMDocument and optionally seed it with the given content.
     *
     * A SimpleHtmlDomInterface is unwrapped to its DOM node, a \DOMNode is deep-imported
     * into the new document, and any other non-null value is handed to loadHtml().
     *
     * NOTE(review): static analysis flagged this constructor as duplicated elsewhere in the project.
     *
     * @param \DOMNode|SimpleHtmlDomInterface|string $element HTML code or SimpleHtmlDomInterface, \DOMNode
     */
    public function __construct($element = null)
    {
        $this->document = new \DOMDocument('1.0', $this->getEncoding());

        // DOMDocument settings
        $this->document->preserveWhiteSpace = true;
        $this->document->formatOutput = true;

        // unwrap the library's element wrapper down to the underlying DOM node
        $source = $element instanceof SimpleHtmlDomInterface
            ? $element->getNode()
            : $element;

        if ($source instanceof \DOMNode) {
            $imported = $this->document->importNode($source, true);

            if ($imported instanceof \DOMNode) {
                /** @noinspection UnusedFunctionResultInspection */
                $this->document->appendChild($imported);
            }

            return;
        }

        if ($source === null) {
            return;
        }

        /** @noinspection UnusedFunctionResultInspection */
        $this->loadHtml($source);
    }
119
120
    /**
     * Route a legacy (case-insensitive) method name to the real method listed in
     * self::$functionAliases.
     *
     * @param string $name
     * @param array  $arguments
     *
     * @throws \BadMethodCallException if the lower-cased name is not a known alias
     *
     * @return bool|mixed
     */
    public function __call($name, $arguments)
    {
        $name = \strtolower($name);
        $target = self::$functionAliases[$name] ?? null;

        if ($target === null) {
            throw new \BadMethodCallException('Method does not exist: ' . $name);
        }

        return \call_user_func_array([$this, $target], $arguments);
    }
136
137
    /**
     * Static entry points "str_get_html" and "file_get_html": create a new parser
     * and load the first argument as an HTML string or as a file path respectively.
     *
     * NOTE(review): static analysis flagged this method as duplicated elsewhere in the project.
     *
     * @param string $name
     * @param array  $arguments [0] => html or file path (defaults to ''), [1] => extra libxml options (defaults to null)
     *
     * @throws \BadMethodCallException
     * @throws \RuntimeException
     *
     * @return HtmlDomParser
     */
    public static function __callStatic($name, $arguments)
    {
        $source = $arguments[0] ?? '';
        $libXmlOptions = $arguments[1] ?? null;

        // static alias => instance method doing the actual loading
        $loaders = [
            'str_get_html'  => 'loadHtml',
            'file_get_html' => 'loadHtmlFile',
        ];

        if (!isset($loaders[$name])) {
            throw new \BadMethodCallException('Method does not exist');
        }

        $loader = $loaders[$name];

        return (new static())->{$loader}($source, $libXmlOptions);
    }
166
167
    /** @noinspection MagicMethodsValidityInspection */
168
169
    /**
     * Magic read access for the legacy (case-insensitive) properties
     * outerHtml/outerText, innerHtml/innerText and text/plaintext.
     *
     * @param string $name
     *
     * @return string|null the rendered value, or null for an unknown property name
     */
    public function __get($name)
    {
        $property = \strtolower($name);

        if (\in_array($property, ['outerhtml', 'outertext'], true)) {
            return $this->html();
        }

        if (\in_array($property, ['innerhtml', 'innertext'], true)) {
            return $this->innerHtml();
        }

        if (\in_array($property, ['text', 'plaintext'], true)) {
            return $this->text();
        }

        return null;
    }
192
193
    /**
     * String conversion yields the same output as html().
     *
     * @return string
     */
    public function __toString()
    {
        $outerHtml = $this->html();

        return $outerHtml;
    }
200
201
    /**
     * No-op kept only so that code written against the older API keeps working.
     *
     * @return bool always true
     *
     * @deprecated
     */
    public function clear(): bool
    {
        // nothing to release here
        $cleared = true;

        return $cleared;
    }
212
213
    /**
     * Create DOMDocument from HTML.
     *
     * First records, via the isDOMDocumentCreated* flags, which structural pieces
     * (any markup at all, a leading tag, <html>, <body>, <head>, <p>, a real closing
     * script tag) are missing from the input. Then parses the markup: strictly with
     * SimpleXML first and, when that fails or reports libxml errors, leniently with
     * \DOMDocument::loadHTML(). The libxml error settings are restored before returning.
     *
     * NOTE(review): the flags are only ever raised (never lowered) in this method.
     * NOTE(review): static analysis flagged both parsing branches as duplicated elsewhere in the project.
     *
     * @param string   $html
     * @param int|null $libXMLExtraOptions extra LIBXML_* flags, OR-ed into the default option set
     *
     * @return \DOMDocument
     */
    protected function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
    {
        if ($this->keepBrokenHtml) {
            $html = $this->keepBrokenHtml(\trim($html));
        }

        if (\strpos($html, '<') === false) {
            $this->isDOMDocumentCreatedWithoutHtml = true;
        } elseif (\strpos(\ltrim($html), '<') !== 0) {
            // there is markup, but the input starts with plain text
            $this->isDOMDocumentCreatedWithoutWrapper = true;
        }

        if (\strpos($html, '<html') === false) {
            $this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
        }

        if (\strpos($html, '<body') === false) {
            $this->isDOMDocumentCreatedWithoutBodyWrapper = true;
        }

        /** @noinspection HtmlRequiredTitleElement */
        if (\strpos($html, '<head>') === false) {
            $this->isDOMDocumentCreatedWithoutHeadWrapper = true;
        }

        /** @noinspection HtmlRequiredTitleElement */
        if (\strpos($html, '<p>') === false) {
            $this->isDOMDocumentCreatedWithoutPTagWrapper = true;
        }

        // only an escaped closing script tag ("<\/script>") is present
        if (
            \strpos($html, '</script>') === false
            &&
            \strpos($html, '<\/script>') !== false
        ) {
            $this->isDOMDocumentCreatedWithFakeEndScript = true;
        }

        if (\strpos($html, '<script') !== false) {
            $this->html5FallbackForScriptTags($html);

            if (
                \strpos($html, 'type="text/html"') !== false
                ||
                \strpos($html, 'type=\'text/html\'') !== false
                ||
                \strpos($html, 'type=text/html') !== false
                ||
                \strpos($html, 'type="text/x-custom-template"') !== false
                ||
                \strpos($html, 'type=\'text/x-custom-template\'') !== false
                ||
                \strpos($html, 'type=text/x-custom-template') !== false
            ) {
                $this->keepSpecialScriptTags($html);
            }
        }

        // set error level
        $internalErrors = \libxml_use_internal_errors(true);

        // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
        // only toggled (and later restored) on older runtimes
        $disableEntityLoader = null;
        if (\PHP_VERSION_ID < 80000) {
            $disableEntityLoader = \libxml_disable_entity_loader(true);
        }

        \libxml_clear_errors();

        $optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET;

        if (\defined('LIBXML_BIGLINES')) {
            $optionsXml |= \LIBXML_BIGLINES;
        }

        if (\defined('LIBXML_COMPACT')) {
            $optionsXml |= \LIBXML_COMPACT;
        }

        if (\defined('LIBXML_HTML_NODEFDTD')) {
            $optionsXml |= \LIBXML_HTML_NODEFDTD;
        }

        if ($libXMLExtraOptions !== null) {
            $optionsXml |= $libXMLExtraOptions;
        }

        if (
            $this->isDOMDocumentCreatedWithoutWrapper
            ||
            $this->keepBrokenHtml
        ) {
            // wrap the input in the helper tag
            $html = '<' . self::$domHtmlWrapperHelper . '>' . $html . '</' . self::$domHtmlWrapperHelper . '>';
        }

        $html = self::replaceToPreserveHtmlEntities($html);

        // 1st attempt: strict parsing via SimpleXML, accepted only if libxml reported no errors
        $documentFound = false;
        $sxe = \simplexml_load_string($html, \SimpleXMLElement::class, $optionsXml);
        if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
            $domElementTmp = \dom_import_simplexml($sxe);
            if ($domElementTmp) {
                $documentFound = true;
                $this->document = $domElementTmp->ownerDocument;
            }
        }

        // 2nd attempt: lenient HTML parsing
        if ($documentFound === false) {
            // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
            $xmlHackUsed = false;
            // stripos() takes (haystack, needle): prepend the encoding declaration
            // only when the markup does not already start with an XML declaration
            if (\stripos($html, '<?xml') !== 0) {
                $xmlHackUsed = true;
                $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
            }

            $this->document->loadHTML($html, $optionsXml);

            // remove the "xml-encoding" hack
            if ($xmlHackUsed) {
                foreach ($this->document->childNodes as $child) {
                    if ($child->nodeType === \XML_PI_NODE) {
                        /** @noinspection UnusedFunctionResultInspection */
                        $this->document->removeChild($child);

                        break;
                    }
                }
            }
        }

        // set encoding
        $this->document->encoding = $this->getEncoding();

        // restore lib-xml settings
        \libxml_clear_errors();
        \libxml_use_internal_errors($internalErrors);
        if ($disableEntityLoader !== null) {
            \libxml_disable_entity_loader($disableEntityLoader);
        }

        return $this->document;
    }
357
358
    /**
     * Find list of nodes with a CSS selector.
     *
     * The selector is converted to XPath and evaluated against the current document.
     * Without an index, all matches are returned (or a SimpleHtmlDomNodeBlank when there
     * are none). With an index, the single match at that position is returned (or a
     * SimpleHtmlDomBlank when it does not exist); negative indexes count from the end.
     *
     * NOTE(review): static analysis flagged this method as duplicated elsewhere in the project.
     *
     * @param string   $selector
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function find(string $selector, $idx = null)
    {
        $xPathQuery = SelectorConverter::toXPath($selector);

        $xPath = new \DOMXPath($this->document);
        $nodesList = $xPath->query($xPathQuery);
        $elements = new SimpleHtmlDomNode();

        // DOMXPath::query() returns false for an expression it cannot evaluate;
        // treat that like "no matches" instead of iterating over false
        if ($nodesList !== false) {
            foreach ($nodesList as $node) {
                $elements[] = new SimpleHtmlDom($node);
            }
        }

        // return all elements
        if ($idx === null) {
            if (\count($elements) === 0) {
                return new SimpleHtmlDomNodeBlank();
            }

            return $elements;
        }

        // handle negative values
        if ($idx < 0) {
            $idx = \count($elements) + $idx;
        }

        // return one element
        return $elements[$idx] ?? new SimpleHtmlDomBlank();
    }
395
396
    /**
     * Find nodes with a CSS selector.
     *
     * Delegates to find() without an index.
     *
     * @param string $selector
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function findMulti(string $selector): SimpleHtmlDomNodeInterface
    {
        $matches = $this->find($selector);

        return $matches;
    }
407
408
    /**
     * Find nodes with a CSS selector or false, if no element is found.
     *
     * "No element" is detected by find() handing back a SimpleHtmlDomNodeBlank.
     *
     * @param string $selector
     *
     * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function findMultiOrFalse(string $selector)
    {
        $matches = $this->find($selector, null);

        return $matches instanceof SimpleHtmlDomNodeBlank ? false : $matches;
    }
425
426
    /**
     * Find one node with a CSS selector.
     *
     * Delegates to find() with index 0, i.e. the first match.
     *
     * @param string $selector
     *
     * @return SimpleHtmlDomInterface
     */
    public function findOne(string $selector): SimpleHtmlDomInterface
    {
        $firstMatch = $this->find($selector, 0);

        return $firstMatch;
    }
437
438
    /**
     * Find one node with a CSS selector or false, if no element is found.
     *
     * "No element" is detected by find() handing back a SimpleHtmlDomBlank.
     *
     * @param string $selector
     *
     * @return false|SimpleHtmlDomInterface
     */
    public function findOneOrFalse(string $selector)
    {
        $firstMatch = $this->find($selector, 0);

        return $firstMatch instanceof SimpleHtmlDomBlank ? false : $firstMatch;
    }
455
456
    /**
     * Post-process serialized markup: strip the wrapper tags that were not part of
     * the input (as reported by the getIsDOMDocumentCreated*() getters), remove the
     * internal helper tags, decode entities and restore the preserved ones.
     *
     * The removals run in a fixed order: html, head, body, script end tag, leading
     * <p> / all </p>, remaining p tags, default doctype, helper tags.
     *
     * @param string $content
     * @param bool   $multiDecodeNewHtmlEntity
     *
     * @return string
     */
    public function fixHtmlOutput(
        string $content,
        bool $multiDecodeNewHtmlEntity = false
    ): string {
        // removes every occurrence of the given fragments from the markup
        $strip = static function (string $markup, array $fragments): string {
            return \str_replace($fragments, '', $markup);
        };

        // INFO: DOMDocument will encapsulate plaintext into a e.g. paragraph tag (<p>),
        //          so we try to remove it here again ...

        if ($this->getIsDOMDocumentCreatedWithoutHtmlWrapper()) {
            /** @noinspection HtmlRequiredLangAttribute */
            $content = $strip($content, ['<html>', '</html>']);
        }

        if ($this->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
            /** @noinspection HtmlRequiredTitleElement */
            $content = $strip($content, ['<head>', '</head>']);
        }

        if ($this->getIsDOMDocumentCreatedWithoutBodyWrapper()) {
            /** @noinspection HtmlRequiredLangAttribute */
            $content = $strip($content, ['<body>', '</body>']);
        }

        if ($this->getIsDOMDocumentCreatedWithFakeEndScript()) {
            $content = $strip($content, ['</script>']);
        }

        if ($this->getIsDOMDocumentCreatedWithoutWrapper()) {
            // only a leading <p>, but every closing </p>
            $content = (string) \preg_replace('/^<p>/', '', $content);
            $content = (string) \preg_replace('/<\/p>/', '', $content);
        }

        if ($this->getIsDOMDocumentCreatedWithoutPTagWrapper()) {
            $content = $strip($content, ['<p>', '</p>']);
        }

        if ($this->getIsDOMDocumentCreatedWithoutHtml()) {
            $content = $strip(
                $content,
                ['<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">']
            );
        }

        /** @noinspection CheckTagEmptyBody */
        /** @noinspection HtmlExtraClosingTag */
        /** @noinspection HtmlRequiredTitleElement */
        $helperCleanup = [
            '<simpleHtmlDomP>'  => '',
            '</simpleHtmlDomP>' => '',
            '<head><head>'      => '<head>',
            '</head></head>'    => '</head>',
            '<br></br>'         => '<br>',
        ];

        $content = \trim(
            \str_replace(
                \array_keys($helperCleanup),
                \array_values($helperCleanup),
                $content
            )
        );

        $decoded = $this->decodeHtmlEntity($content, $multiDecodeNewHtmlEntity);

        return self::putReplacedBackToPreserveHtmlEntities($decoded);
    }
565
566
    /**
     * Return elements by ".class".
     *
     * Builds the CSS selector ".<class>" and delegates to findMulti().
     *
     * @param string $class
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
    {
        // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
        return $this->findMulti(".{$class}");
    }
577
578
    /**
     * Return element by #id.
     *
     * Builds the CSS selector "#<id>" and delegates to findOne().
     *
     * @param string $id
     *
     * @return SimpleHtmlDomInterface
     */
    public function getElementById(string $id): SimpleHtmlDomInterface
    {
        // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
        return $this->findOne("#{$id}");
    }
589
590
    /**
     * Return element by tag name.
     *
     * Wraps the first node the document reports for the tag name, or hands back a
     * SimpleHtmlDomBlank when there is none.
     *
     * @param string $name
     *
     * @return SimpleHtmlDomInterface
     */
    public function getElementByTagName(string $name): SimpleHtmlDomInterface
    {
        $firstNode = $this->document->getElementsByTagName($name)->item(0);

        return $firstNode === null
            ? new SimpleHtmlDomBlank()
            : new SimpleHtmlDom($firstNode);
    }
607
608
    /**
     * Returns elements by "#id".
     *
     * Builds the CSS selector "#<id>" and delegates to find(), passing the index through.
     *
     * @param string   $id
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function getElementsById(string $id, $idx = null)
    {
        // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
        return $this->find("#{$id}", $idx);
    }
620
621
    /**
     * Returns elements by tag name.
     *
     * Without an index, all matching elements are returned (or a SimpleHtmlDomNodeBlank
     * when there are none). With an index, the single element at that position is
     * returned (or a SimpleHtmlDomBlank when it does not exist); negative indexes count
     * from the end.
     *
     * @param string   $name
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function getElementsByTagName(string $name, $idx = null)
    {
        $nodesList = $this->document->getElementsByTagName($name);

        $elements = new SimpleHtmlDomNode();

        foreach ($nodesList as $node) {
            $elements[] = new SimpleHtmlDom($node);
        }

        // return all elements
        if ($idx === null) {
            if (\count($elements) === 0) {
                return new SimpleHtmlDomNodeBlank();
            }

            return $elements;
        }

        // handle negative values
        if ($idx < 0) {
            $idx = \count($elements) + $idx;
        }

        // return one element; a miss yields the blank *element* (as find() does),
        // not the blank node list
        return $elements[$idx] ?? new SimpleHtmlDomBlank();
    }
656
657
    /**
     * Get dom node's outer html.
     *
     * Invokes the configured callback (if any) with this parser, serializes either the
     * document element only (when the input had no <html> wrapper) or the whole document,
     * and passes the result through fixHtmlOutput().
     *
     * @param bool $multiDecodeNewHtmlEntity
     *
     * @return string the cleaned-up markup, or '' if serialization failed
     */
    public function html(bool $multiDecodeNewHtmlEntity = false): string
    {
        if ($this::$callback !== null) {
            \call_user_func($this::$callback, [$this]);
        }

        $markup = $this->getIsDOMDocumentCreatedWithoutHtmlWrapper()
            ? $this->document->saveHTML($this->document->documentElement)
            : $this->document->saveHTML();

        return $markup === false
            ? ''
            : $this->fixHtmlOutput($markup, $multiDecodeNewHtmlEntity);
    }
682
683
    /**
684
     * Load HTML from string.
685
     *
686
     * @param string   $html
687
     * @param int|null $libXMLExtraOptions
688
     *
689
     * @return HtmlDomParser
690
     */
691 170
    public function loadHtml(string $html, $libXMLExtraOptions = null): DomParserInterface
692
    {
693
        // reset
694 170
        self::$domBrokenReplaceHelper = [];
695
696 170
        $this->document = $this->createDOMDocument($html, $libXMLExtraOptions);
697
698 170
        return $this;
0 ignored issues
show
Bug Best Practice introduced by
The return type of return $this; (voku\helper\HtmlDomParser) is incompatible with the return type declared by the interface voku\helper\DomParserInterface::loadHtml of type self.

If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.

Let’s take a look at an example:

class Author {
    private $name;

    public function __construct($name) {
        $this->name = $name;
    }

    public function getName() {
        return $this->name;
    }
}

abstract class Post {
    public function getAuthor() {
        return 'Johannes';
    }
}

class BlogPost extends Post {
    public function getAuthor() {
        return new Author('Johannes');
    }
}

class ForumPost extends Post { /* ... */ }

function my_function(Post $post) {
    echo strtoupper($post->getAuthor());
}

Our function my_function expects a Post object, and outputs the author of the post. The base class Post returns a simple string and outputting a simple string will work just fine. However, the child class BlogPost which is a sub-type of Post instead decided to return an object, and is therefore violating the SOLID principles. If a BlogPost were passed to my_function, PHP would not complain, but ultimately fail when executing the strtoupper call in its body.

Loading history...
699
    }
700
701
    /**
     * Load HTML from file.
     *
     * Reads a local file or a "http://" / "https://" URL and passes the
     * content on to loadHtml().
     *
     * @param string   $filePath           <p>Local path or http(s) URL.</p>
     * @param int|null $libXMLExtraOptions <p>Handed over unchanged to loadHtml().</p>
     *
     * @throws \RuntimeException if a local file does not exist or the content can not be read
     *
     * @return HtmlDomParser
     */
    public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): DomParserInterface
    {
        // reset
        self::$domBrokenReplaceHelper = [];

        // only local paths are checked for existence, http(s) URLs are fetched directly
        if (
            !\preg_match("/^https?:\/\//i", $filePath)
            &&
            !\file_exists($filePath)
        ) {
            throw new \RuntimeException("File {$filePath} not found");
        }

        try {
            if (\class_exists('\voku\helper\UTF8')) {
                /** @noinspection PhpUndefinedClassInspection */
                $html = UTF8::file_get_contents($filePath);
            } else {
                $html = \file_get_contents($filePath);
            }
        } catch (\Exception $e) {
            // keep the original failure reachable via getPrevious()
            throw new \RuntimeException("Could not load file {$filePath}", 0, $e);
        }

        if ($html === false) {
            throw new \RuntimeException("Could not load file {$filePath}");
        }

        return $this->loadHtml($html, $libXMLExtraOptions);
    }
741
742
    /**
     * Get the HTML as XML or plain XML if needed.
     *
     * @param bool $multiDecodeNewHtmlEntity <p>Forwarded to fixHtmlOutput() / decodeHtmlEntity().</p>
     * @param bool $htmlToXml                <p>true: post-process the output via fixHtmlOutput();
     *                                       false: only decode the html entities and put the preserved
     *                                       replacements back.</p>
     * @param bool $removeXmlHeader          <p>Strip the leading XML declaration from the output.</p>
     * @param int  $options                  <p>libxml options for DOMDocument::saveXML().</p>
     *
     * @return string
     *                <p>The serialized document, or an empty string if the document could not be serialized.</p>
     */
    public function xml(
        bool $multiDecodeNewHtmlEntity = false,
        bool $htmlToXml = true,
        bool $removeXmlHeader = true,
        int $options = \LIBXML_NOEMPTYTAG
    ): string {
        $xml = $this->document->saveXML(null, $options);

        // saveXML() signals failure with false; answer with an empty string instead of
        // passing a non-string into the string-only helpers below
        if ($xml === false) {
            return '';
        }

        if ($removeXmlHeader) {
            $xml = \ltrim((string) \preg_replace('/<\?xml.*\?>/', '', $xml));
        }

        if ($htmlToXml) {
            return $this->fixHtmlOutput($xml, $multiDecodeNewHtmlEntity);
        }

        $xml = $this->decodeHtmlEntity($xml, $multiDecodeNewHtmlEntity);

        return self::putReplacedBackToPreserveHtmlEntities($xml);
    }
774
775
    /**
     * Make the parser callable: "$dom($selector, $idx)" is forwarded to find().
     *
     * @param string   $selector
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface
     */
    public function __invoke($selector, $idx = null)
    {
        $found = $this->find($selector, $idx);

        return $found;
    }
785
786
    /**
     * Read access to the "isDOMDocumentCreatedWithoutHeadWrapper" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHeadWrapper;
    }
793
794
    /**
     * Read access to the "isDOMDocumentCreatedWithoutPTagWrapper" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutPTagWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutPTagWrapper;
    }
801
802
    /**
     * Read access to the "isDOMDocumentCreatedWithoutHtml" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutHtml(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHtml;
    }
809
810
    /**
     * Read access to the "isDOMDocumentCreatedWithoutBodyWrapper" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutBodyWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutBodyWrapper;
    }
817
818
    /**
     * Read access to the "isDOMDocumentCreatedWithoutHtmlWrapper" flag.
     *
     * This class consults the flag when serializing: if it is set, only the
     * document element is passed to saveHTML() instead of the whole document.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHtmlWrapper;
    }
825
826
    /**
     * Read access to the "isDOMDocumentCreatedWithoutWrapper" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithoutWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutWrapper;
    }
833
834
    /**
     * Read access to the "isDOMDocumentCreatedWithFakeEndScript" flag.
     *
     * @return bool
     *              <p>Current value of the flag.</p>
     */
    public function getIsDOMDocumentCreatedWithFakeEndScript(): bool
    {
        return $this->isDOMDocumentCreatedWithFakeEndScript;
    }
841
842
    /**
     * Swap markup that has no matching opening tag for placeholders, so it can be restored later.
     *
     * Works in three steps:
     * 1. Every paired "<tag ...>...</tag>" is masked with temporary
     *    "°..._simple_html_dom__voku_°" markers, repeated until nothing changes anymore.
     * 2. Each run of markup that the second pattern still matches (closing tags that were
     *    not masked in step 1) is stored in self::$domBrokenReplaceHelper
     *    ('orig' => markup, 'tmp' => placeholder) and replaced by that placeholder.
     * 3. The temporary markers from step 1 are turned back into "</", "<" and ">".
     *
     * Both patterns use the "u" modifier, so preg_replace_callback() returns null for
     * input that is not valid UTF-8 (or when a PCRE limit is hit). In that case the
     * current step is stopped and the html collected so far is kept, instead of
     * collapsing the whole input into an empty string.
     *
     * @param string $html
     *
     * @return string
     */
    protected function keepBrokenHtml(string $html): string
    {
        // temporary markers and the markup they stand for (same order in both lists)
        $markers = ['°lt/_simple_html_dom__voku_°', '°lt_simple_html_dom__voku_°', '°gt_simple_html_dom__voku_°'];
        $markup = ['</', '<', '>'];

        // step 1: mask all paired elements
        do {
            $original = $html;

            $masked = \preg_replace_callback(
                '/(?<start>.*)<(?<element_start>[a-z]+)(?<element_start_addon> [^>]*)?>(?<value>.*?)<\/(?<element_end>\2)>(?<end>.*)/sui',
                static function ($matches) {
                    return $matches['start'] .
                           '°lt_simple_html_dom__voku_°' . $matches['element_start'] . $matches['element_start_addon'] . '°gt_simple_html_dom__voku_°' .
                           $matches['value'] .
                           '°lt/_simple_html_dom__voku_°' . $matches['element_end'] . '°gt_simple_html_dom__voku_°' .
                           $matches['end'];
                },
                $html
            );

            if ($masked === null) {
                // PCRE error: keep what we have
                break;
            }

            $html = $masked;
        } while ($original !== $html);

        // step 2: move the remaining (broken) markup into the replacement table
        do {
            $original = $html;

            $replaced = \preg_replace_callback(
                '/(?<start>[^<]*)?(?<broken>(?:(?:<\/\w+(?:\s+\w+=\\"[^\"]+\\")*+)(?:[^<]+)>)+)(?<end>.*)/u',
                static function ($matches) use ($markers, $markup) {
                    // the stored original must contain real tags, not the temporary markers
                    $matches['broken'] = \str_replace($markers, $markup, $matches['broken']);

                    self::$domBrokenReplaceHelper['orig'][] = $matches['broken'];
                    self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = self::$domHtmlBrokenHtmlHelper . \crc32($matches['broken']);

                    return $matches['start'] . $matchesHash . $matches['end'];
                },
                $html
            );

            if ($replaced === null) {
                // PCRE error: keep what we have
                break;
            }

            $html = $replaced;
        } while ($original !== $html);

        // step 3: restore the markup that was masked in step 1
        return \str_replace($markers, $markup, $html);
    }
892
893
    /**
     * Protect script tags of type "text/html" / "text/x-custom-template" before parsing.
     *
     * For every matching "<script ...>...</script>":
     * - if the inner content contains none of "+", "<%", "{%" or "{{", the script element
     *   is renamed to the helper tag from self::$domHtmlSpecialScriptHelper
     * - otherwise the inner content is stored in self::$domBrokenReplaceHelper
     *   ('orig' => content, 'tmp' => placeholder) and replaced by that placeholder
     *
     * If preg_replace_callback() fails (it returns null on a PCRE error), $html is left
     * untouched instead of being overwritten with an empty string.
     *
     * @param string $html <p>Passed by reference and modified in place.</p>
     */
    protected function keepSpecialScriptTags(string &$html)
    {
        // regEx for e.g.: [<script id="elements-image-1" type="text/html">...</script>]
        $replaced = \preg_replace_callback(
            '/(?<start>((?:<script) [^>]*type=(?:["\'])?(?:text\/html|text\/x-custom-template)+(?:[^>]*)>))(?<innerContent>.*)(?<end><\/script>)/isU',
            static function ($matches) {
                $inner = $matches['innerContent'];

                $needsPlaceholder = \strpos($inner, '+') !== false
                                    ||
                                    \strpos($inner, '<%') !== false
                                    ||
                                    \strpos($inner, '{%') !== false
                                    ||
                                    \strpos($inner, '{{') !== false;

                if (!$needsPlaceholder) {
                    // remove the html5 fallback
                    $matches[0] = \str_replace('<\/', '</', $matches[0]);

                    // swap the leading "<script" for the helper tag name ...
                    $specialNonScript = '<' . self::$domHtmlSpecialScriptHelper . \substr($matches[0], \strlen('<script'));

                    // ... and the trailing "</script>" for the matching helper end tag
                    return \substr($specialNonScript, 0, -\strlen('</script>')) . '</' . self::$domHtmlSpecialScriptHelper . '>';
                }

                // remove the html5 fallback
                $matches['innerContent'] = \str_replace('<\/', '</', $matches['innerContent']);

                self::$domBrokenReplaceHelper['orig'][] = $matches['innerContent'];
                self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = self::$domHtmlBrokenHtmlHelper . \crc32($matches['innerContent']);

                return $matches['start'] . $matchesHash . $matches['end'];
            },
            $html
        );

        // null means the regex engine failed; keep the caller's html as it is
        if ($replaced !== null) {
            $html = $replaced;
        }
    }
930
931
    /**
     * Switch the "keep broken html" mode of this parser on or off.
     *
     * @param bool $keepBrokenHtml <p>New value for $this->keepBrokenHtml.</p>
     *
     * @return HtmlDomParser
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): DomParserInterface
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }
942
}
943