Passed
Push — master ( 12b94f...668c77 )
by Gilles
03:31
created

Dom::getElementsByClass()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
     * The charset we would like the output to be in.
     *
     * @var string
     */
    protected $defaultCharset = 'UTF-8';

    /**
     * Contains the root node of this dom tree. It is assigned by parse(),
     * so it holds no node until one of the load methods has run.
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * The raw version of the document string, exactly as handed to loadStr().
     *
     * @var string
     */
    protected $raw;

    /**
     * The document string wrapped for parsing. Stays null until a load
     * method has run; isLoaded() relies on that.
     *
     * @var Content|null
     */
    protected $content = null;

    /**
     * The original file size of the document.
     *
     * @var int
     */
    protected $rawSize;

    /**
     * The size of the document after it is cleaned.
     *
     * @var int
     */
    protected $size;

    /**
     * A global options array to be used by all load calls.
     *
     * @var array
     */
    protected $globalOptions = [];

    /**
     * A persistent option object to be used for all options in the
     * parsing of the file. Re-created on every loadStr() call.
     *
     * @var Options
     */
    protected $options;

    /**
     * A list of tags which will always be self closing. Tag names are
     * compared in lower case with a strict in_array() check.
     *
     * @var string[]
     */
    protected $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr',
    ];

    /**
     * A list of tags where there should be no /> at the end (html5 style)
     *
     * @var string[]
     */
    protected $noSlash = [];
114
115
    /**
     * Renders the document as a string by delegating to the root node's
     * innerHtml().
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        $rootNode = $this->root;

        return $rootNode->innerHtml();
    }
126
127
    /**
     * Forwards reads of unknown properties to the root node.
     *
     * @param string $name Name of the property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        $rootNode = $this->root;

        return $rootNode->$name;
    }
137
138
    /**
     * Attempts to load the dom from any resource: a file path, a URL or a
     * raw html string. The checks run in that order.
     *
     * @param string $str
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // input containing a line break is never treated as a file path
        $isSingleLine = strpos($str, "\n") === false;
        if ($isSingleLine && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // anything starting with http:// or https:// is fetched, the rest is parsed as-is
        $looksLikeUrl = preg_match("/^https?:\/\//i", $str);

        return $looksLikeUrl
            ? $this->loadFromUrl($str, $options)
            : $this->loadStr($str, $options);
    }
162
163
    /**
     * Reads the given file (or any path file_get_contents() accepts) and
     * parses its content.
     *
     * @param string $file
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException When the file could not be read.
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $contents = file_get_contents($file);
        if ($contents !== false) {
            return $this->loadStr($contents, $options);
        }

        throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
    }
181
182
    /**
     * Use a curl interface implementation to attempt to load
     * the content from a url.
     *
     * @param string             $url
     * @param array              $options Passed both to the curl call and to loadStr().
     * @param CurlInterface|null $curl    Transport to use; a default Curl instance is created when null.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // the nullable type is spelled out: relying on "= null" to make the
        // parameter nullable is deprecated as of PHP 8.4
        $curl    = $curl ?? new Curl;
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }
204
205
    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * Builds a fresh Options object (global options first, then the ones
     * given here), records the raw input and its size, cleans the input,
     * parses it and finally runs charset detection.
     *
     * @param string $str
     * @param array  $option
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException When the clean up step fails.
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // size is the length of the cleaned markup; the raw length is
        // already kept in rawSize
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
     * Replaces the global options array that every subsequent load call
     * applies before its own options.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;
        return $this;
    }
248
249
    /**
     * Find elements by css selector on the root node.
     *
     * @param string   $selector
     * @param int|null $nth Passed through to the root node's find().
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        // option is not a boolean: call find() without the third argument
        // so the node falls back to its own default
        return $this->root->find($selector, $nth);
    }
270
271
    /**
     * Looks a node up by its numeric id, starting at the root node.
     *
     * @param int $id
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->findById($id);
    }
285
286
    /**
     * Appends one tag, or every tag of an array, to the list of tags that
     * are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $name) {
            $this->selfClosing[] = $name;
        }

        return $this;
    }
305
306
    /**
     * Drops one tag, or every tag of an array, from the list of tags that
     * are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        $unwanted          = is_array($tag) ? $tag : [$tag];
        $this->selfClosing = array_diff($this->selfClosing, $unwanted);

        return $this;
    }
323
324
    /**
     * Empties the list of always-self-closing tags.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $this->selfClosing = [];
        return $this;
    }
336
337
338
    /**
     * Appends one tag, or every tag of an array, to the list of self
     * closing tags that are written without a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $name) {
            $this->noSlash[] = $name;
        }

        return $this;
    }
356
357
    /**
     * Drops one tag, or every tag of an array, from the list of no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        $unwanted      = is_array($tag) ? $tag : [$tag];
        $this->noSlash = array_diff($this->noSlash, $unwanted);

        return $this;
    }
373
374
    /**
     * Resets the list of no-slash tags to an empty list.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $this->noSlash = [];
        return $this;
    }
386
387
    /**
     * Returns the first child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->firstChild();
    }
399
400
    /**
     * Returns the last child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->lastChild();
    }
412
413
    /**
     * Returns the number of children reported by the root node.
     *
     * @return int
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->countChildren();
    }
425
426
    /**
     * Returns the children of the root node as an array.
     *
     * @return array
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->getChildren();
    }
438
439
    /**
     * Tells whether the root node has any child nodes.
     *
     * @return bool
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();
        $rootNode = $this->root;

        return $rootNode->hasChildren();
    }
451
452
    /**
     * Returns the first element matching the "#<id>" selector.
     *
     * @param mixed $id Value appended to "#" to build the selector.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();
        $selector = '#'.$id;

        return $this->find($selector, 0);
    }
466
467
    /**
     * Returns every element found when the tag name is used as selector.
     *
     * @param string $name
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();
        $matches = $this->find($name);

        return $matches;
    }
481
482
    /**
     * Returns every element matching the ".<class>" selector.
     *
     * @param string $class
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();
        $selector = '.'.$class;

        return $this->find($selector);
    }
496
497
    /**
     * Guards accessors: throws unless a load method has populated the content.
     *
     * @throws NotLoadedException
     */
    protected function isLoaded(): void
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }
508
509
    /**
     * Cleans the html of any non-html information.
     *
     * Unless the 'cleanupInput' option is off, the input is gunzipped when
     * it starts with the gzip magic bytes, white space before a closing
     * ">" that follows a quote is removed, line breaks are replaced, and
     * the doctype, comments, cdata sections and (depending on options)
     * script tags, style tags, server side scripts and smarty scripts are
     * stripped, in that order.
     *
     * @param string $str
     * @return string
     * @throws LogicalException When decoding or one of the replacements fails.
     */
    protected function clean(string $str): string
    {
        if ( ! $this->options->get('cleanupInput')) {
            // skip entire cleanup step
            return $str;
        }

        // a gzip stream starts with the bytes 1f 8b 08; only the prefix is
        // inspected, byte-wise, instead of searching the whole document
        if (strncmp($str, "\x1f\x8b\x08", 3) === 0) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // runs one case-insensitive multibyte replacement and turns every
        // non-string result (false on error, null on invalid encoding)
        // into a LogicalException carrying the step specific message
        $strip = static function (string $pattern, string $replacement, string $subject, string $failure): string {
            $result = mb_eregi_replace($pattern, $replacement, $subject);
            if ( ! is_string($result)) {
                throw new LogicalException($failure);
            }

            return $result;
        };

        // remove white space before closing tags
        $str = $strip("'\s+>", "'>", $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean single quotes.');
        $str = $strip('"\s+>', '">', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean double quotes.');

        // clean out the \n\r (str_replace always returns a string for a string subject)
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = '&#10;';
        }
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $strip("<!doctype(.*?)>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip the doctype.');

        // strip out comments
        $str = $strip("<!--(.*?)-->", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip comments.');

        // strip out cdata
        $str = $strip("<!\[CDATA\[(.*?)\]\]>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out cdata.');

        // strip out <script> tags
        if ($this->options->get('removeScripts')) {
            $str = $strip("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1.');
            $str = $strip("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 2.');
        }

        // strip out <style> tags
        if ($this->options->get('removeStyles')) {
            $str = $strip("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1.');
            $str = $strip("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 2.');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $strip("(<\?)(.*?)(\?>)", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out service side scripts.');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $strip("(\{\w)(.*?)(\})", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts.');
        }

        return $str;
    }
610
611
    /**
     * Attempts to parse the html in content.
     *
     * Creates a fresh root node and then alternates between reading text
     * up to the next "<" and reading a tag, attaching every text node and
     * element to the node that is currently open.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        $current = $this->root;
        while ($current !== null) {
            $text = $this->content->copyUntil('<');

            if ($text != '') {
                // we found text; keep it unless it is only white space and
                // white space text nodes are turned off
                if ($this->options->whitespaceTextNode || trim($text) != '') {
                    $textNode = new TextNode($text, $this->options->removeDoubleSpace);
                    $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                    $current->addChild($textNode);
                }
                continue;
            }

            $info = $this->parseTag();
            if ( ! $info['status']) {
                // we are done here
                break;
            }

            // check if it was a closing tag
            if ($info['closing']) {
                // climb towards the root looking for the element being closed
                $ancestor = $current;
                while ($ancestor !== null && $ancestor->getTag()->name() != $info['tag']) {
                    $ancestor = $ancestor->getParent();
                }
                if ($ancestor !== null) {
                    // found it: continue with its parent (null ends the loop)
                    $current = $ancestor->getParent();
                }
                // otherwise we could not find the opening tag and stay put
                continue;
            }

            if ( ! isset($info['node'])) {
                continue;
            }

            /** @var AbstractNode $node */
            $node = $info['node'];
            $current->addChild($node);

            // only descend into nodes that are not self closing
            if ( ! $node->getTag()->isSelfClosing()) {
                $current = $node;
            }
        }
    }
676
677
    /**
     * Attempt to parse a tag out of the content.
     *
     * The returned array always has the keys 'status', 'closing' and
     * 'node'. 'status' is false when the cursor is not on a usable tag.
     * For a closing tag 'closing' is true and an extra 'tag' key holds the
     * lower-cased name, except when the name is in the self closing list:
     * then only 'status' is true. For an opening tag 'node' holds the new
     * HtmlNode with its attributes set.
     *
     * @return array
     * @throws StrictException
     */
    protected function parseTag(): array
    {
        $return = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];
        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $return;
        }

        // check if this is a closing tag
        if ($this->content->fastForward(1)->char() == '/') {
            // end tag
            $tag = $this->content->fastForward(1)
                                 ->copyByToken('slash', true);
            // move to end of tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            $tag              = strtolower($tag);
            $return['status'] = true;
            // check if this closing tag counts: closing tags of always
            // self closing elements are consumed but otherwise ignored
            if ( ! in_array($tag, $this->selfClosing, true)) {
                $return['closing'] = true;
                $return['tag']     = $tag;
            }

            return $return;
        }

        // $tag is lower-cased here and not modified again below
        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $return;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes
        while ($this->content->char() != '>' &&
            $this->content->char() != '/') {
            $space = $this->content->skipByToken('blank', true);
            if (empty($space)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() == '=') {
                $this->content->fastForward(1)
                              ->skipByToken('blank');
                $opening = $this->content->char();
                if ($opening == '"') {
                    $attr = [
                        'doubleQuote' => true,
                        'value'       => $this->readQuotedAttributeValue('"'),
                    ];
                } elseif ($opening == "'") {
                    $attr = [
                        'doubleQuote' => false,
                        'value'       => $this->readQuotedAttributeValue("'"),
                    ];
                } else {
                    // unquoted value
                    $attr = [
                        'doubleQuote' => true,
                        'value'       => $this->content->copyByToken('attr', true),
                    ];
                }
                $node->getTag()->$name = $attr;
            } else {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
            }
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step over the closing ">"
        $this->content->fastForward(1);

        $return['status'] = true;
        $return['node']   = $node;

        return $return;
    }

    /**
     * Reads a quoted attribute value. The cursor must be on the opening
     * quote; afterwards it sits one character past the closing quote.
     *
     * @param string $quote The quote character delimiting the value (" or ').
     * @return string The value without the surrounding quotes.
     */
    private function readQuotedAttributeValue(string $quote): string
    {
        // step over the opening quote
        $this->content->fastForward(1);
        $value = $this->content->copyUntil($quote, true);
        // keep appending for as long as copyUntilUnless() returns more text
        // NOTE(review): presumably this covers quote characters embedded in
        // the value - confirm against Content::copyUntilUnless()
        do {
            $more   = $this->content->copyUntilUnless($quote, '=>');
            $value .= $more;
        } while ( ! empty($more));
        // step over the closing quote
        $this->content->fastForward(1);

        return $value;
    }
832
833
    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * Looks for a meta[http-equiv=Content-Type] element and reads the
     * charset from its content attribute. Unless an encoding is enforced,
     * the resulting encoder is propagated to the whole tree.
     *
     * @return bool True only when a charset was read from the meta tag.
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ($enforceEncoding !== null) {
            //  they want to enforce the given encoding; note that the
            //  encoder is not propagated to the tree in this branch
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        $detected = false;

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if ($meta !== null) {
            // NOTE(review): static analysis reports AbstractNode::getAttribute()
            // as always returning null; presumably the matched node is a
            // subclass that overrides it - confirm
            $content = $meta->getAttribute('content');
            if ($content !== null) {
                $matches = [];
                if (preg_match('/charset=(.+)/', $content, $matches)) {
                    $encode->from(trim($matches[1]));
                    $detected = true;
                }
            }
        }

        // with or without a detected charset the encoder goes to every node
        $this->root->propagateEncoding($encode);

        return $detected;
    }
883
}
884