Passed
Pull Request — master (#217)
by
unknown
02:59
created

Dom::getElementsByTag()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
27
     * The charset we would like the output to be in.
28
     *
29
     * @var string
30
     */
31
    protected $defaultCharset = 'UTF-8';
32
33
    /**
34
     * Contains the root node of this dom tree.
35
     *
36
     * @var HtmlNode
37
     */
38
    public $root;
39
40
    /**
41
     * The raw version of the document string.
42
     *
43
     * @var string
44
     */
45
    protected $raw;
46
47
    /**
48
     * The document string.
49
     *
50
     * @var Content|null
51
     */
52
    protected $content = null;
53
54
    /**
55
     * The original file size of the document.
56
     *
57
     * @var int
58
     */
59
    protected $rawSize;
60
61
    /**
62
     * The size of the document after it is cleaned.
63
     *
64
     * @var int
65
     */
66
    protected $size;
67
68
    /**
69
     * A global options array to be used by all load calls.
70
     *
71
     * @var array
72
     */
73
    protected $globalOptions = [];
74
75
    /**
76
     * A persistent option object to be used for all options in the
77
     * parsing of the file.
78
     *
79
     * @var Options
80
     */
81
    protected $options;
82
83
    /**
84
     * A list of tags which will always be self closing
85
     *
86
     * @var array
87
     */
88
    protected $selfClosing = [
89
        'area',
90
        'base',
91
        'basefont',
92
        'br',
93
        'col',
94
        'embed',
95
        'hr',
96
        'img',
97
        'input',
98
        'keygen',
99
        'link',
100
        'meta',
101
        'param',
102
        'source',
103
        'spacer',
104
        'track',
105
        'wbr'
106
    ];
107
108
    /**
109
     * A list of tags where there should be no /> at the end (html5 style)
110
     *
111
     * @var array
112
     */
113
    protected $noSlash = [];
114
115
    /**
     * Renders the document as a string by delegating to the root node's
     * innerHtml().
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        $html = $this->root->innerHtml();

        return $html;
    }
126
127
    /**
     * Magic getter that forwards the property read to the root node, so that
     * $dom->foo resolves to $dom->root->foo.
     *
     * @param string $name Name of the property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        $rootNode = $this->root;

        return $rootNode->$name;
    }
137
138
    /**
     * Loads the dom from a file path, a URL, or a raw html string.
     *
     * AbstractNode::resetCount() is called first. The input is then treated as
     * a file path when it contains no newline and is_file() accepts it, as a
     * URL when it starts with http:// or https:// (case-insensitive), and as
     * raw html in every other case.
     *
     * @param string $str
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     * @throws LogicalException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // a string containing a line break is never probed as a file path
        $isSingleLine = strpos($str, "\n") === false;
        if ($isSingleLine && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        $isUrl = preg_match("/^https?:\/\//i", $str);
        if ($isUrl) {
            return $this->loadFromUrl($str, $options);
        }

        // neither a file nor a url: parse the input as html
        return $this->loadStr($str, $options);
    }
162
163
    /**
     * Reads the given file (or any location file_get_contents() can open) and
     * parses its content.
     *
     * @param string $file
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException when file_get_contents() returns false
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $html = file_get_contents($file);
        if ($html !== false) {
            return $this->loadStr($html, $options);
        }

        throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
    }
181
182
    /**
     * Fetches the content of a url through a curl interface implementation
     * and parses it.
     *
     * When no implementation is passed, a default Curl instance is created.
     * The same $options array is handed to both the curl call and the parser.
     *
     * @param string             $url
     * @param array              $options
     * @param CurlInterface|null $curl
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // The nullable type is spelled out: relying on "= null" to make the
        // parameter nullable is deprecated as of PHP 8.4.
        if ($curl === null) {
            // use the default curl interface
            $curl = new Curl;
        }
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }
204
205
    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * A fresh Options object is built for every call: the global options are
     * applied first and the per-call options second. The raw input and its
     * byte length are stored, the input is passed through clean(), and the
     * cleaned html is parsed into the node tree before charset detection runs.
     *
     * @param string $str
     * @param array  $option
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // size describes the document *after* cleaning, so it must be measured
        // on the cleaned html and not on the raw input.
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
     * Replaces the global options array. These options are applied by
     * loadStr() before the options passed to the individual load call.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }
248
249
    /**
     * Find elements by css selector on the root node.
     *
     * When the 'depthFirstSearch' option holds a boolean it is forwarded to
     * the root node's find(); otherwise the root node is called without it.
     *
     * @param string   $selector
     * @param int|null $nth
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        // The nullable type is spelled out: relying on "= null" to make the
        // parameter nullable is deprecated as of PHP 8.4.
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        return $this->root->find($selector, $nth);
    }
270
271
    /**
     * Looks up a node by its integer id, starting at the root node.
     *
     * @param int $id
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        $match = $this->root->findById($id);

        return $match;
    }
285
286
    /**
     * Appends one tag name, or every entry of an array of tag names, to the
     * list of tags that are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $tagName) {
            $this->selfClosing[] = $tagName;
        }

        return $this;
    }
305
306
    /**
     * Drops one tag name, or every entry of an array of tag names, from the
     * list of tags that are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];

        $this->selfClosing = array_diff($this->selfClosing, $tags);

        return $this;
    }
323
324
    /**
     * Empties the list of always-self-closing tags, including the defaults.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $none = [];
        $this->selfClosing = $none;

        return $this;
    }
336
337
338
    /**
     * Appends one tag name, or every entry of an array of tag names, to the
     * list of self closing tags that are written without a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $tagName) {
            $this->noSlash[] = $tagName;
        }

        return $this;
    }
356
357
    /**
     * Drops one tag name, or every entry of an array of tag names, from the
     * list of no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];

        $this->noSlash = array_diff($this->noSlash, $tags);

        return $this;
    }
373
374
    /**
     * Resets the list of no-slash tags to an empty list.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $none = [];
        $this->noSlash = $none;

        return $this;
    }
386
387
    /**
     * Returns the first child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException when no content has been loaded yet
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();

        $child = $this->root->firstChild();

        return $child;
    }
399
400
    /**
     * Returns the last child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException when no content has been loaded yet
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();

        $child = $this->root->lastChild();

        return $child;
    }
412
413
    /**
     * Returns the child count reported by the root node.
     *
     * @return int
     * @throws NotLoadedException when no content has been loaded yet
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        $total = $this->root->countChildren();

        return $total;
    }
425
426
    /**
     * Returns the children of the root node as an array.
     *
     * @return array
     * @throws NotLoadedException when no content has been loaded yet
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        $children = $this->root->getChildren();

        return $children;
    }
438
439
    /**
     * Tells whether the root node reports having any child nodes.
     *
     * @return bool
     * @throws NotLoadedException when no content has been loaded yet
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        $hasAny = $this->root->hasChildren();

        return $hasAny;
    }
451
452
    /**
     * Returns the first element matching the selector "#<id>".
     *
     * @param mixed $id Value appended to '#' to build the selector.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        $selector = '#'.$id;

        // index 0 asks find() for the first match only
        return $this->find($selector, 0);
    }
466
467
    /**
     * Returns every element matching the given tag name; the name is passed
     * to find() unchanged as the selector.
     *
     * @param string $name
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        $matches = $this->find($name);

        return $matches;
    }
481
482
    /**
     * Returns every element matching the selector ".<class>".
     *
     * @param string $class
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        $selector = '.'.$class;

        return $this->find($selector);
    }
496
497
    /**
     * Guards methods that need a parsed document: throws unless content has
     * been set by one of the load methods.
     *
     * @throws NotLoadedException
     */
    protected function isLoaded(): void
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }
508
509
    /**
     * Cleans the html of any non-html information.
     *
     * Returns the input untouched when the 'cleanupInput' option is not
     * truthy. Otherwise the steps run in this fixed order: gzip decoding (when
     * the input starts with the gzip magic bytes), whitespace removal between
     * a closing quote and '>', line break replacement, and stripping of the
     * doctype, comments, CDATA sections and, depending on the options,
     * script tags, style tags, server side scripts and smarty scripts.
     *
     * @param string $str
     * @return string
     * @throws LogicalException when gzdecode() or a regex replacement fails
     */
    protected function clean(string $str): string
    {
        if ($this->options->get('cleanupInput') != true) {
            // skip entire cleanup step
            return $str;
        }

        // gzip data starts with the bytes 1f 8b 08; a byte-wise prefix check
        // is exact and does not scan the rest of the document.
        if (strncmp($str, "\x1f" . "\x8b" . "\x08", 3) === 0) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // remove white space before closing tags
        $str = $this->eregiReplaceOrFail("'\s+>", "'>", $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean single quotes.');
        $str = $this->eregiReplaceOrFail('"\s+>', '">', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean double quotes.');

        // clean out the \n\r
        // (str_replace() always returns a string for a string subject, so no
        // failure check is needed here)
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = '&#10;';
        }
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->eregiReplaceOrFail("<!doctype(.*?)>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip the doctype.');

        // strip out comments
        $str = $this->eregiReplaceOrFail("<!--(.*?)-->", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip comments.');

        // strip out cdata
        $str = $this->eregiReplaceOrFail("<!\[CDATA\[(.*?)\]\]>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out cdata.');

        // strip out <script> tags
        if ($this->options->get('removeScripts')) {
            $str = $this->eregiReplaceOrFail("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1.');
            $str = $this->eregiReplaceOrFail("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 2.');
        }

        // strip out <style> tags
        if ($this->options->get('removeStyles')) {
            $str = $this->eregiReplaceOrFail("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1.');
            $str = $this->eregiReplaceOrFail("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 2.');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $this->eregiReplaceOrFail("(<\?)(.*?)(\?>)", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out service side scripts.');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->eregiReplaceOrFail("(\{\w)(.*?)(\})", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts.');
        }

        return $str;
    }

    /**
     * Runs mb_eregi_replace() and turns any non-string result into a
     * LogicalException carrying the given message.
     *
     * mb_eregi_replace() returns false on error and null when the subject is
     * not valid in the current encoding; both are treated as failures here.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $str
     * @param string $failureMessage
     * @return string
     * @throws LogicalException
     */
    private function eregiReplaceOrFail(string $pattern, string $replacement, string $str, string $failureMessage): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $str);
        if ( ! is_string($result)) {
            throw new LogicalException($failureMessage);
        }

        return $result;
    }
610
611
    /**
     * Builds the node tree from the loaded content.
     *
     * A fresh 'root' HtmlNode is created and used as the first active node.
     * The loop then alternates between reading text up to the next tag and
     * reading a tag via parseTag(): text becomes a TextNode child of the
     * active node, an opening tag becomes a child and (unless self closing)
     * the new active node, and a closing tag moves the active node to the
     * parent of the nearest matching ancestor. Parsing ends when parseTag()
     * reports no tag or when the active node becomes null.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        $activeNode = $this->root;
        while ($activeNode !== null) {
            // Inside a script element with input cleanup disabled, text is read
            // up to the next '</' instead of the next '<' (presumably so that a
            // '<' inside script source is not taken for a tag start).
            $insideRawScript = $activeNode->tag->name() === 'script'
                && $this->options->get('cleanupInput') != true;
            $text = $this->content->copyUntil($insideRawScript ? '</' : '<');

            if ($text != '') {
                // keep the text unless it is pure whitespace and whitespace
                // text nodes are disabled
                if ($this->options->whitespaceTextNode || trim($text) != '') {
                    $textNode = new TextNode($text, $this->options->removeDoubleSpace);
                    $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                    $activeNode->addChild($textNode);
                }
                continue;
            }

            $info = $this->parseTag();
            if ( ! $info['status']) {
                // we are done here
                break;
            }

            if ($info['closing']) {
                // walk up from the active node to the nearest node carrying
                // the closed tag name
                $opener = $activeNode;
                while ($opener !== null && $opener->getTag()->name() != $info['tag']) {
                    $opener = $opener->getParent();
                }
                if ($opener !== null) {
                    $activeNode = $opener->getParent();
                }
                // when no opening tag was found the active node is left as is
                continue;
            }

            if ( ! isset($info['node'])) {
                continue;
            }

            /** @var AbstractNode $child */
            $child = $info['node'];
            $activeNode->addChild($child);

            // only a tag that is not self closing can receive children
            if ( ! $child->getTag()->isSelfClosing()) {
                $activeNode = $child;
            }
        }
    }
682
683
    /**
     * Attempts to read one tag at the current content position.
     *
     * The returned array always holds 'status', 'closing' and 'node':
     *  - 'status' is false when the position is not at a '<' or the tag name
     *    is blank, true otherwise;
     *  - for a closing tag that is not in the self closing list, 'closing' is
     *    true and 'tag' holds the lower-cased name; a closing tag that is in
     *    the self closing list only sets 'status';
     *  - for an opening tag, 'node' holds the new HtmlNode with its attributes.
     *
     * @return array
     * @throws StrictException when the 'strict' option is set and an attribute
     *                         has no value, or a tag from the self closing
     *                         list is written without a trailing slash
     */
    protected function parseTag(): array
    {
        $result = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];
        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $result;
        }

        if ($this->content->fastForward(1)->char() == '/') {
            // closing tag: read the name, then move past the '>'
            $closingName = $this->content->fastForward(1)
                                         ->copyByToken('slash', true);
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            $closingName      = strtolower($closingName);
            $result['status'] = true;
            // a closing tag for an always-self-closing tag does not count
            if ( ! in_array($closingName, $this->selfClosing, true)) {
                $result['closing'] = true;
                $result['tag']     = $closingName;
            }

            return $result;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $result;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes: loop until the tag ends with '>' or '/'
        while ( ! ($this->content->char() == '>' ||
            $this->content->char() == '/')) {
            $blank = $this->content->skipByToken('blank', true);
            if (empty($blank)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }

            $this->content->skipByToken('blank');
            if (empty($name)) {
                continue;
            }

            if ($this->content->char() != '=') {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
                continue;
            }

            $this->content->fastForward(1)
                          ->skipByToken('blank');

            // decide how the value is delimited
            $opener = $this->content->char();
            if ($opener == '"') {
                $quote = '"';
            } elseif ($opener == "'") {
                $quote = "'";
            } else {
                $quote = null;
            }

            if ($quote === null) {
                // unquoted value
                $node->getTag()->$name = [
                    'doubleQuote' => true,
                    'value'       => $this->content->copyByToken('attr', true),
                ];
                continue;
            }

            // quoted value: read up to the closing quote, then keep appending
            // whatever copyUntilUnless() yields until it returns nothing
            $this->content->fastForward(1);
            $value = $this->content->copyUntil($quote, true);
            do {
                $more   = $this->content->copyUntilUnless($quote, '=>');
                $value .= $more;
            } while ( ! empty($more));
            $this->content->fastForward(1);
            $node->getTag()->$name = [
                'doubleQuote' => $quote === '"',
                'value'       => $value,
            ];
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        $this->content->fastForward(1);

        $result['status'] = true;
        $result['node']   = $node;

        return $result;
    }
838
839
    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * An Encode object is prepared that converts from and to the default
     * charset. When the 'enforceEncoding' option is set, that encoding is put
     * on the Encode object and the method returns without propagating it.
     * Otherwise the content attribute of the first
     * meta[http-equiv=Content-Type] element is searched for "charset=..."; a
     * match becomes the source charset. In every non-enforced case the Encode
     * object is propagated through the root node.
     *
     * @return bool true only when a charset was read from the meta tag
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ($enforceEncoding !== null) {
            //  they want to enforce the given encoding
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        /** @var AbstractNode|null $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);

        // NOTE(review): static analysis reports AbstractNode::getAttribute()
        // as always returning null; concrete node classes presumably override
        // it - confirm against the node implementations.
        $content = $meta === null ? null : $meta->getAttribute('content');

        $detected = false;
        $matches  = [];
        if ($content !== null && preg_match('/charset=(.+)/', $content, $matches)) {
            $encode->from(trim($matches[1]));
            $detected = true;
        }

        // missing meta tag, missing content or missing charset all fall back
        // to the default charset set above
        $this->root->propagateEncoding($encode);

        return $detected;
    }
889
}
890