Passed
Pull Request — master (#218)
by
unknown
03:03
created

Dom::getChildren()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
27
     * The charset we would like the output to be in.
28
     *
29
     * @var string
30
     */
31
    protected $defaultCharset = 'UTF-8';
32
33
    /**
34
     * Contains the root node of this dom tree.
35
     *
36
     * @var HtmlNode
37
     */
38
    public $root;
39
40
    /**
41
     * The raw version of the document string.
42
     *
43
     * @var string
44
     */
45
    protected $raw;
46
47
    /**
48
     * The document string.
49
     *
50
     * @var Content
51
     */
52
    protected $content = null;
53
54
    /**
55
     * The original file size of the document.
56
     *
57
     * @var int
58
     */
59
    protected $rawSize;
60
61
    /**
62
     * The size of the document after it is cleaned.
63
     *
64
     * @var int
65
     */
66
    protected $size;
67
68
    /**
69
     * A global options array to be used by all load calls.
70
     *
71
     * @var array
72
     */
73
    protected $globalOptions = [];
74
75
    /**
76
     * A persistent option object to be used for all options in the
77
     * parsing of the file.
78
     *
79
     * @var Options
80
     */
81
    protected $options;
82
83
    /**
84
     * A list of tags which will always be self closing
85
     *
86
     * @var array
87
     */
88
    protected $selfClosing = [
89
        'area',
90
        'base',
91
        'basefont',
92
        'br',
93
        'col',
94
        'embed',
95
        'hr',
96
        'img',
97
        'input',
98
        'keygen',
99
        'link',
100
        'meta',
101
        'param',
102
        'source',
103
        'spacer',
104
        'track',
105
        'wbr'
106
    ];
107
108
    /**
109
     * A list of tags where there should be no /> at the end (html5 style)
110
     *
111
     * @var array
112
     */
113
    protected $noSlash = [];
114
115
    /**
     * Renders the document as a string by delegating to the root node's
     * innerHtml().
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        $rootNode = $this->root;

        return $rootNode->innerHtml();
    }
126
127
    /**
     * Forwards reads of undefined properties to the root node.
     *
     * @param string $name Name of the property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        $rootNode = $this->root;

        return $rootNode->$name;
    }
137
138
    /**
     * Loads the dom from a file path, a URL, or a raw html string, picking
     * the loader by inspecting the given string.
     *
     * @param string $str     File path, http(s) URL, or html markup.
     * @param array  $options Options forwarded to the selected loader.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // a string containing a line break is never probed as a file path
        $isSingleLine = strpos($str, "\n") === false;
        if ($isSingleLine && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // anything starting with http:// or https:// is fetched as a url
        $isUrl = preg_match("/^https?:\/\//i", $str);
        if ($isUrl) {
            return $this->loadFromUrl($str, $options);
        }

        // otherwise the string itself is the markup
        return $this->loadStr($str, $options);
    }
162
163
    /**
     * Reads the given file (anything file_get_contents() accepts) and parses
     * its content.
     *
     * @param string $file    Path handed to file_get_contents().
     * @param array  $options Options forwarded to loadStr().
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException When the file could not be read.
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $content = file_get_contents($file);
        if ($content !== false) {
            return $this->loadStr($content, $options);
        }

        throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
    }
181
182
    /**
     * Fetches the content of a url through a curl interface implementation
     * and parses it.
     *
     * @param string             $url     The url to fetch.
     * @param array              $options Options passed to the curl
     *                                    implementation and to loadStr().
     * @param CurlInterface|null $curl    Custom transport; a default Curl
     *                                    instance is created when null.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     * @throws LogicalException
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // Explicit nullable type: relying on "= null" to make the parameter
        // nullable is deprecated as of PHP 8.4.
        if ($curl === null) {
            // use the default curl interface
            $curl = new Curl;
        }
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }
204
205
    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * Records the raw string and its byte length, cleans the input, records
     * the byte length of the cleaned string, then parses it and detects the
     * charset.
     *
     * @param string $str    The html markup to parse.
     * @param array  $option Options applied on top of the global options.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        // per-call options are applied after, and therefore on top of, the
        // global options
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // size is the length of the cleaned document, so it must be measured
        // on $html; measuring $str would simply repeat rawSize
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
     * Stores the options array that every subsequent load call applies
     * before its own per-call options.
     *
     * @param array $options Replaces any previously stored global options.
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $dom = $this;
        $dom->globalOptions = $options;

        return $dom;
    }
248
249
    /**
     * Find elements by css selector on the root node.
     *
     * The 'depthFirstSearch' option is forwarded to the root node only when
     * it holds a real boolean; otherwise the root node's own default is used.
     *
     * @param string   $selector Css selector to search for.
     * @param int|null $nth      Passed through to the root node's find().
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        // Explicit nullable type: relying on "= null" to make the parameter
        // nullable is deprecated as of PHP 8.4.
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        return $this->root->find($selector, $nth);
    }
270
271
    /**
     * Looks up a node by its id, starting at the root node.
     *
     * @param int $id Id handed to the root node's findById().
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        $match = $this->root->findById($id);

        return $match;
    }
285
286
    /**
     * Appends one tag name, or every tag name in an array, to the list of
     * tags that are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        $tagNames = is_array($tag) ? $tag : [$tag];
        foreach ($tagNames as $tagName) {
            $this->selfClosing[] = $tagName;
        }

        return $this;
    }
305
306
    /**
     * Drops one tag name, or every tag name in an array, from the list of
     * tags that are always treated as self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        $tagNames = is_array($tag) ? $tag : [$tag];

        // array_diff keeps the original keys of the remaining entries
        $remaining = array_diff($this->selfClosing, $tagNames);
        $this->selfClosing = $remaining;

        return $this;
    }
323
324
    /**
     * Empties the list of always-self-closing tags, including the built-in
     * defaults.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $dom = $this;
        $dom->selfClosing = [];

        return $dom;
    }
336
337
338
    /**
     * Appends one tag name, or every tag name in an array, to the list of
     * self closing tags that should be written without a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        $tagNames = is_array($tag) ? $tag : [$tag];
        foreach ($tagNames as $tagName) {
            $this->noSlash[] = $tagName;
        }

        return $this;
    }
356
357
    /**
     * Drops one tag name, or every tag name in an array, from the list of
     * no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        $tagNames = is_array($tag) ? $tag : [$tag];

        // array_diff keeps the original keys of the remaining entries
        $remaining = array_diff($this->noSlash, $tagNames);
        $this->noSlash = $remaining;

        return $this;
    }
373
374
    /**
     * Resets the list of no-slash tags to an empty list.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $dom = $this;
        $dom->noSlash = [];

        return $dom;
    }
386
387
    /**
     * Returns the first child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();

        $child = $this->root->firstChild();

        return $child;
    }
399
400
    /**
     * Returns the last child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();

        $child = $this->root->lastChild();

        return $child;
    }
412
413
    /**
     * Returns the number of children reported by the root node.
     *
     * @return int
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        $total = $this->root->countChildren();

        return $total;
    }
425
426
    /**
     * Returns the children of the root node as an array.
     *
     * @return array
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        $children = $this->root->getChildren();

        return $children;
    }
438
439
    /**
     * Tells whether the root node has any child nodes.
     *
     * @return bool
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        $hasAny = $this->root->hasChildren();

        return $hasAny;
    }
451
452
    /**
     * Returns the first match of the "#<id>" selector.
     *
     * @param mixed $id Value appended to '#' to build the selector.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        $selector = '#'.$id;

        return $this->find($selector, 0);
    }
466
467
    /**
     * Returns every element matched when the tag name is used as the
     * selector.
     *
     * @param string $name Tag name, passed to find() unchanged.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        $matches = $this->find($name);

        return $matches;
    }
481
482
    /**
     * Returns every element matched by the ".<class>" selector.
     *
     * @param string $class Class name appended to '.' to build the selector.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        $selector = '.'.$class;

        return $this->find($selector);
    }
496
497
    /**
     * Guard used by the public accessors: fails when no content has been
     * set by one of the load methods yet.
     *
     * @throws NotLoadedException When the content property is still null.
     */
    protected function isLoaded(): void
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }
508
509
    /**
     * Cleans the html of any non-html information.
     *
     * Unless the 'cleanupInput' option is off, this gunzips gzip-compressed
     * input, makes sure the string is valid UTF-8, removes whitespace between
     * a closing quote and '>', replaces line breaks, and strips the doctype,
     * comments and CDATA sections. Depending on the options it also strips
     * script tags, style tags, server side scripts and smarty scripts.
     *
     * @param string $str The raw document string.
     * @return string The cleaned document string.
     * @throws LogicalException When decoding, converting or a replacement
     *                          step fails.
     */
    protected function clean(string $str): string
    {
        if ($this->options->get('cleanupInput') != true) {
            // skip entire cleanup step
            return $str;
        }

        // gzip streams start with the magic bytes 1f 8b 08
        $is_gzip = 0 === mb_strpos($str, "\x1f" . "\x8b" . "\x08", 0, "US-ASCII");
        if ($is_gzip) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // Fix #190: the multibyte regex functions below fail on input that is
        // not valid in the mbstring regex encoding (assumed to be UTF-8 here;
        // TODO confirm no caller changes mb_regex_encoding()). Convert only
        // input that is NOT already valid UTF-8; converting unconditionally,
        // as utf8_encode() did, double-encodes every multibyte character of
        // UTF-8 documents. utf8_encode() is also deprecated since PHP 8.2.
        if ( ! mb_check_encoding($str, 'UTF-8')) {
            $converted = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
            if ( ! is_string($converted)) {
                throw new LogicalException('mb_convert_encoding failed to convert the input string to UTF-8.');
            }
            $str = $converted;
        }

        // remove white space before closing tags
        $str = $this->replaceOrFail("'\s+>", "'>", $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean single quotes.');
        $str = $this->replaceOrFail('"\s+>', '">', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to clean double quotes.');

        // clean out the \n\r (str_replace on a string subject always returns
        // a string, so there is no failure case to check)
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = '&#10;';
        }
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->replaceOrFail("<!doctype(.*?)>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip the doctype.');

        // strip out comments
        $str = $this->replaceOrFail("<!--(.*?)-->", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip comments.');

        // strip out cdata
        $str = $this->replaceOrFail("<!\[CDATA\[(.*?)\]\]>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out cdata.');

        // strip out <script> tags
        if ($this->options->get('removeScripts')) {
            $str = $this->replaceOrFail("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 1.');
            $str = $this->replaceOrFail("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove scripts 2.');
        }

        // strip out <style> tags
        if ($this->options->get('removeStyles')) {
            $str = $this->replaceOrFail("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 1.');
            $str = $this->replaceOrFail("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out style tags 2.');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $this->replaceOrFail("(<\?)(.*?)(\?>)", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to strip out service side scripts.');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->replaceOrFail("(\{\w)(.*?)(\})", '', $str, 'mb_eregi_replace returned false instead of a string. Error when attempting to remove smarty scripts.');
        }

        return $str;
    }

    /**
     * Runs one case-insensitive multibyte regex replacement and turns a
     * failed replacement into an exception.
     *
     * @param string $pattern      Pattern handed to mb_eregi_replace().
     * @param string $replacement  Replacement text.
     * @param string $subject      String to operate on.
     * @param string $errorMessage Message of the exception thrown on failure.
     * @return string
     * @throws LogicalException When mb_eregi_replace() does not return a string.
     */
    private function replaceOrFail(string $pattern, string $replacement, string $subject, string $errorMessage): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $subject);
        // mb_eregi_replace() signals failure with false or null
        if ( ! is_string($result)) {
            throw new LogicalException($errorMessage);
        }

        return $result;
    }
613
614
    /**
     * Builds the node tree from the loaded content.
     *
     * Starting with a fresh 'root' node, the content is consumed piece by
     * piece: text found before the next '<' becomes a TextNode child of the
     * active node, and anything else is handed to parseTag(). Parsing stops
     * when parseTag() reports a false status or the active node becomes null.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        $current = $this->root;
        while ($current !== null) {
            $text = $this->content->copyUntil('<');

            if ($text != '') {
                // text in front of the next tag; whitespace-only text is
                // kept only when the whitespaceTextNode option is set
                if ($this->options->whitespaceTextNode || trim($text) != '') {
                    $textNode = new TextNode($text, $this->options->removeDoubleSpace);
                    $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                    $current->addChild($textNode);
                }
                continue;
            }

            $info = $this->parseTag();
            if ( ! $info['status']) {
                // we are done here
                break;
            }

            if ($info['closing']) {
                $current = $this->nodeAfterClosingTag($current, $info['tag']);
                continue;
            }

            if ( ! isset($info['node'])) {
                // nothing to attach for this tag
                continue;
            }

            /** @var AbstractNode $node */
            $node = $info['node'];
            $current->addChild($node);

            // only a tag that is not self closing becomes the new active node
            if ( ! $node->getTag()->isSelfClosing()) {
                $current = $node;
            }
        }
    }

    /**
     * Determines the active node after a closing tag was read.
     *
     * Walks from the active node up through its ancestors until a node with
     * the given tag name is found and returns that node's parent. When no
     * ancestor matches, the closing tag is ignored and the active node is
     * returned unchanged.
     *
     * @param mixed  $activeNode The node that is active when the closing tag is read.
     * @param string $tagName    Name of the closing tag.
     * @return mixed The node to continue parsing in, or null when the matching
     *               node has no parent.
     */
    private function nodeAfterClosingTag($activeNode, $tagName)
    {
        $candidate = $activeNode;
        while ($candidate->getTag()->name() != $tagName) {
            $candidate = $candidate->getParent();
            if (is_null($candidate)) {
                // we could not find opening tag
                return $activeNode;
            }
        }

        return $candidate->getParent();
    }
679
680
    /**
     * Attempts to read one tag at the current content position.
     *
     * The returned array always has the keys 'status', 'closing' and 'node':
     *  - status is false when no tag could be read (not positioned on '<',
     *    or an empty tag name);
     *  - for a closing tag, closing is true and the extra key 'tag' holds the
     *    lower-cased tag name; a closing tag whose name is in the self
     *    closing list is reported with status true, closing false and no node;
     *  - for an opening tag, node holds the new HtmlNode with its attributes.
     *
     * @return array
     * @throws StrictException In strict mode, for an attribute without a
     *                         value or a self closing tag without a slash.
     */
    protected function parseTag(): array
    {
        $result = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];

        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $result;
        }

        // check if this is a closing tag
        if ($this->content->fastForward(1)->char() == '/') {
            $closingName = $this->content->fastForward(1)
                                         ->copyByToken('slash', true);
            // move to end of tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            $closingName      = strtolower($closingName);
            $result['status'] = true;
            // a closing tag of an always-self-closing tag does not count
            if ( ! in_array($closingName, $this->selfClosing, true)) {
                $result['closing'] = true;
                $result['tag']     = $closingName;
            }

            return $result;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $result;
        }

        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes
        while ($this->content->char() != '>' && $this->content->char() != '/') {
            $blank = $this->content->skipByToken('blank', true);
            if (empty($blank)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() == '=') {
                // attribute with a value
                $this->content->fastForward(1)
                              ->skipByToken('blank');
                $attr = $this->readAttributeValue();
                $node->getTag()->$name = $attr;
                continue;
            }

            // no value attribute
            if ($this->options->strict) {
                // can't have this in strict html
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
            }
            $node->getTag()->$name = [
                'value'       => null,
                'doubleQuote' => true,
            ];
            if ($this->content->char() != '>') {
                $this->content->rewind(1);
            }
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        $this->content->fastForward(1);

        $result['status'] = true;
        $result['node']   = $node;

        return $result;
    }

    /**
     * Reads an attribute value starting at the current content position,
     * which must be just behind the '=' and any blanks following it.
     *
     * A value opened by a double or single quote is read up to its closing
     * quote; any other value is read with the 'attr' token and reported as
     * double quoted.
     *
     * @return array Array with the keys 'doubleQuote' and 'value'.
     */
    private function readAttributeValue(): array
    {
        $attr   = [];
        $opener = $this->content->char();

        if ($opener == '"' || $opener == "'") {
            $isDouble  = $opener == '"';
            $delimiter = $isDouble ? '"' : "'";

            $attr['doubleQuote'] = $isDouble;
            $this->content->fastForward(1);
            $value = $this->content->copyUntil($delimiter, true);
            // keep appending for as long as copyUntilUnless() yields more text
            do {
                $more = $this->content->copyUntilUnless($delimiter, '=>');
                $value .= $more;
            } while ( ! empty($more));
            $attr['value'] = $value;
            $this->content->fastForward(1);

            return $attr;
        }

        // unquoted value
        $attr['doubleQuote'] = true;
        $attr['value']       = $this->content->copyByToken('attr', true);

        return $attr;
    }
835
836
    /**
     * Attempts to detect the charset that the html was sent in and hands the
     * resulting encoder to the root node.
     *
     * When the enforceEncoding option is set, nothing is propagated and
     * false is returned. Otherwise the charset is taken from the content
     * attribute of the first meta[http-equiv=Content-Type] element; when
     * none is found the default charset is used for both directions.
     *
     * @return bool True only when a charset was read from the meta tag.
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ( ! is_null($enforceEncoding)) {
            //  they want to enforce the given encoding
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        $detected = false;

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if ( ! is_null($meta)) {
            // NOTE(review): static analysis reported that
            // AbstractNode::getAttribute() seems to always return null, which
            // would make the charset lookup below unreachable. The coverage
            // data shows it is reached, so the node found here presumably
            // overrides getAttribute() - confirm against the node classes.
            $content = $meta->getAttribute('content');
            $matches = [];
            if ( ! is_null($content) && preg_match('/charset=(.+)/', $content, $matches)) {
                $encode->from(trim($matches[1]));
                $detected = true;
            }
        }

        // with no meta tag, no content attribute or no charset in it, the
        // default encoder is propagated unchanged
        $this->root->propagateEncoding($encode);

        return $detected;
    }
886
}
887