Completed
Pull Request — master (#212)
by
unknown
03:37
created

Dom::removeSelfClosingTag()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 1
dl 0
loc 8
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
     * The charset we would like the output to be in.
     *
     * @var string
     */
    protected $defaultCharset = 'UTF-8';

    /**
     * Contains the root node of this dom tree. Assigned by parse().
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * The raw version of the document string, exactly as handed to loadStr().
     *
     * @var string
     */
    protected $raw;

    /**
     * The document string wrapped in a Content cursor; null until a load
     * method has been called.
     *
     * @var Content|null
     */
    protected $content = null;

    /**
     * The original size of the document in bytes.
     *
     * @var int
     */
    protected $rawSize;

    /**
     * The size of the document after it is cleaned.
     *
     * @var int
     */
    protected $size;

    /**
     * A global options array to be used by all load calls.
     *
     * @var array
     */
    protected $globalOptions = [];

    /**
     * A persistent option object to be used for all options in the
     * parsing of the file.
     *
     * @var Options
     */
    protected $options;

    /**
     * A list of (lowercase) tag names which will always be self closing.
     *
     * @var string[]
     */
    protected $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr',
    ];

    /**
     * A list of tags where there should be no /> at the end (html5 style).
     *
     * @var string[]
     */
    protected $noSlash = [];
114
115
    /**
     * Casts the dom to a string by returning the inner html of the root node.
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        return $this->root->innerHtml();
    }
126
127
    /**
     * A simple wrapper around the root node: reading an undefined or
     * inaccessible property on the dom reads the same property on the root.
     *
     * @param string $name name of the property to read from the root node
     * @return mixed
     */
    public function __get($name)
    {
        return $this->root->$name;
    }
137
138
    /**
     * Attempts to load the dom from any resource, string, file, or URL.
     *
     * The input is treated as a file path when it is a single line that names
     * an existing file, as a URL when it starts with http:// or https://, and
     * as raw markup otherwise.
     *
     * @param string $str
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     * @throws LogicalException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // Only probe the filesystem when the string could be a path at all:
        // a single line, without a NUL byte and within the platform's path
        // length limit. This avoids handing is_file() long one-line markup
        // or binary data that can never name a file.
        $couldBePath = strpos($str, "\n") === false
            && strpos($str, "\0") === false
            && strlen($str) <= PHP_MAXPATHLEN;
        if ($couldBePath && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // check if it's a url
        if (preg_match("/^https?:\/\//i", $str)) {
            return $this->loadFromUrl($str, $options);
        }

        return $this->loadStr($str, $options);
    }
162
163
    /**
     * Loads the dom from a document file/url by reading it with
     * file_get_contents() and parsing the result.
     *
     * @param string $file
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException when the file could not be read
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $content = file_get_contents($file);
        if ($content === false) {
            throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
        }

        return $this->loadStr($content, $options);
    }
181
182
    /**
     * Use a curl interface implementation to attempt to load
     * the content from a url.
     *
     * @param string             $url
     * @param array              $options
     * @param CurlInterface|null $curl    fetcher to use; a default Curl instance is created when null
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        if ($curl === null) {
            // use the default curl interface
            $curl = new Curl;
        }
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }
204
205
    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * Builds a fresh Options object from the global options overlaid with the
     * per-call options, records the raw document and its size, cleans the
     * input, then parses it and detects its charset.
     *
     * @param string $str
     * @param array  $option
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // The size is that of the cleaned document, so it must be measured on
        // the cleaned string rather than on the raw input.
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
     * Sets a global options array to be used by all load calls.
     * Replaces any previously set global options.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }
248
249
    /**
     * Find elements by css selector on the root node.
     *
     * The 'depthFirstSearch' option is forwarded to the root node only when
     * it holds a boolean; otherwise the root node's own default is used.
     *
     * @param string   $selector
     * @param int|null $nth
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        return $this->root->find($selector, $nth);
    }
270
271
    /**
     * Find element by Id on the root node.
     *
     * @param int $id
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        return $this->root->findById($id);
    }
285
286
    /**
     * Adds the tag (or tags in an array) to the list of tags that will always
     * be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        foreach ($tag as $value) {
            $this->selfClosing[] = $value;
        }

        return $this;
    }
305
306
    /**
     * Removes the tag (or tags in an array) from the list of tags that will
     * always be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        // array_diff() preserves keys; re-index so the property stays a list.
        $this->selfClosing = array_values(array_diff($this->selfClosing, $tag));

        return $this;
    }
323
324
    /**
     * Sets the list of self closing tags to empty.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $this->selfClosing = [];

        return $this;
    }
336
337
338
    /**
     * Adds a tag (or tags in an array) to the list of self closing tags that
     * should not have a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        foreach ($tag as $value) {
            $this->noSlash[] = $value;
        }

        return $this;
    }
356
357
    /**
     * Removes a tag (or tags in an array) from the list of no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        // array_diff() preserves keys; re-index so the property stays a list.
        $this->noSlash = array_values(array_diff($this->noSlash, $tag));

        return $this;
    }
373
374
    /**
     * Empties the list of no-slash tags.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $this->noSlash = [];

        return $this;
    }
386
387
    /**
     * Simple wrapper function that returns the first child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();

        return $this->root->firstChild();
    }
399
400
    /**
     * Simple wrapper function that returns the last child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();

        return $this->root->lastChild();
    }
412
413
    /**
     * Simple wrapper function that returns the count of child elements of
     * the root node.
     *
     * @return int
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        return $this->root->countChildren();
    }
425
426
    /**
     * Get the array of children of the root node.
     *
     * @return array
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        return $this->root->getChildren();
    }
438
439
    /**
     * Check if the root node has child nodes.
     *
     * @return bool
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        return $this->root->hasChildren();
    }
451
452
    /**
     * Simple wrapper function that returns an element by the
     * id. Runs find() with the selector '#<id>' and asks for the
     * first match only.
     *
     * @param $id value appended to '#' to build the selector
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        return $this->find('#'.$id, 0);
    }
466
467
    /**
     * Simple wrapper function that returns all elements by
     * tag name. The name is passed to find() unchanged as the selector.
     *
     * @param string $name
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        return $this->find($name);
    }
481
482
    /**
     * Simple wrapper function that returns all elements by
     * class name. Runs find() with the selector '.<class>'.
     *
     * @param string $class
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        return $this->find('.'.$class);
    }
496
497
    /**
     * Checks if the load methods have been called, i.e. whether the
     * content property has been set.
     *
     * @return void
     * @throws NotLoadedException when no content has been loaded yet
     */
    protected function isLoaded(): void
    {
        if ($this->content === null) {
            throw new NotLoadedException('Content is not loaded!');
        }
    }
508
509
    /**
     * Cleans the html of any none-html information.
     *
     * Unless the 'cleanupInput' option is off, this gunzips gzip-compressed
     * input, optionally converts it to UTF-8 from the 'useFromEncoding'
     * charset, normalises whitespace before tag ends and line breaks, and
     * strips the doctype, comments, CDATA sections and, depending on the
     * options, script tags, style tags, server side scripts and smarty
     * scripts.
     *
     * @param string $str
     * @return string
     * @throws LogicalException when decoding or one of the replacements fails
     */
    protected function clean(string $str): string
    {
        if ($this->options->get('cleanupInput') != true) {
            // skip entire cleanup step
            return $str;
        }

        // A gzip stream is recognised by its three leading bytes; only the
        // start of the string needs to be inspected.
        if (strncmp($str, "\x1f" . "\x8b" . "\x08", 3) === 0) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // sometimes the source encoding has to be supplied by the caller
        if ($this->options->get('useFromEncoding') != null) {
            $str = mb_convert_encoding($str, "UTF-8", $this->options->get('useFromEncoding'));
        }

        // remove white space before closing tags
        $str = $this->eregiReplaceOrFail("'\s+>", "'>", $str, 'clean single quotes');
        $str = $this->eregiReplaceOrFail('"\s+>', '">', $str, 'clean double quotes');

        // clean out the \n\r
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = '&#10;';
        }
        // str_replace() on a string subject always yields a string, so no
        // failure check is needed here.
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->eregiReplaceOrFail("<!doctype(.*?)>", '', $str, 'strip the doctype');

        // strip out comments
        $str = $this->eregiReplaceOrFail("<!--(.*?)-->", '', $str, 'strip comments');

        // strip out cdata
        $str = $this->eregiReplaceOrFail("<!\[CDATA\[(.*?)\]\]>", '', $str, 'strip out cdata');

        // strip out <script> tags
        if ($this->options->get('removeScripts')) {
            $str = $this->eregiReplaceOrFail("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 1');
            $str = $this->eregiReplaceOrFail("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 2');
        }

        // strip out <style> tags
        if ($this->options->get('removeStyles')) {
            $str = $this->eregiReplaceOrFail("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 1');
            $str = $this->eregiReplaceOrFail("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 2');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $this->eregiReplaceOrFail("(<\?)(.*?)(\?>)", '', $str, 'strip out service side scripts');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->eregiReplaceOrFail("(\{\w)(.*?)(\})", '', $str, 'remove smarty scripts');
        }

        return $str;
    }

    /**
     * Runs mb_eregi_replace() and turns any non-string result into an exception.
     *
     * mb_eregi_replace() can report failure with false or null; neither may
     * be handed on to the next cleanup step as if it were the document.
     *
     * @param string $pattern     the mb regex pattern to search for
     * @param string $replacement the replacement text
     * @param string $subject     the string being cleaned
     * @param string $action      description of the step, used in the error message
     * @return string
     * @throws LogicalException when the replacement does not return a string
     */
    private function eregiReplaceOrFail(string $pattern, string $replacement, string $subject, string $action): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $subject);
        if ( ! is_string($result)) {
            $returned = $result === null ? 'null' : 'false';
            throw new LogicalException('mb_eregi_replace returned '.$returned.' instead of a string. Error when attempting to '.$action.'.');
        }

        return $result;
    }
615
616
    /**
     * Attempts to parse the html in content.
     *
     * Creates the root node and then walks the content: text found before the
     * next '<' becomes a TextNode child of the active node, and each tag
     * reported by parseTag() either opens a new child node or closes the
     * matching open ancestor.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
        $activeNode = $this->root;
        while ( ! is_null($activeNode)) {
            $str = $this->content->copyUntil('<');
            if ($str == '') {
                $info = $this->parseTag();
                if ( ! $info['status']) {
                    // we are done here
                    $activeNode = null;
                    continue;
                }

                // check if it was a closing tag
                if ($info['closing']) {
                    $foundOpeningTag  = true;
                    $originalNode     = $activeNode;
                    // climb the ancestors until one carries the closed tag name
                    while ($activeNode->getTag()->name() != $info['tag']) {
                        $activeNode = $activeNode->getParent();
                        if (is_null($activeNode)) {
                            // we could not find opening tag; ignore this
                            // closing tag and stay where we were
                            $activeNode = $originalNode;
                            $foundOpeningTag = false;
                            break;
                        }
                    }
                    if ($foundOpeningTag) {
                        // the matched node is now closed, continue in its parent
                        $activeNode = $activeNode->getParent();
                    }
                    continue;
                }

                if ( ! isset($info['node'])) {
                    // the tag was consumed but produced no node
                    continue;
                }

                /** @var AbstractNode $node */
                $node = $info['node'];
                $activeNode->addChild($node);

                // check if node is self closing; only other nodes can
                // receive children
                if ( ! $node->getTag()->isSelfClosing()) {
                    $activeNode = $node;
                }
            } elseif ($this->options->whitespaceTextNode ||
                trim($str) != ''
            ) {
                // we found text we care about
                $textNode = new TextNode($str, $this->options->removeDoubleSpace);
                $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                $activeNode->addChild($textNode);
            }
        }
    }
681
682
    /**
     * Attempt to parse a tag out of the content.
     *
     * The returned array always has the keys:
     *  - 'status'  (bool) true when a tag was consumed from the content,
     *  - 'closing' (bool) true when it was a closing tag that counts,
     *  - 'node'    (HtmlNode|null) the node built for an opening tag.
     * For a counting closing tag it additionally has 'tag' (string), the
     * lowercased tag name. Closing tags of always-self-closing tags are
     * consumed but reported with 'closing' false and no node.
     *
     * @return array
     * @throws StrictException
     */
    protected function parseTag(): array
    {
        $return = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];
        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $return;
        }

        // check if this is a closing tag
        if ($this->content->fastForward(1)->char() == '/') {
            // end tag
            $tag = $this->content->fastForward(1)
                                 ->copyByToken('slash', true);
            // move to end of tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            // check if this closing tag counts
            $tag = strtolower($tag);
            $return['status'] = true;
            if ( ! in_array($tag, $this->selfClosing, true)) {
                $return['closing'] = true;
                $return['tag']     = $tag;
            }

            return $return;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $return;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes
        while ($this->content->char() != '>' &&
            $this->content->char() != '/') {
            $space = $this->content->skipByToken('blank', true);
            if (empty($space)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() == '=') {
                $attr = [];
                $this->content->fastForward(1)
                              ->skipByToken('blank');
                $quote = $this->content->char();
                if ($quote == '"' || $quote == "'") {
                    // quoted value: both quote styles are read the same way,
                    // only the delimiter differs
                    $attr['doubleQuote'] = $quote == '"';
                    $this->content->fastForward(1);
                    $string = $this->content->copyUntil($quote, true);
                    do {
                        $moreString = $this->content->copyUntilUnless($quote, '=>');
                        $string .= $moreString;
                    } while ( ! empty($moreString));
                    $attr['value'] = $string;
                    $this->content->fastForward(1);
                } else {
                    // unquoted value
                    $attr['doubleQuote'] = true;
                    $attr['value']       = $this->content->copyByToken('attr', true);
                }
                $node->getTag()->$name = $attr;
            } else {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
            }
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step past the closing '>'
        $this->content->fastForward(1);

        $return['status'] = true;
        $return['node']   = $node;

        return $return;
    }
837
838
    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * Looks for a <meta http-equiv=Content-Type> tag and reads the charset
     * from its content attribute. The resulting encoder (from the detected
     * charset, or the default charset when none is found, to the default
     * charset) is propagated through the root node.
     *
     * @return bool true only when a charset was found in the meta tag
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ( ! is_null($enforceEncoding)) {
            //  they want to enforce the given encoding
            // NOTE(review): the encoder configured here is not propagated to
            // the root node on this path - confirm that this is intended.
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (is_null($meta)) {
            // could not find meta tag
            $this->root->propagateEncoding($encode);

            return false;
        }

        // NOTE(review): static analysis reported that
        // AbstractNode::getAttribute() seems to always return null, which
        // would make the guard below always true - confirm against the
        // concrete node class that find() returns here.
        $content = $meta->getAttribute('content');
        if (is_null($content)) {
            // could not find content
            $this->root->propagateEncoding($encode);

            return false;
        }
        $matches = [];
        if (preg_match('/charset=(.+)/', $content, $matches)) {
            $encode->from(trim($matches[1]));
            $this->root->propagateEncoding($encode);

            return true;
        }

        // no charset found
        $this->root->propagateEncoding($encode);

        return false;
    }
888
}
889