Passed
Push — master ( 12b94f...668c77 )
by Gilles
03:31
created

Dom::clean()   F

Complexity

Conditions 21
Paths 158

Size

Total Lines 94
Code Lines 51

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 35
CRAP Score 36.4057

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 21
eloc 51
c 2
b 0
f 0
nc 158
nop 1
dl 0
loc 94
ccs 35
cts 52
cp 0.6731
crap 36.4057
rs 3.6833

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
27
     * The charset we would like the output to be in.
28
     *
29
     * @var string
30
     */
31
    protected $defaultCharset = 'UTF-8';
32
33
    /**
34
     * Contains the root node of this dom tree.
35
     *
36
     * @var HtmlNode
37
     */
38
    public $root;
39
40
    /**
41
     * The raw version of the document string.
42
     *
43
     * @var string
44
     */
45
    protected $raw;
46
47
    /**
48
     * The document string.
49
     *
50
     * @var Content
51
     */
52
    protected $content = null;
53
54
    /**
55
     * The original file size of the document.
56
     *
57
     * @var int
58
     */
59
    protected $rawSize;
60
61
    /**
62
     * The size of the document after it is cleaned.
63
     *
64
     * @var int
65
     */
66
    protected $size;
67
68
    /**
69
     * A global options array to be used by all load calls.
70
     *
71
     * @var array
72
     */
73
    protected $globalOptions = [];
74
75
    /**
76
     * A persistent option object to be used for all options in the
77
     * parsing of the file.
78
     *
79
     * @var Options
80
     */
81
    protected $options;
82
83
    /**
84
     * A list of tags which will always be self closing
85
     *
86
     * @var array
87
     */
88
    protected $selfClosing = [
89
        'area',
90
        'base',
91
        'basefont',
92
        'br',
93
        'col',
94
        'embed',
95
        'hr',
96
        'img',
97
        'input',
98
        'keygen',
99
        'link',
100
        'meta',
101
        'param',
102
        'source',
103
        'spacer',
104
        'track',
105
        'wbr'
106
    ];
107
108
    /**
109
     * A list of tags where there should be no /> at the end (html5 style)
110
     *
111
     * @var array
112
     */
113
    protected $noSlash = [];
114
115
    /**
116
     * Returns the inner html of the root node.
117
     *
118
     * @return string
119
     * @throws ChildNotFoundException
120
     * @throws UnknownChildTypeException
121
     */
122 24
    public function __toString(): string
    {
        // Casting the Dom to a string yields whatever the root node reports
        // as its inner html.
        $markup = $this->root->innerHtml();

        return $markup;
    }
126
127
    /**
128
     * A simple wrapper around the root node.
129
     *
130
     * @param string $name
131
     * @return mixed
132
     */
133 18
    public function __get($name)
    {
        // Reads of properties the Dom does not define are answered by the
        // root node's property of the same name.
        $root = $this->root;

        return $root->$name;
    }
137
138
    /**
139
     * Attempts to load the dom from any resource, string, file, or URL.
140
     * @param string $str
141
     * @param array  $options
142
     * @return Dom
143
     * @throws ChildNotFoundException
144
     * @throws CircularException
145
     * @throws CurlException
146
     * @throws StrictException
147
     */
148 183
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // A path never contains a line break, so only single-line input is
        // probed on the filesystem.
        $isSingleLine = strpos($str, "\n") === false;
        if ($isSingleLine && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // Input starting with http:// or https:// is fetched as a url.
        $looksLikeUrl = preg_match("/^https?:\/\//i", $str);
        if ($looksLikeUrl) {
            return $this->loadFromUrl($str, $options);
        }

        // Anything else is treated as the markup itself.
        return $this->loadStr($str, $options);
    }
162
163
    /**
164
     * Loads the dom from a document file/url
165
     * @param string $file
166
     * @param array  $options
167
     * @return Dom
168
     * @throws ChildNotFoundException
169
     * @throws CircularException
170
     * @throws StrictException
171
     * @throws LogicalException
172
     */
173 51
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $content = file_get_contents($file);
        if ($content !== false) {
            // The file was read; hand its contents to the string loader.
            return $this->loadStr($content, $options);
        }

        // file_get_contents() signals failure by returning false.
        throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
    }
181
182
    /**
183
     * Use a curl interface implementation to attempt to load
184
     * the content from a url.
185
     * @param string                            $url
186
     * @param array                             $options
187
     * @param CurlInterface|null $curl
188
     * @return Dom
189
     * @throws ChildNotFoundException
190
     * @throws CircularException
191
     * @throws CurlException
192
     * @throws StrictException
193
     */
194 6
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // The parameter is declared explicitly nullable: relying on the
        // "= null" default to make a typed parameter nullable is deprecated
        // as of PHP 8.4. Callers are unaffected.

        // Fall back to the default curl implementation when none is injected.
        $curl    = $curl ?? new Curl;
        $content = $curl->get($url, $options);

        // The downloaded body is parsed exactly like any other string input.
        return $this->loadStr($content, $options);
    }
204
205
    /**
206
     * Parses the html of the given string. Used for load(), loadFromFile(),
207
     * and loadFromUrl().
208
     * @param string $str
209
     * @param array  $option
210
     * @return Dom
211
     * @throws ChildNotFoundException
212
     * @throws CircularException
213
     * @throws StrictException
214
     */
215 246
    public function loadStr(string $str, array $option = []): Dom
    {
        // Start from a fresh Options object: the global options are applied
        // first, then the options given for this call.
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        // Keep the untouched input and its byte length.
        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // $size is documented as the size of the document after cleaning, so
        // it has to be measured on the cleaned markup. Measuring $str here
        // (as before) always duplicated $rawSize.
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
236
     * Sets a global options array to be used by all load calls.
237
     *
238
     * @param array $options
239
     * @return Dom
240
     * @chainable
241
     */
242 54
    public function setOptions(array $options): Dom
    {
        // Replaces (does not merge with) any previously stored global
        // options; loadStr() reads them on every load.
        $this->globalOptions = $options;

        return $this;
    }
248
249
    /**
250
     * Find elements by css selector on the root node.
251
     * @param string   $selector
252
     * @param int|null $nth
253
     * @return mixed|Collection|null
254
     * @throws ChildNotFoundException
255
     * @throws NotLoadedException
256
     */
257 180
    public function find(string $selector, ?int $nth = null)
    {
        // The parameter is declared explicitly nullable: relying on the
        // "= null" default to make a typed parameter nullable is deprecated
        // as of PHP 8.4. Callers are unaffected.

        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if ( ! is_bool($depthFirstSearch)) {
            // No usable boolean configured: let the root node apply its own
            // default search order.
            return $this->root->find($selector, $nth);
        }

        return $this->root->find($selector, $nth, $depthFirstSearch);
    }
270
271
    /**
272
     * Find element by Id on the root node
273
     * @param int $id
274
     * @return bool|AbstractNode
275
     * @throws ChildNotFoundException
276
     * @throws NotLoadedException
277
     * @throws ParentNotFoundException
278
     */
279 9
    public function findById(int $id)
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        // The lookup itself is delegated to the root node.
        $match = $this->root->findById($id);

        return $match;
    }
285
286
    /**
287
     * Adds the tag (or tags in an array) to the list of tags that will always
288
     * be self closing.
289
     *
290
     * @param string|array $tag
291
     * @return Dom
292
     * @chainable
293
     */
294 6
    public function addSelfClosingTag($tag): Dom
    {
        // Treat a single tag as a one-element list.
        $tags = is_array($tag) ? $tag : [$tag];

        // Append each entry; existing entries are kept and duplicates are
        // not filtered.
        foreach ($tags as $tagName) {
            $this->selfClosing[] = $tagName;
        }

        return $this;
    }
305
306
    /**
307
     * Removes the tag (or tags in an array) from the list of tags that will
308
     * always be self closing.
309
     *
310
     * @param string|array $tag
311
     * @return Dom
312
     * @chainable
313
     */
314 3
    public function removeSelfClosingTag($tag): Dom
    {
        // Treat a single tag as a one-element list.
        $tags = is_array($tag) ? $tag : [$tag];

        // array_diff() drops every listed tag and keeps the remaining
        // entries under their original keys.
        $this->selfClosing = array_diff($this->selfClosing, $tags);

        return $this;
    }
323
324
    /**
325
     * Sets the list of self closing tags to empty.
326
     *
327
     * @return Dom
328
     * @chainable
329
     */
330 3
    public function clearSelfClosingTags(): Dom
    {
        // After this call no tag is forced to be self closing.
        $this->selfClosing = [];

        return $this;
    }
336
337
338
    /**
339
     * Adds a tag to the list of self closing tags that should not have a trailing slash
340
     *
341
     * @param $tag
342
     * @return Dom
343
     * @chainable
344
     */
345 3
    public function addNoSlashTag($tag): Dom
    {
        // Treat a single tag as a one-element list.
        $tags = is_array($tag) ? $tag : [$tag];

        // Append each entry; existing entries are kept and duplicates are
        // not filtered.
        foreach ($tags as $tagName) {
            $this->noSlash[] = $tagName;
        }

        return $this;
    }
356
357
    /**
358
     * Removes a tag from the list of no-slash tags.
359
     *
360
     * @param $tag
361
     * @return Dom
362
     * @chainable
363
     */
364
    public function removeNoSlashTag($tag): Dom
    {
        // Treat a single tag as a one-element list.
        $tags = is_array($tag) ? $tag : [$tag];

        // array_diff() drops every listed tag and keeps the remaining
        // entries under their original keys.
        $this->noSlash = array_diff($this->noSlash, $tags);

        return $this;
    }
373
374
    /**
375
     * Empties the list of no-slash tags.
376
     *
377
     * @return Dom
378
     * @chainable
379
     */
380
    public function clearNoSlashTags(): Dom
    {
        // After this call no tag is flagged as "no trailing slash".
        $this->noSlash = [];

        return $this;
    }
386
387
    /**
388
     * Simple wrapper function that returns the first child.
389
     * @return AbstractNode
390
     * @throws ChildNotFoundException
391
     * @throws NotLoadedException
392
     */
393 3
    public function firstChild(): AbstractNode
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $child = $this->root->firstChild();

        return $child;
    }
399
400
    /**
401
     * Simple wrapper function that returns the last child.
402
     * @return AbstractNode
403
     * @throws ChildNotFoundException
404
     * @throws NotLoadedException
405
     */
406 3
    public function lastChild(): AbstractNode
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $child = $this->root->lastChild();

        return $child;
    }
412
413
    /**
414
     * Simple wrapper function that returns count of child elements
415
     *
416
     * @return int
417
     * @throws NotLoadedException
418
     */
419 3
    public function countChildren(): int
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $total = $this->root->countChildren();

        return $total;
    }
425
426
    /**
427
     * Get array of children
428
     *
429
     * @return array
430
     * @throws NotLoadedException
431
     */
432 3
    public function getChildren(): array
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $children = $this->root->getChildren();

        return $children;
    }
438
439
    /**
440
     * Check if node have children nodes
441
     *
442
     * @return bool
443
     * @throws NotLoadedException
444
     */
445 3
    public function hasChildren(): bool
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        $hasAny = $this->root->hasChildren();

        return $hasAny;
    }
451
452
    /**
453
     * Simple wrapper function that returns an element by the
454
     * id.
455
     * @param $id
456
     * @return mixed|Collection|null
457
     * @throws ChildNotFoundException
458
     * @throws NotLoadedException
459
     */
460 12
    public function getElementById($id)
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        // Build the "#id" selector and ask for the first match only.
        $selector = '#'.$id;

        return $this->find($selector, 0);
    }
466
467
    /**
468
     * Simple wrapper function that returns all elements by
469
     * tag name.
470
     * @param string $name
471
     * @return mixed|Collection|null
472
     * @throws ChildNotFoundException
473
     * @throws NotLoadedException
474
     */
475 15
    public function getElementsByTag(string $name)
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        // A bare tag name is passed to find() as the selector.
        $matches = $this->find($name);

        return $matches;
    }
481
482
    /**
483
     * Simple wrapper function that returns all elements by
484
     * class name.
485
     * @param string $class
486
     * @return mixed|Collection|null
487
     * @throws ChildNotFoundException
488
     * @throws NotLoadedException
489
     */
490 3
    public function getElementsByClass(string $class)
    {
        // Throws NotLoadedException when nothing has been loaded yet.
        $this->isLoaded();

        // Build the ".class" selector and return every match.
        $selector = '.'.$class;

        return $this->find($selector);
    }
496
497
    /**
498
     * Checks if the load methods have been called.
499
     *
500
     * @throws NotLoadedException
501
     */
502 204
    protected function isLoaded(): void
    {
        // $content starts out as null and is assigned by loadStr().
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }
508
509
    /**
510
     * Cleans the html of any non-html information.
511
     *
512
     * @param string $str
513
     * @return string
     * @throws LogicalException
514
     */
515 246
    protected function clean(string $str): string
    {
        // Throws LogicalException when gzip decoding or any of the regex
        // replacements below fails.

        // A falsy 'cleanupInput' option disables the whole step (same
        // outcome as the loose `!= true` comparison used previously).
        if ( ! $this->options->get('cleanupInput')) {
            return $str;
        }

        // Gzip streams begin with the bytes 1f 8b 08. Compare the first
        // three raw bytes directly instead of searching the whole document
        // through the multibyte string functions.
        if (strncmp($str, "\x1f\x8b\x08", 3) === 0) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // remove white space between a closing quote and the end of the tag
        $str = $this->eregiReplaceOrFail("'\s+>", "'>", $str, 'clean single quotes');
        $str = $this->eregiReplaceOrFail('"\s+>', '">', $str, 'clean double quotes');

        // Replace line breaks with a space, or with the &#10; entity when
        // they have to be preserved. str_replace() cannot fail for a string
        // subject, so no result check is needed here.
        $lineBreak = $this->options->get('preserveLineBreaks') ? '&#10;' : ' ';
        $str = str_replace(["\r\n", "\r", "\n"], $lineBreak, $str);

        // strip the doctype, comments and cdata sections
        $str = $this->eregiReplaceOrFail("<!doctype(.*?)>", '', $str, 'strip the doctype');
        $str = $this->eregiReplaceOrFail("<!--(.*?)-->", '', $str, 'strip comments');
        $str = $this->eregiReplaceOrFail("<!\[CDATA\[(.*?)\]\]>", '', $str, 'strip out cdata');

        // strip out <script> tags (first with attributes, then bare)
        if ($this->options->get('removeScripts')) {
            $str = $this->eregiReplaceOrFail("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 1');
            $str = $this->eregiReplaceOrFail("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 2');
        }

        // strip out <style> tags (first with attributes, then bare)
        if ($this->options->get('removeStyles')) {
            $str = $this->eregiReplaceOrFail("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 1');
            $str = $this->eregiReplaceOrFail("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 2');
        }

        // strip out server side scripts (<? ... ?>)
        if ($this->options->get('serverSideScripts')) {
            $str = $this->eregiReplaceOrFail("(<\?)(.*?)(\?>)", '', $str, 'strip out service side scripts');
        }

        // strip smarty scripts ({word ... })
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->eregiReplaceOrFail("(\{\w)(.*?)(\})", '', $str, 'remove smarty scripts');
        }

        return $str;
    }

    /**
     * Runs one case-insensitive multibyte regex replacement for clean() and
     * converts a failed replacement into a LogicalException.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $str
     * @param string $task Completes the sentence "Error when attempting to ...".
     * @return string
     * @throws LogicalException
     */
    private function eregiReplaceOrFail(string $pattern, string $replacement, string $str, string $task): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $str);
        if (is_string($result)) {
            return $result;
        }

        // mb_eregi_replace() reports an error with false and, per the PHP
        // manual, returns null when the subject is not valid for the current
        // encoding. Both are surfaced as a LogicalException instead of
        // letting a non-string leak into the next step.
        $returned = is_null($result) ? 'null' : 'false';

        throw new LogicalException('mb_eregi_replace returned '.$returned.' instead of a string. Error when attempting to '.$task.'.');
    }
610
611
    /**
612
     * Attempts to parse the html in content.
613
     *
614
     * @return void
615
     * @throws ChildNotFoundException
616
     * @throws CircularException
617
     * @throws StrictException
618
     */
619 246
    protected function parse(): void
    {
        // Every parsed node ends up somewhere beneath this synthetic root.
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // $activeNode is the node that currently receives new children.
        $activeNode = $this->root;
        while ($activeNode !== null) {
            $str = $this->content->copyUntil('<');

            if ($str != '') {
                // Text in front of the next tag. It is kept when it holds
                // non-whitespace, or always when whitespaceTextNode is set.
                if ($this->options->whitespaceTextNode || trim($str) != '') {
                    $textNode = new TextNode($str, $this->options->removeDoubleSpace);
                    $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                    $activeNode->addChild($textNode);
                }
                continue;
            }

            $info = $this->parseTag();
            if ( ! $info['status']) {
                // parseTag() found no tag here: we are done.
                break;
            }

            if ($info['closing']) {
                // Walk up from the active node looking for the element this
                // closing tag belongs to.
                $candidate = $activeNode;
                while ($candidate !== null && $candidate->getTag()->name() != $info['tag']) {
                    $candidate = $candidate->getParent();
                }
                if ($candidate !== null) {
                    // Found it: continue with the parent of the closed element.
                    $activeNode = $candidate->getParent();
                }
                // Without a matching opening tag the closing tag is ignored
                // and the active node stays where it was.
                continue;
            }

            if ( ! isset($info['node'])) {
                // A tag was consumed but produced no node.
                continue;
            }

            /** @var AbstractNode $node */
            $node = $info['node'];
            $activeNode->addChild($node);

            // Only elements that are not self closing can take children.
            if ( ! $node->getTag()->isSelfClosing()) {
                $activeNode = $node;
            }
        }
    }
676
677
    /**
678
     * Attempt to parse a tag out of the content.
679
     *
680
     * @return array
681
     * @throws StrictException
682
     */
683 246
    protected function parseTag(): array
    {
        // Result template. parse() stops when 'status' is false.
        $result = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];
        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $result;
        }

        // Step over '<' and check whether this is a closing tag.
        if ($this->content->fastForward(1)->char() == '/') {
            $tag = $this->content->fastForward(1)
                                 ->copyByToken('slash', true);
            // move past the end of the tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            $tag = strtolower($tag);
            $result['status'] = true;
            // A closing tag for a forced self closing element is consumed
            // but not reported as closing anything.
            if ( ! in_array($tag, $this->selfClosing, true)) {
                $result['closing'] = true;
                $result['tag']     = $tag;
            }

            return $result;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $result;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes: read until the tag ends with '>' or '/'
        while ( ! ($this->content->char() == '>' || $this->content->char() == '/')) {
            $space = $this->content->skipByToken('blank', true);
            if (empty($space)) {
                // Not positioned on blank characters: advance one character
                // and try again.
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }
            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() != '=') {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
                continue;
            }

            // Attribute with a value: step over '=' and any blanks after it.
            $this->content->fastForward(1)
                          ->skipByToken('blank');
            $quote = $this->content->char();
            $attr  = [];
            if ($quote == '"' || $quote == "'") {
                // Quoted value; both quote styles are read the same way.
                $attr['doubleQuote'] = ($quote == '"');
                $this->content->fastForward(1);
                $string = $this->content->copyUntil($quote, true);
                // Keep appending whatever copyUntilUnless() returns for this
                // quote and the '=>' argument until it returns nothing more.
                do {
                    $moreString = $this->content->copyUntilUnless($quote, '=>');
                    $string .= $moreString;
                } while ( ! empty($moreString));
                $attr['value'] = $string;
                // step over the closing quote
                $this->content->fastForward(1);
            } else {
                // Unquoted value; recorded as if it had been double quoted.
                $attr['doubleQuote'] = true;
                $attr['value']       = $this->content->copyByToken('attr', true);
            }
            $node->getTag()->$name = $attr;
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step over the closing '>'
        $this->content->fastForward(1);

        $result['status'] = true;
        $result['node']   = $node;

        return $result;
    }
832
833
    /**
834
     * Attempts to detect the charset that the html was sent in.
835
     *
836
     * @return bool
837
     * @throws ChildNotFoundException
838
     */
839 240
    protected function detectCharset(): bool
    {
        // Start with a conversion from the default charset to itself.
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ($enforceEncoding !== null) {
            // They want to enforce the given encoding. Note that $encode is
            // configured here but not propagated to the nodes.
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);

        // NOTE(review): static analysis reports that
        // AbstractNode::getAttribute() always returns null, which would make
        // the charset lookup below unreachable. Presumably the concrete node
        // class returned by find() overrides it - confirm.
        $content = is_null($meta) ? null : $meta->getAttribute('content');

        // The source charset is only overridden when the meta tag exists,
        // has a content attribute and that attribute names a charset.
        $detected = false;
        $matches  = [];
        if ( ! is_null($content) && preg_match('/charset=(.+)/', $content, $matches)) {
            $encode->from(trim($matches[1]));
            $detected = true;
        }

        // In every non-enforced case the encoder is pushed down the tree.
        $this->root->propagateEncoding($encode);

        return $detected;
    }
883
}
884