Passed
Pull Request — master (#207)
by
unknown
02:09
created

Dom::countChildren()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
 * Class Dom
 *
 * Front door of the parser. It loads markup from a string, a file or a URL,
 * optionally cleans it, builds a node tree below {@see Dom::$root} and
 * forwards look-ups (find, get, children, ...) to that root node.
 *
 * @package PHPHtmlParser
 */
class Dom
{

    /**
     * The charset we would like the output to be in.
     *
     * @var string
     */
    protected $defaultCharset = 'UTF-8';

    /**
     * Contains the root node of this dom tree.
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * The raw version of the document string.
     *
     * @var string
     */
    protected $raw;

    /**
     * The document string, wrapped for cursor based reading.
     * Stays null until one of the load methods has run.
     *
     * @var Content
     */
    protected $content = null;

    /**
     * The original file size of the document (in bytes).
     *
     * @var int
     */
    protected $rawSize;

    /**
     * The size of the document after it is cleaned (in bytes).
     *
     * @var int
     */
    protected $size;

    /**
     * A global options array to be used by all load calls.
     *
     * @var array
     */
    protected $globalOptions = [];

    /**
     * A persistent option object to be used for all options in the
     * parsing of the file.
     *
     * @var Options
     */
    protected $options;

    /**
     * A list of tags which will always be self closing
     *
     * @var array
     */
    protected $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr'
    ];

    /**
     * A list of tags where there should be no /> at the end (html5 style)
     *
     * @var array
     */
    protected $noSlash = [];

    /**
     * Returns the inner html of the root node.
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        return $this->root->innerHtml();
    }

    /**
     * A simple wrapper around the root node: any unknown property read on
     * the Dom is forwarded to the root node.
     *
     * @param string $name
     * @return mixed
     */
    public function __get($name)
    {
        return $this->root->$name;
    }

    /**
     * Attempts to load the dom from any resource, string, file, or URL.
     *
     * The node counter is reset first. A string without a line feed that
     * names an existing file is read from disk; otherwise a string starting
     * with http:// or https:// is fetched; anything else is parsed as markup.
     *
     * @param string $str
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();
        // check if it's a file (markup normally contains line feeds, paths do not)
        if (strpos($str, "\n") === false && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }
        // check if it's a url
        if (preg_match("/^https?:\/\//i", $str)) {
            return $this->loadFromUrl($str, $options);
        }

        return $this->loadStr($str, $options);
    }

    /**
     * Loads the dom from a document file/url
     *
     * @param string $file
     * @param array  $options
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException when the file could not be read
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $content = file_get_contents($file);
        if ($content === false) {
            throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
        }

        return $this->loadStr($content, $options);
    }

    /**
     * Use a curl interface implementation to attempt to load
     * the content from a url.
     *
     * @param string             $url
     * @param array              $options
     * @param CurlInterface|null $curl when null, the default Curl implementation is used
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        if (is_null($curl)) {
            // use the default curl interface
            $curl = new Curl();
        }
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }

    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * The per-call options are layered on top of the global options, the
     * input is cleaned, parsed into a tree and finally the charset of the
     * document is detected.
     *
     * @param string $str
     * @param array  $option
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException when the cleanup step fails
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        $this->options = new Options();
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // the size of the *cleaned* document, not of the raw input
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }

    /**
     * Sets a global options array to be used by all load calls.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }

    /**
     * Find elements by css selector on the root node.
     *
     * The 'depthFirstSearch' option is passed on to the root node only when
     * it holds a real boolean.
     *
     * @param string   $selector
     * @param int|null $nth
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        return $this->root->find($selector, $nth);
    }

    /**
     * Get nth element by css selector on the root node.
     *
     * @param string $selector
     * @param int    $nth
     * @return AbstractNode|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function get(string $selector, int $nth = 0)
    {
        $this->isLoaded();

        return $this->root->get($selector, $nth);
    }

    /**
     * Find element by Id on the root node
     *
     * @param int $id
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        return $this->root->findById($id);
    }

    /**
     * Adds the tag (or tags in an array) to the list of tags that will always
     * be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        foreach ($tag as $value) {
            $this->selfClosing[] = $value;
        }

        return $this;
    }

    /**
     * Removes the tag (or tags in an array) from the list of tags that will
     * always be self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        // array_diff keeps the original keys; re-index so the list has no holes
        $this->selfClosing = array_values(array_diff($this->selfClosing, $tag));

        return $this;
    }

    /**
     * Sets the list of self closing tags to empty.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $this->selfClosing = [];

        return $this;
    }

    /**
     * Adds a tag (or tags in an array) to the list of self closing tags that
     * should not have a trailing slash
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        foreach ($tag as $value) {
            $this->noSlash[] = $value;
        }

        return $this;
    }

    /**
     * Removes a tag (or tags in an array) from the list of no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        if ( ! is_array($tag)) {
            $tag = [$tag];
        }
        // array_diff keeps the original keys; re-index so the list has no holes
        $this->noSlash = array_values(array_diff($this->noSlash, $tag));

        return $this;
    }

    /**
     * Empties the list of no-slash tags.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $this->noSlash = [];

        return $this;
    }

    /**
     * Simple wrapper function that returns the first child.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();

        return $this->root->firstChild();
    }

    /**
     * Simple wrapper function that returns the last child.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();

        return $this->root->lastChild();
    }

    /**
     * Simple wrapper function that returns count of child elements
     *
     * @return int
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        return $this->root->countChildren();
    }

    /**
     * Get array of children
     *
     * @return array
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        return $this->root->getChildren();
    }

    /**
     * Check if the root node has child nodes
     *
     * @return bool
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        return $this->root->hasChildren();
    }

    /**
     * Simple wrapper function that returns an element by the
     * id (a find() for the selector '#<id>', first match).
     *
     * @param $id
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        return $this->find('#'.$id, 0);
    }

    /**
     * Simple wrapper function that returns all elements by
     * tag name.
     *
     * @param string $name
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        return $this->find($name);
    }

    /**
     * Simple wrapper function that returns all elements by
     * class name (a find() for the selector '.<class>').
     *
     * @param string $class
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        return $this->find('.'.$class);
    }

    /**
     * Checks if the load methods have been called.
     *
     * @throws NotLoadedException when no content has been loaded yet
     */
    protected function isLoaded(): void
    {
        if (is_null($this->content)) {
            throw new NotLoadedException('Content is not loaded!');
        }
    }

    /**
     * Cleans the html of any none-html information.
     *
     * Unless the 'cleanupInput' option is falsy, this gunzips gzip input,
     * removes white space between a closing quote and '>', replaces line
     * breaks, and strips the doctype, comments, CDATA sections and, depending
     * on the options, script/style blocks, server side scripts and smarty
     * scripts.
     *
     * @param string $str
     * @return string
     * @throws LogicalException when decoding or one of the replacements fails
     */
    protected function clean(string $str): string
    {
        if ( ! $this->options->get('cleanupInput')) {
            // skip entire cleanup step
            return $str;
        }

        // gzip streams start with the magic bytes 1f 8b 08
        $is_gzip = 0 === mb_strpos($str, "\x1f" . "\x8b" . "\x08", 0, "US-ASCII");
        if ($is_gzip) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // remove white space before closing tags
        $str = $this->replaceOrFail("'\s+>", "'>", $str, 'clean single quotes');
        $str = $this->replaceOrFail('"\s+>', '">', $str, 'clean double quotes');

        // clean out the \n\r
        // (str_replace() always returns a string for a string subject, so
        // there is no failure case to check here)
        $replace = ' ';
        if ($this->options->get('preserveLineBreaks')) {
            $replace = '&#10;';
        }
        $str = str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->replaceOrFail("<!doctype(.*?)>", '', $str, 'strip the doctype');

        // strip out comments
        $str = $this->replaceOrFail("<!--(.*?)-->", '', $str, 'strip comments');

        // strip out cdata
        $str = $this->replaceOrFail("<!\[CDATA\[(.*?)\]\]>", '', $str, 'strip out cdata');

        // strip out <script> tags
        if ($this->options->get('removeScripts')) {
            $str = $this->replaceOrFail("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 1');
            $str = $this->replaceOrFail("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 2');
        }

        // strip out <style> tags
        if ($this->options->get('removeStyles')) {
            $str = $this->replaceOrFail("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 1');
            $str = $this->replaceOrFail("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 2');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $this->replaceOrFail("(<\?)(.*?)(\?>)", '', $str, 'strip out service side scripts');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->replaceOrFail("(\{\w)(.*?)(\})", '', $str, 'remove smarty scripts');
        }

        return $str;
    }

    /**
     * Runs one case-insensitive multibyte regex replacement for clean() and
     * turns every non-string result into a LogicalException.
     *
     * mb_eregi_replace() returns false on error and null when the subject is
     * not valid in the current encoding; both are treated as a failure.
     *
     * @param string $pattern
     * @param string $replacement
     * @param string $subject
     * @param string $action what was being attempted, used in the error message
     * @return string
     * @throws LogicalException
     */
    private function replaceOrFail(string $pattern, string $replacement, string $subject, string $action): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $subject);
        if ( ! is_string($result)) {
            throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to '.$action.'.');
        }

        return $result;
    }

    /**
     * Attempts to parse the html in content.
     *
     * Walks the content from left to right. Text in front of the next '<'
     * becomes a text node of the active node; tags are read by parseTag()
     * and either open a new active node or close the active one.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
        $activeNode = $this->root;
        while ( ! is_null($activeNode)) {
            $str = $this->content->copyUntil('<');
            if ($str == '') {
                $info = $this->parseTag();
                if ( ! $info['status']) {
                    // we are done here
                    break;
                }

                // check if it was a closing tag
                if ($info['closing']) {
                    $foundOpeningTag = true;
                    $originalNode    = $activeNode;
                    // climb towards the root until the matching opening tag is found
                    while ($activeNode->getTag()->name() != $info['tag']) {
                        $activeNode = $activeNode->getParent();
                        if (is_null($activeNode)) {
                            // we could not find opening tag, ignore this closing tag
                            $activeNode      = $originalNode;
                            $foundOpeningTag = false;
                            break;
                        }
                    }
                    if ($foundOpeningTag) {
                        $activeNode = $activeNode->getParent();
                    }
                    continue;
                }

                if ( ! isset($info['node'])) {
                    continue;
                }

                /** @var AbstractNode $node */
                $node = $info['node'];
                $activeNode->addChild($node);

                // only a tag that is not self closing can receive children
                if ( ! $node->getTag()->isSelfClosing()) {
                    $activeNode = $node;
                }
            } else if ($this->options->whitespaceTextNode ||
                trim($str) != ''
            ) {
                // we found text we care about
                $textNode = new TextNode($str, $this->options->removeDoubleSpace);
                $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                $activeNode->addChild($textNode);
            }
        }
    }

    /**
     * Attempt to parse a tag out of the content.
     *
     * The returned array always has the keys 'status', 'closing' and 'node';
     * 'tag' is added for a closing tag that counts:
     *  - status false: the cursor was not on a usable tag,
     *  - status true, closing true: a closing tag, its lowercase name in 'tag',
     *  - status true, closing false, node null: a closing tag of a tag that is
     *    listed as self closing (to be ignored),
     *  - status true, node set: an opening tag with its attributes.
     *
     * @return array
     * @throws StrictException
     */
    protected function parseTag(): array
    {
        $return = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];
        if ($this->content->char() != '<') {
            // we are not at the beginning of a tag
            return $return;
        }

        // check if this is a closing tag
        if ($this->content->fastForward(1)->char() == '/') {
            // end tag
            $tag = $this->content->fastForward(1)
                                 ->copyByToken('slash', true);
            // move to end of tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            // check if this closing tag counts
            $tag              = strtolower($tag);
            $return['status'] = true;
            if ( ! in_array($tag, $this->selfClosing, true)) {
                $return['closing'] = true;
                $return['tag']     = $tag;
            }

            return $return;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $return;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes
        while ($this->content->char() != '>' &&
            $this->content->char() != '/') {
            $space = $this->content->skipByToken('blank', true);
            if (empty($space)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }

            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() == '=') {
                $attr = [];
                $this->content->fastForward(1)
                              ->skipByToken('blank');
                switch ($this->content->char()) {
                    case '"':
                        $attr['doubleQuote'] = true;
                        $attr['value']       = $this->readQuotedAttributeValue('"');
                        break;
                    case "'":
                        $attr['doubleQuote'] = false;
                        $attr['value']       = $this->readQuotedAttributeValue("'");
                        break;
                    default:
                        // unquoted value
                        $attr['doubleQuote'] = true;
                        $attr['value']       = $this->content->copyByToken('attr', true);
                        break;
                }
                $node->getTag()->$name = $attr;
            } else {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                // step back one character unless the cursor is on the end of the tag
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
            }
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {

            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        // step over the closing '>'
        $this->content->fastForward(1);

        $return['status'] = true;
        $return['node']   = $node;

        return $return;
    }

    /**
     * Reads a quoted attribute value for parseTag(). The cursor must be on
     * the opening quote and is left right behind the closing quote.
     *
     * After the first chunk up to a quote, further chunks are appended for
     * as long as Content::copyUntilUnless() keeps returning text.
     *
     * @param string $quote the quote character that delimits the value
     * @return string
     */
    private function readQuotedAttributeValue(string $quote): string
    {
        $this->content->fastForward(1);
        $value = $this->content->copyUntil($quote, true);
        do {
            $moreString = $this->content->copyUntilUnless($quote, '=>');
            $value .= $moreString;
        } while ( ! empty($moreString));
        $this->content->fastForward(1);

        return $value;
    }

    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * Returns true only when a charset was read from a
     * meta[http-equiv=Content-Type] tag; in every other case false.
     *
     * @return bool
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode();
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ( ! is_null($enforceEncoding)) {
            //  they want to enforce the given encoding
            // NOTE: the encoder configured here is not propagated to the
            // tree before returning.
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if (is_null($meta)) {
            // could not find meta tag
            $this->root->propagateEncoding($encode);

            return false;
        }
        $content = $meta->getAttribute('content');
        if (is_null($content)) {
            // could not find content
            $this->root->propagateEncoding($encode);

            return false;
        }
        $matches = [];
        if (preg_match('/charset=(.+)/', $content, $matches)) {
            $encode->from(trim($matches[1]));
            $this->root->propagateEncoding($encode);

            return true;
        }

        // no charset found
        $this->root->propagateEncoding($encode);

        return false;
    }
}
899