Completed
Pull Request — master (#212)
by
unknown
03:37
created

Dom::clean()   F

Complexity

Conditions 22
Paths 314

Size

Total Lines 99
Code Lines 53

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 36
CRAP Score 39.9204

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 22
eloc 53
c 3
b 0
f 0
nc 314
nop 1
dl 0
loc 99
ccs 36
cts 54
cp 0.6667
crap 39.9204
rs 1.9083

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

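Commonly applied refactorings include Extract Method: move a cohesive group of statements into a new, well-named method and call it from the original.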

1
<?php declare(strict_types=1);
2
namespace PHPHtmlParser;
3
4
use PHPHtmlParser\Dom\AbstractNode;
5
use PHPHtmlParser\Dom\Collection;
6
use PHPHtmlParser\Dom\HtmlNode;
7
use PHPHtmlParser\Dom\TextNode;
8
use PHPHtmlParser\Exceptions\ChildNotFoundException;
9
use PHPHtmlParser\Exceptions\CircularException;
10
use PHPHtmlParser\Exceptions\CurlException;
11
use PHPHtmlParser\Exceptions\NotLoadedException;
12
use PHPHtmlParser\Exceptions\ParentNotFoundException;
13
use PHPHtmlParser\Exceptions\StrictException;
14
use PHPHtmlParser\Exceptions\UnknownChildTypeException;
15
use PHPHtmlParser\Exceptions\LogicalException;
16
use stringEncode\Encode;
17
18
/**
19
 * Class Dom
20
 *
21
 * @package PHPHtmlParser
22
 */
23
class Dom
24
{
25
26
    /**
     * Charset that output is expected to be delivered in.
     *
     * @var string
     */
    protected $defaultCharset = 'UTF-8';

    /**
     * Root node of the parsed dom tree.
     *
     * @var HtmlNode
     */
    public $root;

    /**
     * The document string exactly as it was handed to the loader.
     *
     * @var string
     */
    protected $raw;

    /**
     * Cursor object wrapping the document string; null until a load call ran.
     *
     * @var Content
     */
    protected $content = null;

    /**
     * Byte length of the document as it was originally supplied.
     *
     * @var int
     */
    protected $rawSize;

    /**
     * Size of the document once it has been cleaned.
     *
     * @var int
     */
    protected $size;

    /**
     * Options applied to every load call, before the per-call options.
     *
     * @var array
     */
    protected $globalOptions = [];

    /**
     * Option object that stays in effect while the current document
     * is being parsed.
     *
     * @var Options
     */
    protected $options;

    /**
     * Tag names that are always treated as self closing.
     *
     * @var array
     */
    protected $selfClosing = [
        'area',
        'base',
        'basefont',
        'br',
        'col',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'spacer',
        'track',
        'wbr',
    ];

    /**
     * Self closing tag names that should be written without a trailing
     * "/>" (html5 style).
     *
     * @var array
     */
    protected $noSlash = [];
114
115
    /**
     * String form of the document: the inner html of the root node.
     *
     * @return string
     * @throws ChildNotFoundException
     * @throws UnknownChildTypeException
     */
    public function __toString(): string
    {
        $markup = $this->root->innerHtml();

        return $markup;
    }
126
127
    /**
     * Forwards reads of unknown properties to the root node.
     *
     * @param string $name Property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        $rootNode = $this->root;

        return $rootNode->$name;
    }
137
138
    /**
     * Loads the dom from whatever the string turns out to be: a path to an
     * existing file, an http(s) URL, or the markup itself.
     *
     * @param string $str     File path, URL or html string.
     * @param array  $options Options for this load call only.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     */
    public function load(string $str, array $options = []): Dom
    {
        AbstractNode::resetCount();

        // Only a string without line breaks is considered as a file path.
        $hasNoLineBreak = strpos($str, "\n") === false;
        if ($hasNoLineBreak && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // Strings starting with http:// or https:// are fetched remotely.
        $looksLikeUrl = preg_match("/^https?:\/\//i", $str);
        if ($looksLikeUrl) {
            return $this->loadFromUrl($str, $options);
        }

        // Everything else is taken to be the document itself.
        return $this->loadStr($str, $options);
    }
162
163
    /**
     * Reads the given file (or any stream file_get_contents understands)
     * and parses what it contains.
     *
     * @param string $file    Location to read the document from.
     * @param array  $options Options for this load call only.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException When the file could not be read.
     */
    public function loadFromFile(string $file, array $options = []): Dom
    {
        $markup = file_get_contents($file);
        if ($markup !== false) {
            return $this->loadStr($markup, $options);
        }

        throw new LogicalException('file_get_contents failed and returned false when trying to read "'.$file.'".');
    }
181
182
    /**
     * Fetches a document through a curl interface implementation and
     * parses the response body.
     *
     * @param string             $url     Address to fetch.
     * @param array              $options Options for this load call; they are
     *                                    also handed to the curl implementation.
     * @param CurlInterface|null $curl    Transport to use; a default Curl
     *                                    instance is created when omitted.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws CurlException
     * @throws StrictException
     * @throws LogicalException When cleaning the fetched document fails.
     */
    public function loadFromUrl(string $url, array $options = [], ?CurlInterface $curl = null): Dom
    {
        // The nullable type is spelled out: relying on "= null" to make a
        // typed parameter nullable is deprecated as of PHP 8.4.
        if ($curl === null) {
            // use the default curl interface
            $curl = new Curl;
        }
        $content = $curl->get($url, $options);

        return $this->loadStr($content, $options);
    }
204
205
    /**
     * Parses the html of the given string. Used by load(), loadFromFile()
     * and loadFromUrl().
     *
     * A fresh Options object is built for every call: the global options are
     * applied first, then the options passed to this call.
     *
     * @param string $str    The document to parse.
     * @param array  $option Options for this load call only.
     * @return Dom
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     * @throws LogicalException When cleaning the document fails.
     */
    public function loadStr(string $str, array $option = []): Dom
    {
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // The size property describes the cleaned document, so it has to be
        // measured on $html; measuring $str merely duplicated rawSize.
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }
234
235
    /**
     * Replaces the global options that every subsequent load call starts from.
     *
     * @param array $options
     * @return Dom
     * @chainable
     */
    public function setOptions(array $options): Dom
    {
        $this->globalOptions = $options;

        return $this;
    }
248
249
    /**
     * Find elements by css selector on the root node.
     *
     * When the 'depthFirstSearch' option holds a boolean it is forwarded to
     * the root node; otherwise the root node's own default is used.
     *
     * @param string   $selector Css selector to search for.
     * @param int|null $nth      Index of the single match to return, or null
     *                           for all matches.
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function find(string $selector, ?int $nth = null)
    {
        // The nullable type is spelled out: relying on "= null" to make a
        // typed parameter nullable is deprecated as of PHP 8.4.
        $this->isLoaded();

        $depthFirstSearch = $this->options->get('depthFirstSearch');
        if (is_bool($depthFirstSearch)) {
            return $this->root->find($selector, $nth, $depthFirstSearch);
        }

        return $this->root->find($selector, $nth);
    }
270
271
    /**
     * Looks a node up by its id, starting at the root node.
     *
     * @param int $id
     * @return bool|AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     * @throws ParentNotFoundException
     */
    public function findById(int $id)
    {
        $this->isLoaded();

        $match = $this->root->findById($id);

        return $match;
    }
285
286
    /**
     * Registers one tag, or an array of tags, as always self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addSelfClosingTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $name) {
            $this->selfClosing[] = $name;
        }

        return $this;
    }
305
306
    /**
     * Takes one tag, or an array of tags, off the list of tags that are
     * always self closing.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeSelfClosingTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        $this->selfClosing = array_diff($this->selfClosing, $tags);

        return $this;
    }
323
324
    /**
     * Empties the list of self closing tags.
     *
     * @return Dom
     * @chainable
     */
    public function clearSelfClosingTags(): Dom
    {
        $this->selfClosing = [];

        return $this;
    }
336
337
338
    /**
     * Registers one tag, or an array of tags, as self closing tags that
     * should not carry a trailing slash.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function addNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        foreach ($tags as $name) {
            $this->noSlash[] = $name;
        }

        return $this;
    }
356
357
    /**
     * Takes one tag, or an array of tags, off the list of no-slash tags.
     *
     * @param string|array $tag
     * @return Dom
     * @chainable
     */
    public function removeNoSlashTag($tag): Dom
    {
        $tags = is_array($tag) ? $tag : [$tag];
        $this->noSlash = array_diff($this->noSlash, $tags);

        return $this;
    }
373
374
    /**
     * Resets the list of no-slash tags to an empty list.
     *
     * @return Dom
     * @chainable
     */
    public function clearNoSlashTags(): Dom
    {
        $this->noSlash = [];

        return $this;
    }
386
387
    /**
     * Returns the first child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function firstChild(): AbstractNode
    {
        $this->isLoaded();

        $first = $this->root->firstChild();

        return $first;
    }
399
400
    /**
     * Returns the last child of the root node.
     *
     * @return AbstractNode
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function lastChild(): AbstractNode
    {
        $this->isLoaded();

        $last = $this->root->lastChild();

        return $last;
    }
412
413
    /**
     * Returns how many children the root node has.
     *
     * @return int
     * @throws NotLoadedException
     */
    public function countChildren(): int
    {
        $this->isLoaded();

        $total = $this->root->countChildren();

        return $total;
    }
425
426
    /**
     * Returns the children of the root node as an array.
     *
     * @return array
     * @throws NotLoadedException
     */
    public function getChildren(): array
    {
        $this->isLoaded();

        $children = $this->root->getChildren();

        return $children;
    }
438
439
    /**
     * Tells whether the root node has any child nodes.
     *
     * @return bool
     * @throws NotLoadedException
     */
    public function hasChildren(): bool
    {
        $this->isLoaded();

        $hasAny = $this->root->hasChildren();

        return $hasAny;
    }
451
452
    /**
     * Returns the first element matching the given id by searching for
     * the selector "#<id>".
     *
     * @param $id
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        $selector = '#'.$id;

        return $this->find($selector, 0);
    }
466
467
    /**
     * Returns every element with the given tag name by using the name
     * itself as the selector.
     *
     * @param string $name
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByTag(string $name)
    {
        $this->isLoaded();

        $matches = $this->find($name);

        return $matches;
    }
481
482
    /**
     * Returns every element carrying the given class by searching for
     * the selector ".<class>".
     *
     * @param string $class
     * @return mixed|Collection|null
     * @throws ChildNotFoundException
     * @throws NotLoadedException
     */
    public function getElementsByClass(string $class)
    {
        $this->isLoaded();

        $selector = '.'.$class;

        return $this->find($selector);
    }
496
497
    /**
     * Guards against use before any of the load methods has stored content.
     *
     * @throws NotLoadedException When no content has been set yet.
     */
    protected function isLoaded(): void
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }
508
509
    /**
     * Cleans the html of any non-html information.
     *
     * Nothing is touched when the 'cleanupInput' option is falsy. Otherwise
     * the string is, in this order: gunzipped when it starts with the gzip
     * magic bytes, converted to UTF-8 when 'useFromEncoding' is set, stripped
     * of white space between a closing quote and ">", freed of line breaks,
     * and stripped of the doctype, comments, cdata and - depending on the
     * options - script tags, style tags, server side scripts and smarty
     * scripts.
     *
     * @param string $str
     * @return string
     * @throws LogicalException When one of the clean up steps fails.
     */
    protected function clean(string $str): string
    {
        if ( ! $this->options->get('cleanupInput')) {
            // skip entire cleanup step
            return $str;
        }

        // Compare the leading bytes against the gzip magic number (deflate
        // method). This is a fixed 3 byte check and is byte-safe, unlike a
        // multibyte search through the whole document.
        if (strncmp($str, "\x1f\x8b\x08", 3) === 0) {
            $str = gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // sometimes the source encoding has to be stated explicitly
        $fromEncoding = $this->options->get('useFromEncoding');
        if ($fromEncoding != null) {
            $str = mb_convert_encoding($str, "UTF-8", $fromEncoding);
            if ( ! is_string($str)) {
                // Without this check a failed conversion would surface as a
                // TypeError in the next step under strict types.
                throw new LogicalException('mb_convert_encoding did not return a string. Error when attempting to convert the input encoding.');
            }
        }

        // remove white space before closing tags
        $str = $this->eregiReplace("'\s+>", "'>", $str, 'clean single quotes');
        $str = $this->eregiReplace('"\s+>', '">', $str, 'clean double quotes');

        // clean out the \n\r (str_replace on a string subject always yields
        // a string, so there is nothing to check afterwards)
        $lineBreak = $this->options->get('preserveLineBreaks') ? '&#10;' : ' ';
        $str = str_replace(["\r\n", "\r", "\n"], $lineBreak, $str);

        // strip the doctype
        $str = $this->eregiReplace("<!doctype(.*?)>", '', $str, 'strip the doctype');

        // strip out comments
        $str = $this->eregiReplace("<!--(.*?)-->", '', $str, 'strip comments');

        // strip out cdata
        $str = $this->eregiReplace("<!\[CDATA\[(.*?)\]\]>", '', $str, 'strip out cdata');

        // strip out <script> tags, first those with attributes, then bare ones
        if ($this->options->get('removeScripts')) {
            $str = $this->eregiReplace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 1');
            $str = $this->eregiReplace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 2');
        }

        // strip out <style> tags, first those with attributes, then bare ones
        if ($this->options->get('removeStyles')) {
            $str = $this->eregiReplace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 1');
            $str = $this->eregiReplace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 2');
        }

        // strip out server side scripts
        if ($this->options->get('serverSideScripts')) {
            $str = $this->eregiReplace("(<\?)(.*?)(\?>)", '', $str, 'strip out service side scripts');
        }

        // strip smarty scripts
        if ($this->options->get('removeSmartyScripts')) {
            $str = $this->eregiReplace("(\{\w)(.*?)(\})", '', $str, 'remove smarty scripts');
        }

        return $str;
    }

    /**
     * Runs one case-insensitive multibyte regex replacement for clean() and
     * turns every non-string result into an exception.
     *
     * mb_eregi_replace() reports failure with false and, for input that is
     * invalid in the current encoding, with null; both are rejected here so
     * the next clean up step always receives a string.
     *
     * @param string $pattern     Pattern handed to mb_eregi_replace().
     * @param string $replacement Replacement text.
     * @param string $subject     String to work on.
     * @param string $task        Description of the step, used to complete
     *                            the exception message.
     * @return string
     * @throws LogicalException When the replacement did not yield a string.
     */
    private function eregiReplace(string $pattern, string $replacement, string $subject, string $task): string
    {
        $result = mb_eregi_replace($pattern, $replacement, $subject);
        if ( ! is_string($result)) {
            throw new LogicalException('mb_eregi_replace returned false instead of a string. Error when attempting to '.$task.'.');
        }

        return $result;
    }
615
616
    /**
     * Builds the node tree from the content.
     *
     * Starting with a fresh 'root' node, the content is consumed piece by
     * piece: text up to the next "<" becomes a text node, anything else is
     * handed to parseTag(). Parsing ends once parseTag() reports a false
     * status or a closing tag moves the active node above the root.
     *
     * @return void
     * @throws ChildNotFoundException
     * @throws CircularException
     * @throws StrictException
     */
    protected function parse(): void
    {
        // add the root node
        $this->root = new HtmlNode('root');
        $this->root->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        $current = $this->root;
        while ($current !== null) {
            $text = $this->content->copyUntil('<');

            if ($text != '') {
                // keep the text unless it is pure white space that the
                // options tell us to drop
                if ($this->options->whitespaceTextNode || trim($text) != '') {
                    $textNode = new TextNode($text, $this->options->removeDoubleSpace);
                    $textNode->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);
                    $current->addChild($textNode);
                }
                continue;
            }

            $info = $this->parseTag();
            if ( ! $info['status']) {
                // we are done here
                $current = null;
                continue;
            }

            if ($info['closing']) {
                // walk up the ancestors looking for the tag being closed
                $candidate = $current;
                while ($candidate !== null && $candidate->getTag()->name() != $info['tag']) {
                    $candidate = $candidate->getParent();
                }
                // an unmatched closing tag leaves the active node untouched
                if ($candidate !== null) {
                    $current = $candidate->getParent();
                }
                continue;
            }

            if ( ! isset($info['node'])) {
                continue;
            }

            /** @var AbstractNode $node */
            $node = $info['node'];
            $current->addChild($node);

            // only tags that are not self closing can receive children
            if ( ! $node->getTag()->isSelfClosing()) {
                $current = $node;
            }
        }
    }
681
682
    /**
     * Attempts to read one tag at the current position of the content.
     *
     * The result always holds 'status', 'closing' and 'node'. For a closing
     * tag that is not in the self closing list, 'closing' is true and a 'tag'
     * entry with the lower-cased name is added. For an opening tag, 'node'
     * holds the new HtmlNode with its attributes set.
     *
     * @return array
     * @throws StrictException
     */
    protected function parseTag(): array
    {
        $result = [
            'status'  => false,
            'closing' => false,
            'node'    => null,
        ];

        // we are not at the beginning of a tag
        if ($this->content->char() != '<') {
            return $result;
        }

        // a slash right behind the "<" marks an end tag
        if ($this->content->fastForward(1)->char() == '/') {
            $closingName = $this->content->fastForward(1)
                                         ->copyByToken('slash', true);
            // move to end of tag
            $this->content->copyUntil('>');
            $this->content->fastForward(1);

            $closingName      = strtolower($closingName);
            $result['status'] = true;
            // closing tags of self closing elements are accepted but do not
            // count as closing anything
            if ( ! in_array($closingName, $this->selfClosing, true)) {
                $result['closing'] = true;
                $result['tag']     = $closingName;
            }

            return $result;
        }

        $tag = strtolower($this->content->copyByToken('slash', true));
        if (trim($tag) == '') {
            // no tag found, invalid < found
            return $result;
        }
        $node = new HtmlNode($tag);
        $node->setHtmlSpecialCharsDecode($this->options->htmlSpecialCharsDecode);

        // attributes
        while ( ! ($this->content->char() == '>' || $this->content->char() == '/')) {
            $blank = $this->content->skipByToken('blank', true);
            if (empty($blank)) {
                $this->content->fastForward(1);
                continue;
            }

            $name = $this->content->copyByToken('equal', true);
            if ($name == '/') {
                break;
            }
            if (empty($name)) {
                $this->content->skipByToken('blank');
                continue;
            }

            $this->content->skipByToken('blank');
            if ($this->content->char() != '=') {
                // no value attribute
                if ($this->options->strict) {
                    // can't have this in strict html
                    $character = $this->content->getPosition();
                    throw new StrictException("Tag '$tag' has an attribute '$name' with out a value! (character #$character)");
                }
                $node->getTag()->$name = [
                    'value'       => null,
                    'doubleQuote' => true,
                ];
                if ($this->content->char() != '>') {
                    $this->content->rewind(1);
                }
                continue;
            }

            $this->content->fastForward(1)
                          ->skipByToken('blank');

            // find out which quote character, if any, opens the value
            $opener = $this->content->char();
            if ($opener == '"') {
                $quote = '"';
            } elseif ($opener == "'") {
                $quote = "'";
            } else {
                $quote = null;
            }

            if ($quote === null) {
                // unquoted value
                $attr = [
                    'doubleQuote' => true,
                    'value'       => $this->content->copyByToken('attr', true),
                ];
            } else {
                $this->content->fastForward(1);
                $value = $this->content->copyUntil($quote, true);
                // keep collecting for as long as copyUntilUnless() still
                // returns something
                do {
                    $chunk  = $this->content->copyUntilUnless($quote, '=>');
                    $value .= $chunk;
                } while ( ! empty($chunk));
                $this->content->fastForward(1);
                $attr = [
                    'doubleQuote' => $quote === '"',
                    'value'       => $value,
                ];
            }
            $node->getTag()->$name = $attr;
        }

        $this->content->skipByToken('blank');
        if ($this->content->char() == '/') {
            // self closing tag
            $node->getTag()->selfClosing();
            $this->content->fastForward(1);
        } elseif (in_array($tag, $this->selfClosing, true)) {
            // Should be a self closing tag, check if we are strict
            if ($this->options->strict) {
                $character = $this->content->getPosition();
                throw new StrictException("Tag '$tag' is not self closing! (character #$character)");
            }

            // We force self closing on this tag.
            $node->getTag()->selfClosing();

            // Should this tag use a trailing slash?
            if (in_array($tag, $this->noSlash, true)) {
                $node->getTag()->noTrailingSlash();
            }
        }

        $this->content->fastForward(1);

        $result['status'] = true;
        $result['node']   = $node;

        return $result;
    }
837
838
    /**
     * Attempts to detect the charset that the html was sent in.
     *
     * The charset is read from the content attribute of the first
     * meta[http-equiv=Content-Type] tag. Whether or not one is found, the
     * encoder (defaulting to the default charset for both directions) is
     * propagated to the root node - except when the 'enforceEncoding' option
     * is set, in which case the method returns early.
     *
     * @return bool True when a charset was found in the meta tag.
     * @throws ChildNotFoundException
     */
    protected function detectCharset(): bool
    {
        // set the default
        $encode = new Encode;
        $encode->from($this->defaultCharset);
        $encode->to($this->defaultCharset);

        $enforceEncoding = $this->options->enforceEncoding;
        if ( ! is_null($enforceEncoding)) {
            // they want to enforce the given encoding
            // NOTE(review): the encoder is configured here but never
            // propagated to the root node - presumably intentional since
            // from and to are identical; confirm.
            $encode->from($enforceEncoding);
            $encode->to($enforceEncoding);

            return false;
        }

        $found = false;

        /** @var AbstractNode $meta */
        $meta = $this->root->find('meta[http-equiv=Content-Type]', 0);
        if ( ! is_null($meta)) {
            // NOTE(review): static analysis reports that
            // AbstractNode::getAttribute() always returns null; the node
            // returned by find() is presumably a subclass overriding it -
            // confirm.
            $content = $meta->getAttribute('content');
            $matches = [];
            // Match case-insensitively and stop at white space, ";" or a
            // quote, so that "Charset=UTF-8" is recognised and trailing
            // parameters do not end up in the charset name.
            if (is_string($content) &&
                preg_match('/charset\s*=\s*["\']?([^\s;"\']+)/i', $content, $matches)
            ) {
                $encode->from($matches[1]);
                $found = true;
            }
        }

        // With no meta tag, no content attribute or no charset in it, the
        // defaults set above are propagated.
        $this->root->propagateEncoding($encode);

        return $found;
    }
888
}
889