Passed
Branch dev/3.0.0 (c487fc)
by Gilles
01:48
created

Cleaner   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 93
Duplicated Lines 0 %

Test Coverage

Coverage 70.83%

Importance

Changes 0
Metric Value
eloc 48
c 0
b 0
f 0
dl 0
loc 93
ccs 34
cts 48
cp 0.7083
rs 10
wmc 19

1 Method

Rating   Name   Duplication   Size   Complexity  
D clean() 0 86 19
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PHPHtmlParser\Dom;
6
7
use PHPHtmlParser\Contracts\Dom\CleanerInterface;
8
use PHPHtmlParser\Exceptions\LogicalException;
9
use PHPHtmlParser\Options;
10
11
class Cleaner implements CleanerInterface
{
    /**
     * Cleans the html of any non-html information.
     *
     * When cleanup is enabled in $options the input is, in order:
     * gzip-decoded (if it starts with the gzip magic bytes), stripped of
     * white space between a closing quote and '>', has its line breaks
     * replaced (by a space, or by '&#10;' when line breaks are preserved),
     * and has the doctype, comments and CDATA sections removed. Script tags,
     * style tags and smarty-like '{...}' blocks are removed only when the
     * matching option is enabled.
     *
     * @param string  $str     raw input to clean
     * @param Options $options flags selecting which cleanup steps run
     *
     * @return string the cleaned input, or $str untouched when cleanup is disabled
     *
     * @throws LogicalException when gzip decoding or a regex replacement fails
     */
    public function clean(string $str, Options $options): string
    {
        if (!$options->isCleanupInput()) {
            // skip entire cleanup step
            return $str;
        }

        // Byte-wise check for the gzip header (magic 0x1f 0x8b, method 0x08).
        // A multibyte-aware search is not appropriate here: 0x8b is not a
        // valid US-ASCII byte, and only the first three bytes matter.
        if (0 === \strncmp($str, "\x1f\x8b\x08", 3)) {
            $str = \gzdecode($str);
            if ($str === false) {
                throw new LogicalException('gzdecode returned false. Error when trying to decode the string.');
            }
        }

        // remove white space before closing tags
        $str = $this->regexReplace("'\s+>", "'>", $str, 'clean single quotes');
        $str = $this->regexReplace('"\s+>', '">', $str, 'clean double quotes');

        // clean out the \n\r
        // str_replace() always returns a string for a string subject, so no
        // failure check is needed here.
        $replace = $options->isPreserveLineBreaks() ? '&#10;' : ' ';
        $str = \str_replace(["\r\n", "\r", "\n"], $replace, $str);

        // strip the doctype
        $str = $this->regexReplace('<!doctype(.*?)>', '', $str, 'strip the doctype');

        // strip out comments
        $str = $this->regexReplace('<!--(.*?)-->', '', $str, 'strip comments');

        // strip out cdata
        $str = $this->regexReplace("<!\[CDATA\[(.*?)\]\]>", '', $str, 'strip out cdata');

        // strip out <script> tags (first with attributes, then bare)
        if ($options->isRemoveScripts()) {
            $str = $this->regexReplace("<\s*script[^>]*[^/]>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 1');
            $str = $this->regexReplace("<\s*script\s*>(.*?)<\s*/\s*script\s*>", '', $str, 'remove scripts 2');
        }

        // strip out <style> tags (first with attributes, then bare)
        if ($options->isRemoveStyles()) {
            $str = $this->regexReplace("<\s*style[^>]*[^/]>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 1');
            $str = $this->regexReplace("<\s*style\s*>(.*?)<\s*/\s*style\s*>", '', $str, 'strip out style tags 2');
        }

        // strip smarty scripts
        if ($options->isRemoveSmartyScripts()) {
            $str = $this->regexReplace("(\{\w)(.*?)(\})", '', $str, 'remove smarty scripts');
        }

        return $str;
    }

    /**
     * Runs a case-insensitive multibyte regex replacement and guarantees a string result.
     *
     * mb_eregi_replace() returns false on error and null when the subject is
     * not valid for the current encoding; both are turned into a
     * LogicalException so callers never receive a non-string.
     *
     * @param string $pattern     mb_ereg pattern (no delimiters)
     * @param string $replacement replacement text
     * @param string $subject     string to operate on
     * @param string $action      short description of the step, used in the error message
     *
     * @throws LogicalException when the replacement does not yield a string
     */
    private function regexReplace(string $pattern, string $replacement, string $subject, string $action): string
    {
        $result = \mb_eregi_replace($pattern, $replacement, $subject);
        if (!\is_string($result)) {
            $got = $result === false ? 'false' : 'null';

            throw new LogicalException('mb_eregi_replace returned ' . $got . ' instead of a string. Error when attempting to ' . $action . '.');
        }

        return $result;
    }
}
108
109