Sanitizer   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 112
Duplicated Lines 0 %

Test Coverage

Coverage 92.86%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 12
eloc 28
c 1
b 0
f 0
dl 0
loc 112
ccs 26
cts 28
cp 0.9286
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A doSanitize() 0 25 4
A __construct() 0 10 2
A isValidUtf8() 0 4 2
A sanitize() 0 11 3
A create() 0 6 1
1
<?php
2
/**
 * (c) Steve Nebes <[email protected]>.
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
8
9
declare(strict_types=1);
10
11
namespace SN\HtmlSanitizer;
12
13
use Psr\Log\NullLogger;
14
use SN\HtmlSanitizer\Extension\HTML5Extension;
15
use SN\HtmlSanitizer\Parser\MastermindsParser;
16
use SN\HtmlSanitizer\Parser\ParserInterface;
17
use Psr\Log\LoggerInterface;
18
19
/**
 * Sanitizes an HTML string by parsing it and rendering the result of a DOM visitor.
 *
 * Processing order: the input is truncated to the configured maximum length,
 * rejected (empty string) when it is not valid UTF-8, stripped of NULL
 * characters, parsed, and finally visited and rendered. A parser exception
 * also yields an empty string.
 *
 * @author Steve Nebes <[email protected]>
 *
 * @final
 */
class Sanitizer
{
    /**
     * Maximum number of characters of sanitized output included in debug log entries.
     */
    private const LOG_PREVIEW_LENGTH = 50;

    /**
     * @var DomVisitor
     */
    private $domVisitor;

    /**
     * @var int
     */
    private $maxInputLength;

    /**
     * @var ParserInterface
     */
    private $parser;

    /**
     * @var LoggerInterface|null
     */
    private $logger;

    /**
     * @param DomVisitor           $domVisitor     Visitor whose visit() result is rendered as the output
     * @param int                  $maxInputLength Maximum number of characters of input that are processed
     * @param ParserInterface|null $parser         Parser to use; a MastermindsParser is created when omitted
     * @param LoggerInterface|null $logger         Optional logger receiving debug entries
     */
    public function __construct(
        DomVisitor $domVisitor,
        int $maxInputLength,
        ?ParserInterface $parser = null,
        ?LoggerInterface $logger = null
    ) {
        $this->domVisitor = $domVisitor;
        $this->maxInputLength = $maxInputLength;
        // "??" states the intent (fall back only on null) instead of relying on truthiness.
        $this->parser = $parser ?? new MastermindsParser();
        $this->logger = $logger;
    }

    /**
     * Quickly create an already configured sanitizer using the default builder.
     *
     * A new SanitizerBuilder is created, the HTML5Extension is registered on it
     * and the given configuration is handed to its build() method.
     *
     * @param array $config Configuration passed as-is to SanitizerBuilder::build()
     *
     * @return Sanitizer
     */
    public static function create(array $config): Sanitizer
    {
        $builder = new SanitizerBuilder();
        $builder->registerExtension(new HTML5Extension());

        return $builder->build($config);
    }

    /**
     * Sanitizes the given HTML and, when a logger is configured, logs a short
     * preview of the result at debug level.
     *
     * @param string $html Input to sanitize
     *
     * @return string Sanitized output; an empty string when the input is not
     *                valid UTF-8 or the parser throws
     */
    public function sanitize(string $html): string
    {
        $sanitized = $this->doSanitize($html);

        if (null !== $this->logger) {
            // Log at most LOG_PREVIEW_LENGTH characters, marking longer output with an ellipsis.
            $preview = \mb_substr($sanitized, 0, self::LOG_PREVIEW_LENGTH, 'UTF-8');
            if (\mb_strlen($sanitized, 'UTF-8') > self::LOG_PREVIEW_LENGTH) {
                $preview .= '...';
            }

            $this->logger->debug('Sanitized given input to "{output}".', [
                'output' => $preview,
            ]);
        }

        return $sanitized;
    }

    /**
     * Runs the actual sanitization pipeline.
     *
     * @param string $html Input to sanitize
     *
     * @return string Rendered result of the DOM visitor, or an empty string
     *                when the input is rejected or cannot be parsed
     */
    private function doSanitize(string $html): string
    {
        // Prevent DOS attack induced by extremely long HTML strings.
        // A negative limit is treated as 0: mb_substr() interprets a negative
        // length as "drop that many characters from the end", which would let
        // almost the whole input through instead of truncating it.
        $limit = \max(0, $this->maxInputLength);
        if (\mb_strlen($html, 'UTF-8') > $limit) {
            $html = \mb_substr($html, 0, $limit, 'UTF-8');
        }

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         */
        if (!$this->isValidUtf8($html)) {
            return '';
        }

        // Remove NULL character
        $html = \str_replace(\chr(0), '', $html);

        try {
            $parsed = $this->parser->parse($html);
        } catch (\Exception $exception) {
            // Unparsable input still sanitizes to an empty string, but the
            // failure is reported when a logger is available instead of
            // being discarded silently.
            if (null !== $this->logger) {
                $this->logger->debug('Failed to parse given input, returning an empty string.', [
                    'exception' => $exception,
                ]);
            }

            return '';
        }

        return $this->domVisitor->visit($parsed)->render();
    }

    /**
     * Tells whether the given string is valid UTF-8. The empty string is valid.
     *
     * @param string $html String to check
     *
     * @return bool
     */
    private function isValidUtf8(string $html): bool
    {
        // With the "u" modifier preg_match() does not return 1 for a subject
        // containing invalid UTF-8, so anything other than 1 means "invalid".
        return '' === $html || 1 === \preg_match('/^./us', $html);
    }
}
138