Sanitizer   A
last analyzed

Complexity

Total Complexity 12

Size/Duplication

Total Lines 108
Duplicated Lines 0 %

Test Coverage

Coverage 92.86%

Importance

Changes 0
Metric Value
wmc 12
eloc 28
dl 0
loc 108
ccs 26
cts 28
cp 0.9286
rs 10
c 0
b 0
f 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A doSanitize() 0 25 4
A __construct() 0 6 2
A isValidUtf8() 0 4 2
A sanitize() 0 11 3
A create() 0 6 1
1
<?php
2
/**
3
 * (c) Steve Nebes <[email protected]>.
4
 *
5
 *  For the full copyright and license information, please view the LICENSE
6
 *  file that was distributed with this source code.
7
 */
8
9
declare(strict_types=1);
10
11
namespace SN\HtmlSanitizer;
12
13
use Psr\Log\NullLogger;
14
use SN\HtmlSanitizer\Extension\HTML5Extension;
15
use SN\HtmlSanitizer\Parser\MastermindsParser;
16
use SN\HtmlSanitizer\Parser\ParserInterface;
17
use Psr\Log\LoggerInterface;
18
19
/**
20
 * @author Steve Nebes <[email protected]>
21
 *
22
 * @final
23
 */
24
class Sanitizer
{
    /**
     * Visitor applied to the parsed input; its result is rendered to the final string.
     *
     * @var DomVisitor
     */
    private $domVisitor;

    /**
     * Maximum number of UTF-8 characters of input that are processed; longer input is truncated.
     *
     * @var int
     */
    private $maxInputLength;

    /**
     * Parser that turns the (validated) HTML string into the structure handed to the visitor.
     *
     * @var ParserInterface
     */
    private $parser;

    /**
     * Optional logger; when absent no log excerpt is computed at all.
     *
     * @var LoggerInterface|null
     */
    private $logger;

    /**
     * Build a sanitizer from its collaborators.
     *
     * @param DomVisitor           $domVisitor     Visitor used to process the parsed input
     * @param int                  $maxInputLength Maximum input length, in characters
     * @param ParserInterface|null $parser         Parser to use; a MastermindsParser is created when null
     * @param LoggerInterface|null $logger         Logger receiving a debug entry per sanitize() call, or null
     */
    public function __construct(DomVisitor $domVisitor, int $maxInputLength, ParserInterface $parser = null, LoggerInterface $logger = null)
    {
        if (null === $parser) {
            $parser = new MastermindsParser();
        }

        $this->domVisitor = $domVisitor;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser;
        $this->logger = $logger;
    }

    /**
     * Quickly create an already configured sanitizer using the default builder.
     *
     * The builder is given the HTML5 extension before the configuration is applied.
     *
     * @param array $config Configuration passed as-is to SanitizerBuilder::build()
     *
     * @return Sanitizer
     */
    public static function create(array $config): Sanitizer
    {
        $builder = new SanitizerBuilder();
        $builder->registerExtension(new HTML5Extension());

        return $builder->build($config);
    }

    /**
     * Sanitize the given HTML and, when a logger is configured, log a short excerpt of the result.
     *
     * @param string $html Untrusted HTML input
     *
     * @return string The sanitized output; an empty string when the input is rejected
     */
    public function sanitize(string $html): string
    {
        $sanitized = $this->doSanitize($html);

        if ($this->logger) {
            // Log at most 50 characters. The encoding is passed explicitly so the
            // excerpt never depends on the global mb_internal_encoding() setting.
            $excerpt = \mb_substr($sanitized, 0, 50, 'UTF-8');
            if (\mb_strlen($sanitized, 'UTF-8') > 50) {
                $excerpt .= '...';
            }

            $this->logger->debug('Sanitized given input to "{output}".', [
                'output' => $excerpt,
            ]);
        }

        return $sanitized;
    }

    /**
     * Run the sanitization pipeline: truncate, validate UTF-8, strip NULL bytes, parse, visit, render.
     *
     * @param string $html Untrusted HTML input
     *
     * @return string The rendered result, or an empty string when the input is not valid
     *                UTF-8 or the parser throws an exception
     */
    private function doSanitize(string $html): string
    {
        // Prevent DOS attack induced by extremely long HTML strings.
        // The length is measured and cut in UTF-8 explicitly: relying on the global
        // mb_internal_encoding() could (with a single-byte encoding) cut a multi-byte
        // sequence in half and make otherwise valid input fail the UTF-8 check below.
        if (\mb_strlen($html, 'UTF-8') > $this->maxInputLength) {
            $html = \mb_substr($html, 0, $this->maxInputLength, 'UTF-8');
        }

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         */
        if (!$this->isValidUtf8($html)) {
            return '';
        }

        // Remove NULL character
        $html = \str_replace(\chr(0), '', $html);

        try {
            $parsed = $this->parser->parse($html);
        } catch (\Exception $exception) {
            // Input the parser cannot handle is treated as unsafe: return nothing.
            return '';
        }

        return $this->domVisitor->visit($parsed)->render();
    }

    /**
     * Tell whether the given string is valid UTF-8 (the empty string counts as valid).
     *
     * @param string $html String to check
     *
     * @return bool
     */
    private function isValidUtf8(string $html): bool
    {
        if ('' === $html) {
            return true;
        }

        // preg_match() fails silently on strings containing invalid UTF-8.
        return 1 === \preg_match('/^./us', $html);
    }
}
134