Failed Conditions
Push — master ( b01a48...e5185e )
by Adrien
190:50 queued 106:17
created

XmlScanner::__construct()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 5
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 10
ccs 6
cts 6
cp 1
crap 2
rs 10
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
4
5
use PhpOffice\PhpSpreadsheet\Reader;
6
use PhpOffice\PhpSpreadsheet\Settings;
7
8
/**
 * Pre-scans XML (or HTML) payloads for a risky marker such as <!DOCTYPE or <!ENTITY
 * before they are handed to an XML parser, and aborts the load with a
 * Reader\Exception when the marker is found, to prevent XXE/XEE attacks.
 */
class XmlScanner
{
    /**
     * String used to identify risky xml elements.
     *
     * @var string
     */
    private $pattern;

    /**
     * Optional callback applied to the XML after it has passed the scan.
     *
     * @var null|callable
     */
    private $callback;

    /**
     * Value returned by the first libxml_disable_entity_loader(true) call, kept so
     * that shutdown() can restore it; null when there is nothing to restore.
     *
     * @var null|bool
     */
    private static $libxmlDisableEntityLoaderValue;

    /**
     * @var bool
     */
    private static $shutdownRegistered = false;

    /**
     * @param string $pattern Marker whose presence in the scanned XML aborts the load
     */
    public function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;

        $this->disableEntityLoaderCheck();

        // A fatal error will bypass the destructor, so we register a shutdown here
        if (!self::$shutdownRegistered) {
            self::$shutdownRegistered = true;
            register_shutdown_function([__CLASS__, 'shutdown']);
        }
    }

    /**
     * Create a scanner configured for the given reader.
     *
     * The HTML reader is scanned for <!ENTITY; every other reader is scanned
     * for <!DOCTYPE.
     *
     * @return self
     */
    public static function getInstance(Reader\IReader $reader)
    {
        return new self($reader instanceof Reader\Html ? '<!ENTITY' : '<!DOCTYPE');
    }

    /**
     * Report whether the running PHP version is a 7.x release at or above
     * 7.0.27, 7.1.13 or 7.2.1 (any later 7.x minor always qualifies).
     *
     * Every other major version, including PHP 8, yields false.
     * NOTE(review): going by the method name, these are the releases where
     * libxml_disable_entity_loader() is considered thread-safe; confirm against
     * the PHP changelog.
     *
     * @return bool
     */
    public static function threadSafeLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION !== 7) {
            return false;
        }

        switch (PHP_MINOR_VERSION) {
            case 2:
                return PHP_RELEASE_VERSION >= 1;
            case 1:
                return PHP_RELEASE_VERSION >= 13;
            case 0:
                return PHP_RELEASE_VERSION >= 27;
        }

        return true;
    }

    /**
     * Disable the libxml entity loader when the Settings flag asks for it and
     * the runtime is older than PHP 8.
     */
    private function disableEntityLoaderCheck(): void
    {
        if (Settings::getLibXmlDisableEntityLoader() && \PHP_VERSION_ID < 80000) {
            $previousValue = libxml_disable_entity_loader(true);

            // Only the first previous value is remembered; later calls would just
            // read back the "true" that an earlier call has set.
            if (self::$libxmlDisableEntityLoaderValue === null) {
                self::$libxmlDisableEntityLoaderValue = $previousValue;
            }
        }
    }

    /**
     * Restore the entity-loader setting that was saved by disableEntityLoaderCheck().
     *
     * Does nothing when no value has been saved or when running on PHP 8+.
     */
    public static function shutdown(): void
    {
        if (self::$libxmlDisableEntityLoaderValue !== null && \PHP_VERSION_ID < 80000) {
            libxml_disable_entity_loader(self::$libxmlDisableEntityLoaderValue);
            self::$libxmlDisableEntityLoaderValue = null;
        }
    }

    public function __destruct()
    {
        self::shutdown();
    }

    /**
     * Register a callback that receives the scanned XML and whose return value
     * replaces it in the result of scan().
     */
    public function setAdditionalCallback(callable $callback): void
    {
        $this->callback = $callback;
    }

    /**
     * Extract the character set named by the first encoding declaration in the XML.
     *
     * Both quoting styles allowed by the XML grammar (encoding="..." and
     * encoding='...') are recognised, as is optional whitespace around "=".
     *
     * @param string $xml
     *
     * @return string Upper-cased charset name; 'UTF-8' when no declaration is found
     */
    private function findCharSet($xml)
    {
        if (preg_match('/encoding\s*=\s*(["\'])(.*?)\1/', $xml, $matches)) {
            return strtoupper($matches[2]);
        }

        return 'UTF-8';
    }

    /**
     * Convert the XML to UTF-8 when it declares a different encoding.
     *
     * After conversion the declaration is read again; if it still names a
     * non-UTF-8 charset the payload is rejected as double-encoded.
     *
     * @param string $xml
     *
     * @return string
     */
    private function toUtf8($xml)
    {
        if ($this->findCharSet($xml) === 'UTF-8') {
            return $xml;
        }

        $converted = mb_convert_encoding($xml, 'UTF-8', $this->findCharSet($xml));
        // mb_convert_encoding() may hand back a non-string on failure; never let
        // that leak into the regex checks or out to the caller.
        $xml = is_string($converted) ? $converted : '';

        if ($this->findCharSet($xml) !== 'UTF-8') {
            throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $xml;
    }

    /**
     * Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
     *
     * The marker actually searched for is the pattern given to the constructor.
     * Each character of the marker may be preceded/followed by a NUL byte so
     * that markers written with interleaved NULs are matched as well.
     *
     * @param mixed $xml Cast to string before scanning
     *
     * @return string
     */
    public function scan($xml)
    {
        $this->disableEntityLoaderCheck();

        $xml = $this->toUtf8((string) $xml);

        // Don't rely purely on libxml_disable_entity_loader()
        // Every character is quoted so that a custom marker containing "/" or a
        // regex metacharacter is still matched literally.
        $characters = array_map(
            static function ($character) {
                return preg_quote($character, '/');
            },
            str_split($this->pattern)
        );
        $pattern = '/\\0?' . implode('\\0?', $characters) . '\\0?/';

        if (preg_match($pattern, $xml)) {
            throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        if (is_callable($this->callback)) {
            $xml = call_user_func($this->callback, $xml);
        }

        return $xml;
    }

    /**
     * Scan the XML file for use of <!ENTITY to prevent XXE/XEE attacks.
     *
     * The file is read with file_get_contents() and passed to scan(); when the
     * read fails, scan() receives false and treats it as an empty string.
     *
     * @param string $filestream
     *
     * @return string
     */
    public function scanFile($filestream)
    {
        return $this->scan(file_get_contents($filestream));
    }
}
159