Passed
Pull Request — master (#4119)
by Adrien
15:02
created

XmlScanner::toUtf8()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 7
dl 0
loc 13
ccs 10
cts 10
cp 1
rs 10
c 0
b 0
f 0
cc 3
nc 3
nop 1
crap 3
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
4
5
use PhpOffice\PhpSpreadsheet\Reader;
6
7
/**
 * Pre-parse guard that rejects XML containing a forbidden marker
 * (`<!DOCTYPE` by default, `<!ENTITY` for the Html reader) before the
 * document is handed to an XML parser, to prevent XXE/XEE attacks.
 */
class XmlScanner
{
    /** Literal marker whose presence in the scanned XML aborts the load. */
    private string $pattern;

    /** @var ?callable */
    private $callback;

    /**
     * @param string $pattern literal text (not a regular expression) to search for
     */
    public function __construct(string $pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
    }

    /**
     * Build a scanner suited to the given reader: the Html reader is checked
     * for `<!ENTITY`, every other reader for `<!DOCTYPE`.
     */
    public static function getInstance(Reader\IReader $reader): self
    {
        $pattern = ($reader instanceof Reader\Html) ? '<!ENTITY' : '<!DOCTYPE';

        return new self($pattern);
    }

    /**
     * Register a callback that receives the scanned XML and whose return
     * value becomes the result of scan().
     */
    public function setAdditionalCallback(callable $callback): void
    {
        $this->callback = $callback;
    }

    /**
     * Return the argument if it is a string, otherwise an empty string.
     */
    private static function forceString(mixed $arg): string
    {
        return is_string($arg) ? $arg : '';
    }

    /**
     * Convert the XML to UTF-8 when it declares another encoding.
     *
     * After conversion the declaration is inspected again; if it still names
     * something other than UTF-8 the document is rejected.
     *
     * @throws Reader\Exception when the converted XML still declares a non-UTF-8 encoding
     */
    private function toUtf8(string $xml): string
    {
        $charset = $this->findCharSet($xml);
        if ($charset !== 'UTF-8') {
            $xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset));

            $charset = $this->findCharSet($xml);
            if ($charset !== 'UTF-8') {
                throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
            }
        }

        return $xml;
    }

    /**
     * Extract the declared encoding from the XML, upper-cased.
     *
     * Whitespace around the `=` is tolerated, because XML permits it; a
     * declaration such as `encoding = "UTF-7"` must not slip past detection.
     *
     * @return string the declared encoding, or 'UTF-8' when none is found
     */
    private function findCharSet(string $xml): string
    {
        $patterns = [
            '/encoding\s*=\s*"([^"]*?)"/',
            "/encoding\\s*=\\s*'([^']*?)'/",
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $xml, $matches)) {
                return strtoupper($matches[1]);
            }
        }

        return 'UTF-8';
    }

    /**
     * Scan the XML for use of the forbidden marker to prevent XXE/XEE attacks.
     *
     * @param false|string $xml XML text; false (e.g. a failed read) is scanned as an empty string
     *
     * @return string the UTF-8 XML, passed through the additional callback when one is set
     *
     * @throws Reader\Exception when the marker is found or the encoding check fails
     */
    public function scan($xml): string
    {
        $xml = $this->toUtf8((string) $xml);

        // Don't rely purely on libxml_disable_entity_loader()
        // Each character of the marker is quoted so it is matched literally,
        // and an optional NUL byte is allowed around every character so the
        // marker is still found when its characters are NUL-separated
        // (as in UTF-16 encoded text).
        $quotedChars = array_map(
            static fn (string $char): string => preg_quote($char, '/'),
            str_split($this->pattern)
        );
        $pattern = '/\\0?' . implode('\\0?', $quotedChars) . '\\0?/';

        if (preg_match($pattern, $xml)) {
            throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        if ($this->callback !== null) {
            $xml = ($this->callback)($xml);
        }

        return $xml;
    }

    /**
     * Read the given file or stream and scan its contents with scan().
     *
     * @param string $filestream path or stream wrapper URL passed to file_get_contents()
     */
    public function scanFile(string $filestream): string
    {
        return $this->scan(file_get_contents($filestream));
    }
}
100