Completed
Push — develop ( f28289...e8c25c )
by Adrien
130:26 queued 124:41
created

XmlScanner::getInstance()   A

Complexity

Conditions 6
Paths 6

Size

Total Lines 12
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 6.0493

Importance

Changes 0
Metric Value
cc 6
eloc 10
nc 6
nop 1
dl 0
loc 12
ccs 8
cts 9
cp 0.8889
crap 6.0493
rs 9.2222
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader\Security;
4
5
use PhpOffice\PhpSpreadsheet\Reader;
6
7
/**
 * Pre-parse guard for spreadsheet XML/HTML payloads.
 *
 * Before a payload is handed to an XML parser, scan() normalises it to UTF-8
 * and rejects it when it contains the marker configured for the reader type
 * ('<!ENTITY' for HTML, '<!DOCTYPE' for everything else), so that XXE/XEE
 * payloads are refused up front.
 */
class XmlScanner
{
    /**
     * Identifies whether the thread-safe libxmlDisableEntityLoader() function is available.
     *
     * When true, scan() switches the libxml entity loader off for the duration
     * of the scan and restores the previous setting afterwards.
     *
     * @var bool
     */
    private $libxmlDisableEntityLoader = false;

    /**
     * String used to identify risky xml elements.
     *
     * @var string
     */
    private $pattern;

    /**
     * Optional extra processing step applied to the XML once it passed the scan.
     *
     * @var null|callable
     */
    private $callback;

    /**
     * Instances are created through getInstance() only.
     *
     * @param string $pattern Marker whose presence makes scan() reject the payload
     */
    private function __construct($pattern = '<!DOCTYPE')
    {
        $this->pattern = $pattern;
        $this->libxmlDisableEntityLoader = $this->identifyLibxmlDisableEntityLoaderAvailability();
    }

    /**
     * Build a scanner suited to the given reader.
     *
     * HTML readers get a scanner looking for '<!ENTITY'; every other reader
     * (Xlsx, Xml, Ods, Gnumeric and any unknown implementation) gets one
     * looking for '<!DOCTYPE'.
     *
     * @param Reader\IReader $reader
     *
     * @return self
     */
    public static function getInstance(Reader\IReader $reader)
    {
        if ($reader instanceof Reader\Html) {
            return new self('<!ENTITY');
        }

        return new self('<!DOCTYPE');
    }

    /**
     * Decide from the running PHP version whether the entity loader should be toggled.
     *
     * Only PHP 7 can report true: 7.0 from patch level 27, 7.1 from 13, 7.2 from 1,
     * and every later 7.x minor unconditionally. Any other major version reports
     * false, in which case scan() never calls libxml_disable_entity_loader().
     *
     * @return bool
     */
    private function identifyLibxmlDisableEntityLoaderAvailability()
    {
        if (PHP_MAJOR_VERSION !== 7) {
            return false;
        }

        switch (PHP_MINOR_VERSION) {
            case 2:
                return PHP_RELEASE_VERSION >= 1;
            case 1:
                return PHP_RELEASE_VERSION >= 13;
            case 0:
                return PHP_RELEASE_VERSION >= 27;
        }

        return true;
    }

    /**
     * Register a callable that receives the XML after a successful scan.
     *
     * Its return value replaces the XML returned by scan().
     *
     * @param callable $callback
     */
    public function setAdditionalCallback(callable $callback)
    {
        $this->callback = $callback;
    }

    /**
     * Scan the XML for the configured risky marker to prevent XXE/XEE attacks.
     *
     * The payload is first converted to UTF-8 according to its declared
     * encoding, then searched for the marker (optionally interleaved with NUL
     * bytes, as in unconverted multi-byte text). When available, the libxml
     * entity loader is disabled while this runs and is always restored.
     *
     * @param mixed $xml
     *
     * @throws Reader\Exception when the marker is found or the declared encoding cannot be converted
     *
     * @return string the (possibly re-encoded and callback-processed) XML
     */
    public function scan($xml)
    {
        // Initialised up front so the finally block never reads an undefined variable.
        $previousEntityLoaderValue = null;
        $entityLoaderToggled = false;
        if ($this->libxmlDisableEntityLoader) {
            $previousEntityLoaderValue = libxml_disable_entity_loader(true);
            $entityLoaderToggled = true;
        }

        try {
            // Conversion happens inside the try so a failure cannot leave the
            // process-wide entity loader setting modified.
            $xml = $this->toUtf8($xml);

            // Don't rely purely on libxml_disable_entity_loader()
            $pattern = '/\\0?' . implode('\\0?', str_split($this->pattern)) . '\\0?/';
            if (preg_match($pattern, $xml)) {
                throw new Reader\Exception('Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
            }

            if (is_callable($this->callback)) {
                $xml = call_user_func($this->callback, $xml);
            }
        } finally {
            if ($entityLoaderToggled) {
                libxml_disable_entity_loader($previousEntityLoaderValue);
            }
        }

        return $xml;
    }

    /**
     * Convert the payload to UTF-8 based on the first encoding declaration found in it.
     *
     * Both quoting styles and optional whitespace around '=' are recognised,
     * because a declaration that is overlooked here would leave an encoded
     * marker invisible to the pattern check in scan(). Payloads without a
     * declaration, or declared as UTF-8 in any letter case, are returned as is.
     *
     * @param mixed $xml
     *
     * @throws Reader\Exception when the declared encoding cannot be converted
     *
     * @return mixed the input itself, or its UTF-8 conversion
     */
    private function toUtf8($xml)
    {
        $charset = 'UTF-8';
        if (preg_match('/encoding\\s*=\\s*(["\'])(.*?)\\1/', $xml, $matches)) {
            $charset = $matches[2];
        }

        if (strcasecmp($charset, 'UTF-8') === 0) {
            return $xml;
        }

        try {
            $converted = mb_convert_encoding($xml, 'UTF-8', $charset);
        } catch (\ValueError $e) {
            // PHP 8 throws for an unknown encoding name; older versions return false.
            $converted = false;
        }

        if ($converted === false) {
            // Never let an unconvertible payload slip through the scan unchecked.
            throw new Reader\Exception('Unable to convert XML from declared encoding "' . $charset . '", spreadsheet file load() aborted to prevent XXE/XEE attacks');
        }

        return $converted;
    }

    /**
     * Scan the contents of a file or stream for the configured risky marker to prevent XXE/XEE attacks.
     *
     * The contents are read with file_get_contents() and passed to scan() unchanged.
     *
     * @param string $filestream Path or stream URI to read
     *
     * @throws Reader\Exception
     *
     * @return string
     */
    public function scanFile($filestream)
    {
        return $this->scan(file_get_contents($filestream));
    }
}
126