Completed
Push — improve_xlsx_escaper_perf ( 994fe3 )
by Adrien
03:08
created

XLSX::escape()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
ccs 4
cts 4
cp 1
cc 1
eloc 4
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Box\Spout\Common\Escaper;
4
5
use Box\Spout\Common\Singleton;
6
7
/**
 * Class XLSX
 * Provides functions to escape and unescape data for XLSX files.
 *
 * Two layers of escaping are applied when writing (and undone, in reverse
 * order, when reading):
 *  - control characters are written as "_xHHHH_" sequences, and literal text
 *    that looks like such a sequence is protected by writing its leading
 *    underscore as "_x005F_";
 *  - XML special characters are converted to entities with htmlspecialchars().
 *
 * @package Box\Spout\Common\Escaper
 */
class XLSX implements EscaperInterface
{
    use Singleton;

    /** @var string Regex character class (without delimiters) matching the control characters that need to be escaped */
    protected $escapableControlCharactersPattern;

    /** @var string[] Map containing escaped values, e.g. "_x0000_" (key) and the control character they stand for (value) */
    protected $controlCharactersEscapingMap;

    /** @var string[] Map containing control characters to be escaped (key) and their escaped value, e.g. "_x0000_" (value) */
    protected $controlCharactersEscapingReverseMap;

    /** @var string|null Lazily built regex matching every sequence that unescapeControlCharacters() decodes */
    protected $unescapingPattern;

    /**
     * Initializes the singleton instance: computes the control characters
     * pattern first (the escaping map is derived from it), then both maps.
     *
     * @return void
     */
    protected function init()
    {
        $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
        $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
        $this->controlCharactersEscapingReverseMap = array_flip($this->controlCharactersEscapingMap);
    }

    /**
     * Escapes the given string to make it compatible with XLSX
     *
     * @param string $string The string to escape
     * @return string The escaped string
     */
    public function escape($string)
    {
        // control characters must be handled first: the "_xHHHH_" sequences
        // they produce contain no character that htmlspecialchars() touches
        $escapedString = $this->escapeControlCharacters($string);

        return htmlspecialchars($escapedString, ENT_QUOTES);
    }

    /**
     * Unescapes the given string, read from an XLSX file, to get back the original value
     *
     * @param string $string The string to unescape
     * @return string The unescaped string
     */
    public function unescape($string)
    {
        // exact reverse of escape(): entities first, then control characters
        $unescapedString = htmlspecialchars_decode($string, ENT_QUOTES);

        return $this->unescapeControlCharacters($unescapedString);
    }

    /**
     * @return string Regex pattern containing all escapable control characters
     */
    protected function getEscapableControlCharactersPattern()
    {
        // control characters values are from 0 to 1F (hex values) in the ASCII table
        // some characters should not be escaped though: "\t" (0x9), "\n" (0xA) and "\r" (0xD).
        // The "\x.." sequences are deliberately left for the regex engine to interpret.
        return '[\x00-\x08' . '\x0B-\x0C' . '\x0E-\x1F]';
    }

    /**
     * Builds the map containing the escaped values ("_xHHHH_", keys)
     * mapped to the control characters they represent (values).
     * "\t", "\r" and "\n" don't need to be escaped.
     *
     * Relies on $this->escapableControlCharactersPattern being already set.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @return string[]
     */
    protected function getControlCharactersEscapingMap()
    {
        $controlCharactersEscapingMap = [];
        $escapablePattern = "/{$this->escapableControlCharactersPattern}/";

        // control characters values are from 0 to 1F (hex values) in the ASCII table
        for ($charValue = 0x00; $charValue <= 0x1F; $charValue++) {
            $character = chr($charValue);

            if (preg_match($escapablePattern, $character)) {
                // 4 upper-case hex digits, zero padded: 0x0B => "_x000B_"
                $controlCharactersEscapingMap[sprintf('_x%04X_', $charValue)] = $character;
            }
        }

        return $controlCharactersEscapingMap;
    }

    /**
     * Converts PHP control characters from the given string to OpenXML escaped control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to escape
     * @return string
     */
    protected function escapeControlCharacters($string)
    {
        // literal "_xHHHH_" look-alikes must be protected BEFORE real
        // sequences are generated, otherwise the generated ones would be
        // escaped a second time
        $escapedString = $this->escapeEscapeCharacter($string);

        // single pass, no regex: every escapable control character is a
        // one-byte key of the reverse map
        return strtr($escapedString, $this->controlCharactersEscapingReverseMap);
    }

    /**
     * Escapes the escape character: "_x0000_" -> "_x005F_x0000_"
     *
     * Every underscore that would start a "_xHHHH_" sequence in the final
     * output is rewritten as "_x005F_". A look-ahead is used so that the
     * closing underscore of one sequence can also be the opening underscore
     * of the next one ("_x0000_x0001_" -> "_x005F_x0000_x005F_x0001_").
     * "_xHHHH" directly followed by an escapable control character is covered
     * too, because that control character will itself become "_xHHHH_".
     *
     * @param string $string String to escape
     * @return string The escaped string
     */
    protected function escapeEscapeCharacter($string)
    {
        $pattern = '/_(?=x[\dA-F]{4}(?:_|' . $this->escapableControlCharactersPattern . '))/';

        return preg_replace($pattern, '_x005F_', $string);
    }

    /**
     * Converts OpenXML escaped control characters from the given string to PHP control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
     *
     * The string is decoded in a single left-to-right pass: each recognized
     * sequence is consumed as a whole, so the closing underscore of one
     * sequence is never mistaken for the start of another one. The escaped
     * escape character is decoded in the same pass, which is why
     * unescapeEscapeCharacter() is not called from here.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to unescape
     * @return string
     */
    protected function unescapeControlCharacters($string)
    {
        return preg_replace_callback($this->getUnescapingPattern(), function ($matches) {
            $sequence = $matches[0];

            if (isset($this->controlCharactersEscapingMap[$sequence])) {
                return $this->controlCharactersEscapingMap[$sequence];
            }

            // the only other alternative of the pattern is the escaped underscore
            return '_';
        }, $string);
    }

    /**
     * Returns (and caches) the regex matching everything that needs to be
     * decoded when unescaping: the escaped underscore ("_x005F_" followed by
     * "xHHHH_") and every escaped control character of the escaping map.
     *
     * @return string Regex pattern, delimiters included
     */
    protected function getUnescapingPattern()
    {
        if ($this->unescapingPattern === null) {
            $alternatives = ['_x005F_(?=x[\dA-F]{4}_)'];

            foreach (array_keys($this->controlCharactersEscapingMap) as $escapedCharValue) {
                $alternatives[] = preg_quote($escapedCharValue, '/');
            }

            $this->unescapingPattern = '/' . implode('|', $alternatives) . '/';
        }

        return $this->unescapingPattern;
    }

    /**
     * Unescapes the escape character: "_x005F_x0000_" => "_x0000_"
     *
     * Only the escape character is handled here; escaped control characters
     * are left untouched. The whole "_x005F_" token is replaced by "_" and
     * the following "xHHHH_" is only looked at, not consumed, so consecutive
     * escaped sequences are all processed.
     *
     * @param string $string String to unescape
     * @return string The unescaped string
     */
    protected function unescapeEscapeCharacter($string)
    {
        return preg_replace('/_x005F_(?=x[\dA-F]{4}_)/', '_', $string);
    }
}
179