Failed Conditions
Push — perf-tests ( db6806...778b2f )
by Adrien
13:47
created

XLSX::getEscapableControlCharactersPattern()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 10
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 10
rs 9.4285
cc 1
eloc 4
nc 1
nop 0
1
<?php
2
3
namespace Box\Spout\Common\Escaper;
4
5
use Box\Spout\Common\Singleton;
6
7
/**
8
 * Class XLSX
9
 * Provides functions to escape and unescape data for XLSX files
10
 *
11
 * @package Box\Spout\Common\Escaper
12
 */
13
class XLSX implements EscaperInterface
14
{
15
    use Singleton;
16
17
    /** @var string Regex pattern to detect control characters that need to be escaped */
18
    protected $escapableControlCharactersPattern;
19
20
    /** @var string[] Map containing the escaped values (key) and the control characters they stand for (value) */
21
    protected $controlCharactersEscapingMap;
22
23
    /** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
24
    protected $controlCharactersEscapingReverseMap;
25
26
    /**
27
     * Initializes the singleton instance
28
     */
1 ignored issue
show
introduced by
Instead of declaring the constructor as final, maybe you should declare the entire class as final.
Loading history...
29
    protected function init()
    {
        // The pattern has to be assigned first: getControlCharactersEscapingMap() reads it.
        $this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();

        // Build the map once, then derive the reverse lookup from that same array.
        $escapingMap = $this->getControlCharactersEscapingMap();
        $this->controlCharactersEscapingMap = $escapingMap;
        $this->controlCharactersEscapingReverseMap = array_flip($escapingMap);
    }
35
36
    /**
37
     * Escapes the given string to make it compatible with XLSX
38
     *
39
     * @param string $string The string to escape
40
     * @return string The escaped string
41
     */
42
    public function escape($string)
    {
        // Step 1: convert control characters (and literal "_xHHHH_" sequences) to their escaped form.
        $escapedString = $this->escapeControlCharacters($string);

        // Step 2: encode the XML special characters, quotes included (ENT_QUOTES).
        // The charset is named explicitly: XLSX XML parts are UTF-8, and without this
        // argument the result depends on the "default_charset" ini setting.
        return htmlspecialchars($escapedString, ENT_QUOTES, 'UTF-8');
    }
49
50
    /**
51
     * Unescapes the given XLSX-escaped string, reversing what escape() does
52
     *
53
     * @param string $string The string to unescape
54
     * @return string The unescaped string
55
     */
56
    public function unescape($string)
    {
        // Decode the XML entities (quotes included) first, then restore the control characters.
        return $this->unescapeControlCharacters(
            htmlspecialchars_decode($string, ENT_QUOTES)
        );
    }
63
64
    /**
65
     * @return string Regex pattern containing all escapable control characters
66
     */
67
    protected function getEscapableControlCharactersPattern()
    {
        // ASCII control characters span 0x00 to 0x1F. "\t" (0x09), "\n" (0x0A) and "\r" (0x0D)
        // are deliberately left out of the ranges below: they should not be escaped.
        $escapableRanges = [
            '\x00-\x08',
            '\x0B-\x0C',
            '\x0E-\x1F',
        ];

        // Yields a regex character class (without delimiters)
        return '[' . implode('', $escapableRanges) . ']';
    }
77
78
    /**
79
     * Builds the map containing the escaped values (key)
80
     * mapped to the control characters they stand for (value).
81
     * "\t", "\r" and "\n" don't need to be escaped.
82
     *
83
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
84
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
85
     *
86
     * @return string[]
87
     */
88
    protected function getControlCharactersEscapingMap()
    {
        $escapingMap = [];
        $escapablePattern = "/{$this->escapableControlCharactersPattern}/";

        // Walk the ASCII control range (0x00 to 0x1F), keeping only the characters matched by the pattern
        foreach (range(0x00, 0x1F) as $charValue) {
            $controlChar = chr($charValue);
            if (!preg_match($escapablePattern, $controlChar)) {
                continue;
            }

            // "%04X" renders the value as 4 uppercase, zero-padded hex digits: 0x1F => "_x001F_"
            $escapingMap[sprintf('_x%04X_', $charValue)] = $controlChar;
        }

        return $escapingMap;
    }
104
105
    /**
106
     * Converts PHP control characters from the given string to OpenXML escaped control characters
107
     *
108
     * Excel escapes control characters with _xHHHH_ and also escapes any
109
     * literal strings of that type by encoding the leading underscore.
110
     * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
111
     *
112
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
113
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
114
     *
115
     * @param string $string String to escape
116
     * @return string
117
     */
118
    protected function escapeControlCharacters($string)
    {
        // Literal "_xHHHH_" sequences are protected before any control character gets converted
        $escapedString = $this->escapeEscapeCharacter($string);

        if (preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
            // Swap every matched control character for its "_xHHHH_" form
            $reverseMap = $this->controlCharactersEscapingReverseMap;

            return preg_replace_callback(
                "/({$this->escapableControlCharactersPattern})/",
                function ($matches) use ($reverseMap) {
                    return $reverseMap[$matches[0]];
                },
                $escapedString
            );
        }

        // no control characters: nothing more to do
        return $escapedString;
    }
131
132
    /**
133
     * Escapes the escape character: "_x0000_" -> "_x005F_x0000_"
134
     *
135
     * @param string $string String to escape
136
     * @return string The escaped string
137
     */
138
    protected function escapeEscapeCharacter($string)
    {
        // A literal "_xHHHH_" sequence (H being a digit or an uppercase A-F); "xHHHH" is captured
        $literalSequencePattern = '/_(x[\dA-F]{4})_/';

        // The sequence is written back with "_x005F" in front of it
        $protectedSequence = '_x005F_$1_';

        return preg_replace($literalSequencePattern, $protectedSequence, $string);
    }
142
143
    /**
144
     * Converts OpenXML escaped control characters from the given string to PHP control characters
145
     *
146
     * Excel escapes control characters with _xHHHH_ and also escapes any
147
     * literal strings of that type by encoding the leading underscore.
148
     * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
149
     *
150
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
151
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
152
     *
153
     * @param string $string String to unescape
154
     * @return string
155
     */
156
    protected function unescapeControlCharacters($string)
    {
        $result = $string;

        foreach ($this->controlCharactersEscapingMap as $escapedSequence => $controlChar) {
            // The negative lookbehind leaves alone any sequence preceded by "_x005F":
            // those are escaped literals, handled by unescapeEscapeCharacter() below.
            $pattern = '/(?<!_x005F)(' . $escapedSequence . ')/';
            $result = preg_replace($pattern, $controlChar, $result);
        }

        return $this->unescapeEscapeCharacter($result);
    }
167
168
    /**
169
     * Unescapes the escape character: "_x005F_x0000_" => "_x0000_"
170
     *
171
     * @param string $string String to unescape
172
     * @return string The unescaped string
173
     */
174
    protected function unescapeEscapeCharacter($string)
    {
        // "_x005F" followed by a "_xHHHH_" sequence; only the sequence itself is captured
        $protectedSequencePattern = '/_x005F(_x[\dA-F]{4}_)/';

        // Keep the captured sequence, dropping the "_x005F" prefix
        $literalSequence = '$1';

        return preg_replace($protectedSequencePattern, $literalSequence, $string);
    }
178
}
179