Completed
Branch master (dc31d6)
by Adrien
13:39
created

XLSX::escapeEscapeCharacter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 0
cts 0
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace Box\Spout\Common\Escaper;
4
5
use Box\Spout\Common\Singleton;
6
7
/**
8
 * Class XLSX
9
 * Provides functions to escape and unescape data for XLSX files
10
 *
11
 * @package Box\Spout\Common\Escaper
12
 */
13
class XLSX implements EscaperInterface
{
    use Singleton;

    /** @var string[] Control characters to be escaped, keyed by their "_xHHHH_" escaped form */
    protected $controlCharactersEscapingMap;

    /**
     * Initializes the singleton instance by building the control characters map once.
     */
    protected function init()
    {
        $this->controlCharactersEscapingMap = $this->getControlCharactersEscapingMap();
    }

    /**
     * Escapes the given string to make it compatible with XLSX.
     * Control characters are converted to their "_xHHHH_" form first,
     * then the XML special characters (including both quote styles) are encoded.
     *
     * @param string $string The string to escape
     * @return string The escaped string
     */
    public function escape($string)
    {
        $escapedString = $this->escapeControlCharacters($string);
        $escapedString = htmlspecialchars($escapedString, ENT_QUOTES);

        return $escapedString;
    }

    /**
     * Unescapes the given string to make it compatible with XLSX.
     * This is the mirror of escape(): XML entities are decoded first,
     * then the "_xHHHH_" sequences are converted back to control characters.
     *
     * @param string $string The string to unescape
     * @return string The unescaped string
     */
    public function unescape($string)
    {
        $unescapedString = htmlspecialchars_decode($string, ENT_QUOTES);
        $unescapedString = $this->unescapeControlCharacters($unescapedString);

        return $unescapedString;
    }

    /**
     * Builds the map containing control characters to be escaped
     * mapped to their escaped values.
     * "\t", "\r" and "\n" don't need to be escaped.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @return string[] Map of escaped value ("_xHHHH_", upper case hex) => raw control character
     */
    protected function getControlCharactersEscapingMap()
    {
        $controlCharactersEscapingMap = [];
        $whitelistedControlCharacters = ["\t", "\r", "\n"];

        // control characters values are from 0 to 1F (hex values) in the ASCII table
        for ($charValue = 0x0; $charValue <= 0x1F; $charValue++) {
            $char = chr($charValue);

            // strict comparison: only exact one-character matches are whitelisted
            if (!in_array($char, $whitelistedControlCharacters, true)) {
                // 4-digit, zero-padded, upper case hex value: 0x1F => "_x001F_"
                $escapedChar = sprintf('_x%04X_', $charValue);
                $controlCharactersEscapingMap[$escapedChar] = $char;
            }
        }

        return $controlCharactersEscapingMap;
    }

    /**
     * Converts PHP control characters from the given string to OpenXML escaped control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "\0" -> _x0000_ and "_x0000_" -> _x005F_x0000_.
     *
     * The literal sequences must be protected BEFORE the control characters are
     * converted, otherwise the freshly generated "_xHHHH_" sequences would be
     * escaped a second time.
     *
     * NOTE(review): a literal "_xHHHH" immediately followed by an escapable control
     * character (e.g. "_x0041" . "\0") still produces an ambiguous "_x0041_x0000_";
     * this pre-existing limitation is not addressed here.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to escape
     * @return string
     */
    protected function escapeControlCharacters($string)
    {
        $escapedString = $this->escapeEscapeCharacter($string);

        return str_replace(
            array_values($this->controlCharactersEscapingMap),
            array_keys($this->controlCharactersEscapingMap),
            $escapedString
        );
    }

    /**
     * Escapes the escape character: "_x0000_" -> "_x005F_x0000_"
     *
     * Every underscore that opens a literal "_xHHHH_" sequence is replaced by "_x005F_".
     * The sequence itself is only looked ahead at, never consumed, so that sequences
     * sharing an underscore are all escaped:
     * "_x0000_x0001_" -> "_x005F_x0000_x005F_x0001_"
     * (consuming the closing underscore would leave "_x0001_" unescaped).
     *
     * @param string $string String to escape
     * @return string The escaped string
     */
    protected function escapeEscapeCharacter($string)
    {
        return preg_replace('/_(?=x[\dA-F]{4}_)/', '_x005F_', $string);
    }

    /**
     * Converts OpenXML escaped control characters from the given string to PHP control characters
     *
     * Excel escapes control characters with _xHHHH_ and also escapes any
     * literal strings of that type by encoding the leading underscore.
     * So "_x0000_" -> "\0" and "_x005F_x0000_" -> "_x0000_"
     *
     * The string is decoded in a single left-to-right pass, one "_xHHHH_" token at a time.
     * Decoding each control character in a separate pass would let a later pass
     * reuse an underscore that belongs to another token.
     *
     * NOTE: the logic has been adapted from the XlsxWriter library (BSD License)
     * @link https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
     *
     * @param string $string String to unescape
     * @return string
     */
    protected function unescapeControlCharacters($string)
    {
        $escapingMap = $this->controlCharactersEscapingMap;

        // First alternative: an escaped underscore protecting a literal "xHHHH_".
        // Second alternative: any other "_xHHHH_" token.
        return preg_replace_callback(
            '/_x005F_(?=x[\dA-F]{4}_)|_x[\dA-F]{4}_/',
            function ($matches) use ($escapingMap) {
                $token = $matches[0];

                if ($token === '_x005F_') {
                    // the lookahead is the only way "_x005F_" can reach this branch unmapped:
                    // restore the underscore, the following "xHHHH_" stays literal
                    return '_';
                }

                // tokens that are not known control characters are left untouched
                return isset($escapingMap[$token]) ? $escapingMap[$token] : $token;
            },
            $string
        );
    }

    /**
     * Unescapes the escape character: "_x005F_x0000_" => "_x0000_"
     *
     * Only the "_x005F_" token is consumed; the protected "xHHHH_" part is looked ahead at,
     * so chained sequences such as "_x005F_x0000_x005F_x0001_" are fully restored.
     *
     * unescapeControlCharacters() now handles this case itself and no longer calls this
     * method; it is retained because it is part of the protected API of the class.
     *
     * @param string $string String to unescape
     * @return string The unescaped string
     */
    protected function unescapeEscapeCharacter($string)
    {
        return preg_replace('/_x005F_(?=x[\dA-F]{4}_)/', '_', $string);
    }
}
148