1 | <?php |
||
12 | class CellValueFormatter |
||
13 | { |
||
14 | /** Definition of all possible cell types */ |
||
15 | const CELL_TYPE_INLINE_STRING = 'inlineStr'; |
||
16 | const CELL_TYPE_STR = 'str'; |
||
17 | const CELL_TYPE_SHARED_STRING = 's'; |
||
18 | const CELL_TYPE_BOOLEAN = 'b'; |
||
19 | const CELL_TYPE_NUMERIC = 'n'; |
||
20 | const CELL_TYPE_DATE = 'd'; |
||
21 | const CELL_TYPE_ERROR = 'e'; |
||
22 | |||
23 | /** Definition of XML nodes names used to parse data */ |
||
24 | const XML_NODE_VALUE = 'v'; |
||
25 | const XML_NODE_INLINE_STRING_VALUE = 't'; |
||
26 | |||
27 | /** Definition of XML attributes used to parse data */ |
||
28 | const XML_ATTRIBUTE_TYPE = 't'; |
||
29 | const XML_ATTRIBUTE_STYLE_ID = 's'; |
||
30 | |||
31 | /** Constants used for date formatting */ |
||
32 | const NUM_SECONDS_IN_ONE_DAY = 86400; |
||
33 | const NUM_SECONDS_IN_ONE_HOUR = 3600; |
||
34 | const NUM_SECONDS_IN_ONE_MINUTE = 60; |
||
35 | |||
36 | /** |
||
37 | * February 29th, 1900 is NOT a leap year but Excel thinks it is... |
||
38 | * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel |
||
39 | */ |
||
40 | const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60; |
||
41 | |||
42 | /** @var SharedStringsManager Manages shared strings */ |
||
43 | protected $sharedStringsManager; |
||
44 | |||
45 | /** @var StyleManager Manages styles */ |
||
46 | protected $styleManager; |
||
47 | |||
48 | /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ |
||
49 | protected $shouldFormatDates; |
||
50 | |||
51 | /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */ |
||
52 | protected $shouldUse1904Dates; |
||
53 | |||
54 | /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */ |
||
55 | protected $escaper; |
||
56 | |||
57 | /** |
||
58 | * @param SharedStringsManager $sharedStringsManager Manages shared strings |
||
59 | * @param StyleManager $styleManager Manages styles |
||
60 | * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings |
||
61 | * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900 |
||
62 | * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data |
||
63 | */ |
||
64 | 79 | public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper) |
|
65 | { |
||
66 | 79 | $this->sharedStringsManager = $sharedStringsManager; |
|
67 | 79 | $this->styleManager = $styleManager; |
|
68 | 79 | $this->shouldFormatDates = $shouldFormatDates; |
|
69 | 79 | $this->shouldUse1904Dates = $shouldUse1904Dates; |
|
70 | 79 | $this->escaper = $escaper; |
|
71 | 79 | } |
|
72 | |||
73 | /** |
||
74 | * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. |
||
75 | * |
||
76 | * @param \DOMNode $node |
||
77 | * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) |
||
78 | */ |
||
79 | 59 | public function extractAndFormatNodeValue($node) |
|
80 | { |
||
81 | // Default cell type is "n" |
||
82 | 59 | $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC; |
|
83 | 59 | $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID); |
|
84 | 59 | $vNodeValue = $this->getVNodeValue($node); |
|
85 | |||
86 | 59 | if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) { |
|
87 | 1 | return $vNodeValue; |
|
88 | } |
||
89 | |||
90 | switch ($cellType) { |
||
91 | 59 | case self::CELL_TYPE_INLINE_STRING: |
|
92 | 7 | return $this->formatInlineStringCellValue($node); |
|
93 | 53 | case self::CELL_TYPE_SHARED_STRING: |
|
94 | 18 | return $this->formatSharedStringCellValue($vNodeValue); |
|
95 | 39 | case self::CELL_TYPE_STR: |
|
96 | 1 | return $this->formatStrCellValue($vNodeValue); |
|
97 | 39 | case self::CELL_TYPE_BOOLEAN: |
|
98 | 1 | return $this->formatBooleanCellValue($vNodeValue); |
|
99 | 39 | case self::CELL_TYPE_NUMERIC: |
|
100 | 39 | return $this->formatNumericCellValue($vNodeValue, $cellStyleId); |
|
101 | 2 | case self::CELL_TYPE_DATE: |
|
102 | 2 | return $this->formatDateCellValue($vNodeValue); |
|
103 | default: |
||
104 | 1 | return null; |
|
105 | } |
||
106 | } |
||
107 | |||
108 | /** |
||
109 | * Returns the cell's string value from a node's nested value node |
||
110 | * |
||
111 | * @param \DOMNode $node |
||
112 | * @return string The value associated with the cell |
||
113 | */ |
||
114 | 59 | protected function getVNodeValue($node) |
|
115 | { |
||
116 | // for cell types having a "v" tag containing the value. |
||
117 | // if not, the returned value should be empty string. |
||
118 | 59 | $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0); |
|
119 | |||
120 | 59 | return ($vNode !== null) ? $vNode->nodeValue : ''; |
|
121 | } |
||
122 | |||
123 | /** |
||
124 | * Returns the cell String value where string is inline. |
||
125 | * |
||
126 | * @param \DOMNode $node |
||
127 | * @return string The value associated with the cell (null when the cell has an error) |
||
128 | */ |
||
129 | 11 | protected function formatInlineStringCellValue($node) |
|
130 | { |
||
131 | // inline strings are formatted this way: |
||
132 | // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c> |
||
133 | 11 | $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0); |
|
134 | 11 | $cellValue = $this->escaper->unescape($tNode->nodeValue); |
|
135 | |||
136 | 11 | return $cellValue; |
|
137 | } |
||
138 | |||
139 | /** |
||
140 | * Returns the cell String value from shared-strings file using nodeValue index. |
||
141 | * |
||
142 | * @param string $nodeValue |
||
143 | * @return string The value associated with the cell (null when the cell has an error) |
||
144 | */ |
||
145 | 18 | protected function formatSharedStringCellValue($nodeValue) |
|
146 | { |
||
147 | // shared strings are formatted this way: |
||
148 | // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c> |
||
149 | 18 | $sharedStringIndex = (int) $nodeValue; |
|
150 | 18 | $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex); |
|
151 | 18 | $cellValue = $this->escaper->unescape($escapedCellValue); |
|
152 | |||
153 | 18 | return $cellValue; |
|
154 | } |
||
155 | |||
156 | /** |
||
157 | * Returns the cell String value, where string is stored in value node. |
||
158 | * |
||
159 | * @param string $nodeValue |
||
160 | * @return string The value associated with the cell (null when the cell has an error) |
||
161 | */ |
||
162 | 1 | protected function formatStrCellValue($nodeValue) |
|
163 | { |
||
164 | 1 | $escapedCellValue = trim($nodeValue); |
|
165 | 1 | $cellValue = $this->escaper->unescape($escapedCellValue); |
|
166 | |||
167 | 1 | return $cellValue; |
|
168 | } |
||
169 | |||
170 | /** |
||
171 | * Returns the cell Numeric value from string of nodeValue. |
||
172 | * The value can also represent a timestamp and a DateTime will be returned. |
||
173 | * |
||
174 | * @param string $nodeValue |
||
175 | * @param int $cellStyleId 0 being the default style |
||
176 | * @return int|float|\DateTime|null The value associated with the cell |
||
177 | */ |
||
178 | 52 | protected function formatNumericCellValue($nodeValue, $cellStyleId) |
|
179 | { |
||
180 | // Numeric values can represent numbers as well as timestamps. |
||
181 | // We need to look at the style of the cell to determine whether it is one or the other. |
||
182 | 52 | $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId); |
|
183 | |||
184 | 52 | if ($shouldFormatAsDate) { |
|
185 | 34 | $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId); |
|
186 | } else { |
||
187 | 19 | $nodeIntValue = (int) $nodeValue; |
|
188 | 19 | $nodeFloatValue = (float) $nodeValue; |
|
189 | 19 | $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue; |
|
190 | } |
||
191 | |||
192 | 52 | return $cellValue; |
|
193 | } |
||
194 | |||
195 | /** |
||
196 | * Returns a cell's PHP Date value, associated to the given timestamp. |
||
197 | * NOTE: The timestamp is a float representing the number of days since the base Excel date: |
||
198 | * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. |
||
199 | * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1. |
||
200 | * |
||
201 | * @see ECMA-376 Part 1 - §18.17.4 |
||
202 | * |
||
203 | * @param float $nodeValue |
||
204 | * @param int $cellStyleId 0 being the default style |
||
205 | * @return \DateTime|null The value associated with the cell or NULL if invalid date value |
||
206 | */ |
||
207 | 34 | protected function formatExcelTimestampValue($nodeValue, $cellStyleId) |
|
218 | |||
219 | /** |
||
220 | * Returns whether the given timestamp is supported by SpreadsheetML |
||
221 | * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries. |
||
222 | * |
||
223 | * @param float $timestampValue |
||
224 | * @return bool |
||
225 | */ |
||
226 | 34 | protected function isValidTimestampValue($timestampValue) |
|
227 | { |
||
228 | // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011) |
||
229 | return ( |
||
230 | 34 | $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 || |
|
231 | 34 | !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884 |
|
232 | ); |
||
233 | } |
||
234 | |||
235 | /** |
||
236 | * Returns a cell's PHP DateTime value, associated to the given timestamp. |
||
237 | * Only the time value matters. The date part is set to the base Excel date: |
||
238 | * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting. |
||
239 | * |
||
240 | * @param float $nodeValue |
||
241 | * @param int $cellStyleId 0 being the default style |
||
242 | * @return \DateTime|string The value associated with the cell |
||
243 | */ |
||
244 | 30 | protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId) |
|
245 | { |
||
246 | 30 | $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30'; |
|
247 | |||
248 | 30 | $daysSinceBaseDate = (int) $nodeValue; |
|
249 | 30 | $timeRemainder = fmod($nodeValue, 1); |
|
250 | 30 | $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0); |
|
251 | |||
252 | 30 | $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate); |
|
253 | 30 | $dateObj->modify('+' . $daysSinceBaseDate . 'days'); |
|
254 | 30 | $dateObj->modify('+' . $secondsRemainder . 'seconds'); |
|
255 | |||
256 | 30 | if ($this->shouldFormatDates) { |
|
257 | 2 | $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId); |
|
258 | 2 | $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode); |
|
259 | 2 | $cellValue = $dateObj->format($phpDateFormat); |
|
260 | } else { |
||
261 | 28 | $cellValue = $dateObj; |
|
262 | } |
||
263 | |||
264 | 30 | return $cellValue; |
|
265 | } |
||
266 | |||
267 | /** |
||
268 | * Returns the cell Boolean value from a specific node's Value. |
||
269 | * |
||
270 | * @param string $nodeValue |
||
271 | * @return bool The value associated with the cell |
||
272 | */ |
||
273 | 1 | protected function formatBooleanCellValue($nodeValue) |
|
277 | |||
278 | /** |
||
279 | * Returns a cell's PHP Date value, associated to the given stored nodeValue. |
||
280 | * @see ECMA-376 Part 1 - §18.17.4 |
||
281 | * |
||
282 | * @param string $nodeValue ISO 8601 Date string |
||
283 | * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value |
||
284 | */ |
||
285 | 2 | protected function formatDateCellValue($nodeValue) |
|
286 | { |
||
296 | } |
||
297 |