Passed
Pull Request — develop_3.0 (#466)
by Adrien
04:47
created

formatExcelTimestampValueAsDateValue()   B

Complexity

Conditions 3
Paths 8

Size

Total Lines 25
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 3.0261

Importance

Changes 0
Metric Value
dl 0
loc 25
rs 8.8571
c 0
b 0
f 0
ccs 12
cts 14
cp 0.8571
cc 3
eloc 16
nc 8
nop 2
crap 3.0261
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
6
use Box\Spout\Reader\XLSX\Manager\StyleManager;
7
8
/**
 * Class CellValueFormatter
 * This class provides helper functions to turn the XML representation of an
 * XLSX cell (a "<c>" element) into the matching PHP value.
 */
class CellValueFormatter
{
    /** Definition of all possible cell types (values of the "t" attribute of a cell) */
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
    const CELL_TYPE_STR = 'str';
    const CELL_TYPE_SHARED_STRING = 's';
    const CELL_TYPE_BOOLEAN = 'b';
    const CELL_TYPE_NUMERIC = 'n';
    const CELL_TYPE_DATE = 'd';
    const CELL_TYPE_ERROR = 'e';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_VALUE = 'v';
    const XML_NODE_INLINE_STRING_VALUE = 't';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_TYPE = 't';
    const XML_ATTRIBUTE_STYLE_ID = 's';

    /** Constants used for date formatting */
    const NUM_SECONDS_IN_ONE_DAY = 86400;
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
    const NUM_SECONDS_IN_ONE_MINUTE = 60;

    /**
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
     */
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;

    /** @var SharedStringsManager Manages shared strings */
    protected $sharedStringsManager;

    /** @var StyleManager Manages styles */
    protected $styleManager;

    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
    protected $shouldFormatDates;

    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
    protected $escaper;

    /**
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @param StyleManager $styleManager Manages styles
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
     */
    public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $escaper)
    {
        $this->sharedStringsManager = $sharedStringsManager;
        $this->styleManager = $styleManager;
        $this->shouldFormatDates = $shouldFormatDates;
        $this->escaper = $escaper;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * The cell type is read from the "t" attribute (defaulting to numeric when it is
     * missing or empty) and the value is dispatched to the matching format*CellValue()
     * method. Any type without a dedicated formatter (including the error type) yields null.
     *
     * @param \DOMElement $node The cell element; getAttribute() is called on it, so a plain \DOMNode is not enough
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    public function extractAndFormatNodeValue($node)
    {
        // Default cell type is "n"
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
        $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
        $vNodeValue = $this->getVNodeValue($node);

        // Inline strings do not store their content in a "v" node, so an empty "v" value
        // only means "empty cell" for the other types.
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
            return $vNodeValue;
        }

        switch ($cellType) {
            case self::CELL_TYPE_INLINE_STRING:
                return $this->formatInlineStringCellValue($node);
            case self::CELL_TYPE_SHARED_STRING:
                return $this->formatSharedStringCellValue($vNodeValue);
            case self::CELL_TYPE_STR:
                return $this->formatStrCellValue($vNodeValue);
            case self::CELL_TYPE_BOOLEAN:
                return $this->formatBooleanCellValue($vNodeValue);
            case self::CELL_TYPE_NUMERIC:
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
            case self::CELL_TYPE_DATE:
                return $this->formatDateCellValue($vNodeValue);
            default:
                return null;
        }
    }

    /**
     * Returns the cell's string value from a node's nested value node
     *
     * @param \DOMElement $node The cell element
     * @return string The content of the first "v" descendant, or an empty string when there is none
     */
    protected function getVNodeValue($node)
    {
        // for cell types having a "v" tag containing the value.
        // if not, the returned value should be empty string.
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);

        return ($vNode !== null) ? $vNode->nodeValue : '';
    }

    /**
     * Returns the cell String value where string is inline.
     *
     * The content of every "t" descendant is unescaped and concatenated, in document
     * order. This covers both the plain form and the rich text form, where the string
     * is split into several runs:
     *   <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
     *   <c r="A1" t="inlineStr"><is><r><t>[PART_1]</t></r><r><t>[PART_2]</t></r></is></c>
     *
     * @param \DOMElement $node The cell element
     * @return string The value associated with the cell (empty string when the cell contains no "t" node)
     */
    protected function formatInlineStringCellValue($node)
    {
        $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
        $cellValue = '';

        // Looping (instead of reading item(0) only) avoids dereferencing null when there
        // is no "t" node and keeps the text of every run of a rich text string.
        // Each node is unescaped on its own so that escape sequences never span two runs.
        for ($i = 0; $i < $tNodes->length; $i++) {
            $cellValue .= $this->escaper->unescape($tNodes->item($i)->nodeValue);
        }

        return $cellValue;
    }

    /**
     * Returns the cell String value from shared-strings file using nodeValue index.
     *
     * @param string $nodeValue The index of the string, as stored in the "v" node
     * @return string The unescaped string returned by the shared strings manager for this index
     */
    protected function formatSharedStringCellValue($nodeValue)
    {
        // shared strings are formatted this way:
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
        $sharedStringIndex = (int) $nodeValue;
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
        $cellValue = $this->escaper->unescape($escapedCellValue);

        return $cellValue;
    }

    /**
     * Returns the cell String value, where string is stored in value node.
     *
     * @param string $nodeValue The content of the "v" node
     * @return string The trimmed, unescaped value associated with the cell
     */
    protected function formatStrCellValue($nodeValue)
    {
        $escapedCellValue = trim($nodeValue);
        $cellValue = $this->escaper->unescape($escapedCellValue);

        return $cellValue;
    }

    /**
     * Returns the cell Numeric value from string of nodeValue.
     * The value can also represent a timestamp and a DateTime will be returned.
     *
     * @param string $nodeValue The content of the "v" node
     * @param int $cellStyleId 0 being the default style
     * @return int|float|string|\DateTime|null The value associated with the cell
     */
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
    {
        // Numeric values can represent numbers as well as timestamps.
        // We need to look at the style of the cell to determine whether it is one or the other.
        $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);

        if ($shouldFormatAsDate) {
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
        } else {
            // Return an int only when the int conversion loses nothing compared to the float one
            $nodeIntValue = (int) $nodeValue;
            $nodeFloatValue = (float) $nodeValue;
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
        }

        return $cellValue;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given timestamp.
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
     */
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
    {
        // Fix for the erroneous leap year in Excel
        if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
            --$nodeValue;
        }

        if ($nodeValue >= 1) {
            // Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
            $cellValue = $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
        } elseif ($nodeValue >= 0) {
            // Values between 0 and 1 represent "times".
            $cellValue = $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
        } else {
            // invalid date
            $cellValue = null;
        }

        return $cellValue;
    }

    /**
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
     * Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
     *
     * @param float $nodeValue Fraction of a day
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string A DateTime, or its formatted string when dates should be formatted
     */
    protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
    {
        // Work with integers from here on: DateTime::setTime() expects ints
        $totalSeconds = (int) round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
        $hours = (int) floor($totalSeconds / self::NUM_SECONDS_IN_ONE_HOUR);
        $minutes = (int) floor(($totalSeconds % self::NUM_SECONDS_IN_ONE_HOUR) / self::NUM_SECONDS_IN_ONE_MINUTE);
        $seconds = $totalSeconds % self::NUM_SECONDS_IN_ONE_MINUTE;

        // using the base Excel date (Jan 1st, 1900) - not relevant here
        $dateObj = new \DateTime('1900-01-01');
        $dateObj->setTime($hours, $minutes, $seconds);

        if ($this->shouldFormatDates) {
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
            $cellValue = $dateObj->format($phpDateFormat);
        } else {
            $cellValue = $dateObj;
        }

        return $cellValue;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given timestamp.
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
     *
     * @param float $nodeValue Number of days; the fractional part is the time of the day
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
     */
    protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
    {
        // Do not use any unix timestamps for calculation to prevent
        // issues with numbers exceeding 2^31.
        // The fractional part of the value is the time of the day, converted to whole seconds.
        $secondsRemainder = (int) round(fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY);

        try {
            // 1899-12-31 is "day 0", so that adding 1 day gives the base date 1900-01-01
            $dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
            $dateObj->modify('+' . (int) $nodeValue . 'days');
            $dateObj->modify('+' . $secondsRemainder . 'seconds');

            if ($this->shouldFormatDates) {
                $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
                $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
                $cellValue = $dateObj->format($phpDateFormat);
            } else {
                $cellValue = $dateObj;
            }
        } catch (\Exception $e) {
            $cellValue = null;
        }

        return $cellValue;
    }

    /**
     * Returns the cell Boolean value from a specific node's Value.
     *
     * @param string $nodeValue The content of the "v" node
     * @return bool The result of PHP's boolean cast of the value ("0" and "" give false, anything else true)
     */
    protected function formatBooleanCellValue($nodeValue)
    {
        return (bool) $nodeValue;
    }

    /**
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
     * When dates should be formatted, the stored string is returned as is, without validation.
     * @see ECMA-376 Part 1 - §18.17.4
     *
     * @param string $nodeValue ISO 8601 Date string
     * @return \DateTime|string|null The value associated with the cell or NULL if a DateTime cannot be built from it
     */
    protected function formatDateCellValue($nodeValue)
    {
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
        try {
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
        } catch (\Exception $e) {
            $cellValue = null;
        }

        return $cellValue;
    }
}
314