Passed
Pull Request — develop_3.0 (#483)
by Adrien
04:33 queued 01:53
created

CellValueFormatter::getVNodeValue()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
ccs 3
cts 3
cp 1
cc 2
eloc 3
nc 2
nop 1
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
6
use Box\Spout\Reader\XLSX\Manager\StyleManager;
7
8
/**
9
 * Class CellValueFormatter
10
 * This class provides helper functions to format cell values
11
 */
12
class CellValueFormatter
13
{
14
    /** Definition of all possible cell types */
15
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
16
    const CELL_TYPE_STR = 'str';
17
    const CELL_TYPE_SHARED_STRING = 's';
18
    const CELL_TYPE_BOOLEAN = 'b';
19
    const CELL_TYPE_NUMERIC = 'n';
20
    const CELL_TYPE_DATE = 'd';
21
    const CELL_TYPE_ERROR = 'e';
22
23
    /** Definition of XML nodes names used to parse data */
24
    const XML_NODE_VALUE = 'v';
25
    const XML_NODE_INLINE_STRING_VALUE = 't';
26
27
    /** Definition of XML attributes used to parse data */
28
    const XML_ATTRIBUTE_TYPE = 't';
29
    const XML_ATTRIBUTE_STYLE_ID = 's';
30
31
    /** Constants used for date formatting */
32
    const NUM_SECONDS_IN_ONE_DAY = 86400;
33
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
34
    const NUM_SECONDS_IN_ONE_MINUTE = 60;
35
36
    /**
37
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
38
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
39
     */
40
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
41
42
    /** @var SharedStringsManager Manages shared strings */
43
    protected $sharedStringsManager;
44
45
    /** @var StyleManager Manages styles */
46
    protected $styleManager;
47
48
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
49
    protected $shouldFormatDates;
50
51
    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
52
    protected $shouldUse1904Dates;
53
54
    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
55
    protected $escaper;
56
57
    /**
58
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
59
     * @param StyleManager $styleManager Manages styles
60
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
61
     * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
62
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
63
     */
64 79
    public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
65
    {
66 79
        $this->sharedStringsManager = $sharedStringsManager;
67 79
        $this->styleManager = $styleManager;
68 79
        $this->shouldFormatDates = $shouldFormatDates;
69 79
        $this->shouldUse1904Dates = $shouldUse1904Dates;
70 79
        $this->escaper = $escaper;
71 79
    }
72
73
    /**
74
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
75
     *
76
     * @param \DOMNode $node
77
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
78
     */
79 59
    public function extractAndFormatNodeValue($node)
80
    {
81
        // Default cell type is "n"
82 59
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
83 59
        $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
84 59
        $vNodeValue = $this->getVNodeValue($node);
85
86 59
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
87 1
            return $vNodeValue;
88
        }
89
90
        switch ($cellType) {
91 59
            case self::CELL_TYPE_INLINE_STRING:
92 7
                return $this->formatInlineStringCellValue($node);
93 53
            case self::CELL_TYPE_SHARED_STRING:
94 18
                return $this->formatSharedStringCellValue($vNodeValue);
95 39
            case self::CELL_TYPE_STR:
96 1
                return $this->formatStrCellValue($vNodeValue);
97 39
            case self::CELL_TYPE_BOOLEAN:
98 1
                return $this->formatBooleanCellValue($vNodeValue);
99 39
            case self::CELL_TYPE_NUMERIC:
100 39
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
101 2
            case self::CELL_TYPE_DATE:
102 2
                return $this->formatDateCellValue($vNodeValue);
103
            default:
104 1
                return null;
105
        }
106
    }
107
108
    /**
109
     * Returns the cell's string value from a node's nested value node
110
     *
111
     * @param \DOMNode $node
112
     * @return string The value associated with the cell
113
     */
114 59
    protected function getVNodeValue($node)
115
    {
116
        // for cell types having a "v" tag containing the value.
117
        // if not, the returned value should be empty string.
118 59
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
119
120 59
        return ($vNode !== null) ? $vNode->nodeValue : '';
121
    }
122
123
    /**
124
     * Returns the cell String value where string is inline.
125
     *
126
     * @param \DOMNode $node
127
     * @return string The value associated with the cell (null when the cell has an error)
128
     */
129 11
    protected function formatInlineStringCellValue($node)
130
    {
131
        // inline strings are formatted this way:
132
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
133 11
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
134 11
        $cellValue = $this->escaper->unescape($tNode->nodeValue);
135
136 11
        return $cellValue;
137
    }
138
139
    /**
140
     * Returns the cell String value from shared-strings file using nodeValue index.
141
     *
142
     * @param string $nodeValue
143
     * @return string The value associated with the cell (null when the cell has an error)
144
     */
145 18
    protected function formatSharedStringCellValue($nodeValue)
146
    {
147
        // shared strings are formatted this way:
148
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
149 18
        $sharedStringIndex = (int) $nodeValue;
150 18
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
151 18
        $cellValue = $this->escaper->unescape($escapedCellValue);
152
153 18
        return $cellValue;
154
    }
155
156
    /**
157
     * Returns the cell String value, where string is stored in value node.
158
     *
159
     * @param string $nodeValue
160
     * @return string The value associated with the cell (null when the cell has an error)
161
     */
162 1
    protected function formatStrCellValue($nodeValue)
163
    {
164 1
        $escapedCellValue = trim($nodeValue);
165 1
        $cellValue = $this->escaper->unescape($escapedCellValue);
166
167 1
        return $cellValue;
168
    }
169
170
    /**
171
     * Returns the cell Numeric value from string of nodeValue.
172
     * The value can also represent a timestamp and a DateTime will be returned.
173
     *
174
     * @param string $nodeValue
175
     * @param int $cellStyleId 0 being the default style
176
     * @return int|float|\DateTime|null The value associated with the cell
177
     */
178 52
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
179
    {
180
        // Numeric values can represent numbers as well as timestamps.
181
        // We need to look at the style of the cell to determine whether it is one or the other.
182 52
        $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
183
184 52
        if ($shouldFormatAsDate) {
185 34
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
186
        } else {
187 19
            $nodeIntValue = (int) $nodeValue;
188 19
            $nodeFloatValue = (float) $nodeValue;
189 19
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
190
        }
191
192 52
        return $cellValue;
193
    }
194
195
    /**
196
     * Returns a cell's PHP Date value, associated to the given timestamp.
197
     * NOTE: The timestamp is a float representing the number of days since the base Excel date:
198
     *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
199
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
200
     *
201
     * @see ECMA-376 Part 1 - §18.17.4
202
     *
203
     * @param float $nodeValue
204
     * @param int $cellStyleId 0 being the default style
205
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
206
     */
207 34
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
208
    {
209 34
        if ($this->isValidTimestampValue($nodeValue)) {
210 30
            $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
211
        } else {
212
            // invalid date
213 4
            $cellValue = null;
214
        }
215
216 34
        return $cellValue;
217
    }
218
219
    /**
220
     * Returns whether the given timestamp is supported by SpreadsheetML
221
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
222
     *
223
     * @param $timestampValue
224
     * @return bool
225
     */
226 34
    protected function isValidTimestampValue($timestampValue)
227
    {
228
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
229
        return (
230 34
            $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 ||
231 34
            !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
232
        );
233
    }
234
235
    /**
236
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
237
     * Only the time value matters. The date part is set to the base Excel date:
238
     * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
239
     *
240
     * @param float $nodeValue
241
     * @param int $cellStyleId 0 being the default style
242
     * @return \DateTime|string The value associated with the cell
243
     */
244 30
    protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
245
    {
246 30
        $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
247
248 30
        $daysSinceBaseDate = (int) $nodeValue;
249 30
        $timeRemainder = fmod($nodeValue, 1);
250 30
        $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
251
252 30
        $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
253 30
        $dateObj->modify('+' . $daysSinceBaseDate . 'days');
254 30
        $dateObj->modify('+' . $secondsRemainder . 'seconds');
255
256 30
        if ($this->shouldFormatDates) {
257 2
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
258 2
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
259 2
            $cellValue = $dateObj->format($phpDateFormat);
260
        } else {
261 28
            $cellValue = $dateObj;
262
        }
263
264 30
        return $cellValue;
265
    }
266
267
    /**
268
     * Returns the cell Boolean value from a specific node's Value.
269
     *
270
     * @param string $nodeValue
271
     * @return bool The value associated with the cell
272
     */
273 1
    protected function formatBooleanCellValue($nodeValue)
274
    {
275 1
        return (bool) $nodeValue;
276
    }
277
278
    /**
279
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
280
     * @see ECMA-376 Part 1 - §18.17.4
281
     *
282
     * @param string $nodeValue ISO 8601 Date string
283
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
284
     */
285 2
    protected function formatDateCellValue($nodeValue)
286
    {
287
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
288
        try {
289 2
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
290 1
        } catch (\Exception $e) {
291 1
            $cellValue = null;
292
        }
293
294 2
        return $cellValue;
295
    }
296
}
297