CellValueFormatter::formatDateCellValue()   A
last analyzed

Complexity

Conditions 3
Paths 4

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 11
ccs 5
cts 5
cp 1
rs 9.9
c 0
b 0
f 0
cc 3
nc 4
nop 1
crap 3
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
use Box\Spout\Reader\Exception\InvalidValueException;
6
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
7
use Box\Spout\Reader\XLSX\Manager\StyleManager;
8
9
/**
10
 * Class CellValueFormatter
11
 * This class provides helper functions to format cell values
12
 */
13
class CellValueFormatter
14
{
15
    /** Definition of all possible cell types */
16
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
17
    const CELL_TYPE_STR = 'str';
18
    const CELL_TYPE_SHARED_STRING = 's';
19
    const CELL_TYPE_BOOLEAN = 'b';
20
    const CELL_TYPE_NUMERIC = 'n';
21
    const CELL_TYPE_DATE = 'd';
22
    const CELL_TYPE_ERROR = 'e';
23
24
    /** Definition of XML nodes names used to parse data */
25
    const XML_NODE_VALUE = 'v';
26
    const XML_NODE_INLINE_STRING_VALUE = 't';
27
28
    /** Definition of XML attributes used to parse data */
29
    const XML_ATTRIBUTE_TYPE = 't';
30
    const XML_ATTRIBUTE_STYLE_ID = 's';
31
32
    /** Constants used for date formatting */
33
    const NUM_SECONDS_IN_ONE_DAY = 86400;
34
35
    /** @var SharedStringsManager Manages shared strings */
36
    protected $sharedStringsManager;
37
38
    /** @var StyleManager Manages styles */
39
    protected $styleManager;
40
41
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
42
    protected $shouldFormatDates;
43
44
    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
45
    protected $shouldUse1904Dates;
46
47
    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
48
    protected $escaper;
49
50
    /**
51
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
52
     * @param StyleManager $styleManager Manages styles
53
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
54
     * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
55
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
56
     */
57 86
    public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
58
    {
59 86
        $this->sharedStringsManager = $sharedStringsManager;
60 86
        $this->styleManager = $styleManager;
61 86
        $this->shouldFormatDates = $shouldFormatDates;
62 86
        $this->shouldUse1904Dates = $shouldUse1904Dates;
63 86
        $this->escaper = $escaper;
64 86
    }
65
66
    /**
67
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
68
     *
69
     * @param \DOMNode $node
70
     * @throws InvalidValueException If the value is not valid
71
     * @return string|int|float|bool|\DateTime The value associated with the cell
72
     */
73 65
    public function extractAndFormatNodeValue($node)
74
    {
75
        // Default cell type is "n"
76 65
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
77 65
        $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
78 65
        $vNodeValue = $this->getVNodeValue($node);
79
80 65
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
81 2
            return $vNodeValue;
82
        }
83
84
        switch ($cellType) {
85 65
            case self::CELL_TYPE_INLINE_STRING:
86 9
                return $this->formatInlineStringCellValue($node);
87 57
            case self::CELL_TYPE_SHARED_STRING:
88 22
                return $this->formatSharedStringCellValue($vNodeValue);
89 41
            case self::CELL_TYPE_STR:
90 1
                return $this->formatStrCellValue($vNodeValue);
91 41
            case self::CELL_TYPE_BOOLEAN:
92 1
                return $this->formatBooleanCellValue($vNodeValue);
93 41
            case self::CELL_TYPE_NUMERIC:
94 41
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
95 3
            case self::CELL_TYPE_DATE:
96 3
                return $this->formatDateCellValue($vNodeValue);
97
            default:
98 1
                throw new InvalidValueException($vNodeValue);
99
        }
100
    }
101
102
    /**
103
     * Returns the cell's string value from a node's nested value node
104
     *
105
     * @param \DOMNode $node
106
     * @return string The value associated with the cell
107
     */
108 65
    protected function getVNodeValue($node)
109
    {
110
        // for cell types having a "v" tag containing the value.
111
        // if not, the returned value should be empty string.
112 65
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
113
114 65
        return ($vNode !== null) ? $vNode->nodeValue : '';
115
    }
116
117
    /**
118
     * Returns the cell String value where string is inline.
119
     *
120
     * @param \DOMNode $node
121
     * @return string The value associated with the cell
122
     */
123 13
    protected function formatInlineStringCellValue($node)
124
    {
125
        // inline strings are formatted this way (they can contain any number of <t> nodes):
126
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
127 13
        $tNodes = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE);
128
129 13
        $cellValue = '';
130 13
        for ($i = 0; $i < $tNodes->count(); $i++) {
131 13
            $tNode = $tNodes->item($i);
132 13
            $cellValue .= $this->escaper->unescape($tNode->nodeValue);
133
        }
134
135 13
        return $cellValue;
136
    }
137
138
    /**
139
     * Returns the cell String value from shared-strings file using nodeValue index.
140
     *
141
     * @param string $nodeValue
142
     * @return string The value associated with the cell
143
     */
144 22
    protected function formatSharedStringCellValue($nodeValue)
145
    {
146
        // shared strings are formatted this way:
147
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
148 22
        $sharedStringIndex = (int) $nodeValue;
149 22
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
150 22
        $cellValue = $this->escaper->unescape($escapedCellValue);
151
152 22
        return $cellValue;
153
    }
154
155
    /**
156
     * Returns the cell String value, where string is stored in value node.
157
     *
158
     * @param string $nodeValue
159
     * @return string The value associated with the cell
160
     */
161 1
    protected function formatStrCellValue($nodeValue)
162
    {
163 1
        $escapedCellValue = \trim($nodeValue);
164 1
        $cellValue = $this->escaper->unescape($escapedCellValue);
165
166 1
        return $cellValue;
167
    }
168
169
    /**
170
     * Returns the cell Numeric value from string of nodeValue.
171
     * The value can also represent a timestamp and a DateTime will be returned.
172
     *
173
     * @param string $nodeValue
174
     * @param int $cellStyleId 0 being the default style
175
     * @return int|float|\DateTime The value associated with the cell
176
     */
177 54
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
178
    {
179
        // Numeric values can represent numbers as well as timestamps.
180
        // We need to look at the style of the cell to determine whether it is one or the other.
181 54
        $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
182
183 54
        if ($shouldFormatAsDate) {
184 34
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
185
        } else {
186 21
            $nodeIntValue = (int) $nodeValue;
187 21
            $nodeFloatValue = (float) $nodeValue;
188 21
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
189
        }
190
191 50
        return $cellValue;
192
    }
193
194
    /**
195
     * Returns a cell's PHP Date value, associated to the given timestamp.
196
     * NOTE: The timestamp is a float representing the number of days since the base Excel date:
197
     *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
198
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
199
     *
200
     * @see ECMA-376 Part 1 - §18.17.4
201
     *
202
     * @param float $nodeValue
203
     * @param int $cellStyleId 0 being the default style
204
     * @throws InvalidValueException If the value is not a valid timestamp
205
     * @return \DateTime The value associated with the cell
206
     */
207 34
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
208
    {
209 34
        if ($this->isValidTimestampValue($nodeValue)) {
210 30
            $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
211
        } else {
212 4
            throw new InvalidValueException($nodeValue);
213
        }
214
215 30
        return $cellValue;
216
    }
217
218
    /**
219
     * Returns whether the given timestamp is supported by SpreadsheetML
220
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
221
     *
222
     * @param float $timestampValue
223
     * @return bool
224
     */
225 34
    protected function isValidTimestampValue($timestampValue)
226
    {
227
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
228
        return (
229 34
            $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 ||
230 34
            !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
231
        );
232
    }
233
234
    /**
235
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
236
     * Only the time value matters. The date part is set to the base Excel date:
237
     * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
238
     *
239
     * @param float $nodeValue
240
     * @param int $cellStyleId 0 being the default style
241
     * @return \DateTime|string The value associated with the cell
242
     */
243 30
    protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
244
    {
245 30
        $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
246
247 30
        $daysSinceBaseDate = (int) $nodeValue;
248 30
        $timeRemainder = \fmod($nodeValue, 1);
249 30
        $secondsRemainder = \round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
250
251 30
        $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
252 30
        $dateObj->modify('+' . $daysSinceBaseDate . 'days');
253 30
        $dateObj->modify('+' . $secondsRemainder . 'seconds');
254
255 30
        if ($this->shouldFormatDates) {
256 2
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
257 2
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
258 2
            $cellValue = $dateObj->format($phpDateFormat);
259
        } else {
260 28
            $cellValue = $dateObj;
261
        }
262
263 30
        return $cellValue;
264
    }
265
266
    /**
267
     * Returns the cell Boolean value from a specific node's Value.
268
     *
269
     * @param string $nodeValue
270
     * @return bool The value associated with the cell
271
     */
272 1
    protected function formatBooleanCellValue($nodeValue)
273
    {
274 1
        return (bool) $nodeValue;
275
    }
276
277
    /**
278
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
279
     * @see ECMA-376 Part 1 - §18.17.4
280
     *
281
     * @param string $nodeValue ISO 8601 Date string
282
     * @throws InvalidValueException If the value is not a valid date
283
     * @return \DateTime|string The value associated with the cell
284
     */
285 3
    protected function formatDateCellValue($nodeValue)
286
    {
287
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
288
        try {
289 3
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
290 1
        } catch (\Exception $e) {
291 1
            throw new InvalidValueException($nodeValue);
292
        }
293
294 3
        return $cellValue;
295
    }
296
}
297