Completed
Pull Request — master (#557)
by Adrien
03:10
created

CellValueFormatter::formatStrCellValue()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
rs 9.4285
c 0
b 0
f 0
ccs 4
cts 4
cp 1
cc 1
eloc 4
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
use Box\Spout\Reader\Exception\InvalidValueException;
6
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
7
use Box\Spout\Reader\XLSX\Manager\StyleManager;
8
9
/**
10
 * Class CellValueFormatter
11
 * This class provides helper functions to format cell values
12
 */
13
class CellValueFormatter
14
{
15
    /** Definition of all possible cell types */
16
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
17
    const CELL_TYPE_STR = 'str';
18
    const CELL_TYPE_SHARED_STRING = 's';
19
    const CELL_TYPE_BOOLEAN = 'b';
20
    const CELL_TYPE_NUMERIC = 'n';
21
    const CELL_TYPE_DATE = 'd';
22
    const CELL_TYPE_ERROR = 'e';
23
24
    /** Definition of XML nodes names used to parse data */
25
    const XML_NODE_VALUE = 'v';
26
    const XML_NODE_INLINE_STRING_VALUE = 't';
27
28
    /** Definition of XML attributes used to parse data */
29
    const XML_ATTRIBUTE_TYPE = 't';
30
    const XML_ATTRIBUTE_STYLE_ID = 's';
31
32
    /** Constants used for date formatting */
33
    const NUM_SECONDS_IN_ONE_DAY = 86400;
34
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
35
    const NUM_SECONDS_IN_ONE_MINUTE = 60;
36
37
    /**
38
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
39
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
40
     */
41
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
42
43
    /** @var SharedStringsManager Manages shared strings */
44
    protected $sharedStringsManager;
45
46
    /** @var StyleManager Manages styles */
47
    protected $styleManager;
48
49
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
50
    protected $shouldFormatDates;
51
52
    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
53
    protected $shouldUse1904Dates;
54
55
    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
56
    protected $escaper;
57
58
    /**
59
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
60
     * @param StyleManager $styleManager Manages styles
61
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
62
     * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
63
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
64
     */
65 83
    public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
66
    {
67 83
        $this->sharedStringsManager = $sharedStringsManager;
68 83
        $this->styleManager = $styleManager;
69 83
        $this->shouldFormatDates = $shouldFormatDates;
70 83
        $this->shouldUse1904Dates = $shouldUse1904Dates;
71 83
        $this->escaper = $escaper;
72 83
    }
73
74
    /**
75
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
76
     *
77
     * @param \DOMNode $node
78
     * @throws InvalidValueException If the value is not valid
79
     * @return string|int|float|bool|\DateTime The value associated with the cell
80
     */
81 62
    public function extractAndFormatNodeValue($node)
82
    {
83
        // Default cell type is "n"
84 62
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
85 62
        $cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
86 62
        $vNodeValue = $this->getVNodeValue($node);
87
88 62
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
89 1
            return $vNodeValue;
90
        }
91
92
        switch ($cellType) {
93 62
            case self::CELL_TYPE_INLINE_STRING:
94 7
                return $this->formatInlineStringCellValue($node);
95 56
            case self::CELL_TYPE_SHARED_STRING:
96 21
                return $this->formatSharedStringCellValue($vNodeValue);
97 39
            case self::CELL_TYPE_STR:
98 1
                return $this->formatStrCellValue($vNodeValue);
99 39
            case self::CELL_TYPE_BOOLEAN:
100 1
                return $this->formatBooleanCellValue($vNodeValue);
101 39
            case self::CELL_TYPE_NUMERIC:
102 39
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
103 2
            case self::CELL_TYPE_DATE:
104 2
                return $this->formatDateCellValue($vNodeValue);
105
            default:
106 1
                throw new InvalidValueException($vNodeValue);
107
        }
108
    }
109
110
    /**
111
     * Returns the cell's string value from a node's nested value node
112
     *
113
     * @param \DOMNode $node
114
     * @return string The value associated with the cell
115
     */
116 62
    protected function getVNodeValue($node)
117
    {
118
        // for cell types having a "v" tag containing the value.
119
        // if not, the returned value should be empty string.
120 62
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
121
122 62
        return ($vNode !== null) ? $vNode->nodeValue : '';
123
    }
124
125
    /**
126
     * Returns the cell String value where string is inline.
127
     *
128
     * @param \DOMNode $node
129
     * @return string The value associated with the cell
130
     */
131 11
    protected function formatInlineStringCellValue($node)
132
    {
133
        // inline strings are formatted this way:
134
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
135 11
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
136 11
        $cellValue = $this->escaper->unescape($tNode->nodeValue);
137
138 11
        return $cellValue;
139
    }
140
141
    /**
142
     * Returns the cell String value from shared-strings file using nodeValue index.
143
     *
144
     * @param string $nodeValue
145
     * @return string The value associated with the cell
146
     */
147 21
    protected function formatSharedStringCellValue($nodeValue)
148
    {
149
        // shared strings are formatted this way:
150
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
151 21
        $sharedStringIndex = (int) $nodeValue;
152 21
        $escapedCellValue = $this->sharedStringsManager->getStringAtIndex($sharedStringIndex);
153 21
        $cellValue = $this->escaper->unescape($escapedCellValue);
154
155 21
        return $cellValue;
156
    }
157
158
    /**
159
     * Returns the cell String value, where string is stored in value node.
160
     *
161
     * @param string $nodeValue
162
     * @return string The value associated with the cell
163
     */
164 1
    protected function formatStrCellValue($nodeValue)
165
    {
166 1
        $escapedCellValue = trim($nodeValue);
167 1
        $cellValue = $this->escaper->unescape($escapedCellValue);
168
169 1
        return $cellValue;
170
    }
171
172
    /**
173
     * Returns the cell Numeric value from string of nodeValue.
174
     * The value can also represent a timestamp and a DateTime will be returned.
175
     *
176
     * @param string $nodeValue
177
     * @param int $cellStyleId 0 being the default style
178
     * @return int|float|\DateTime The value associated with the cell
179
     */
180 52
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
181
    {
182
        // Numeric values can represent numbers as well as timestamps.
183
        // We need to look at the style of the cell to determine whether it is one or the other.
184 52
        $shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
185
186 52
        if ($shouldFormatAsDate) {
187 34
            $cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
188
        } else {
189 19
            $nodeIntValue = (int) $nodeValue;
190 19
            $nodeFloatValue = (float) $nodeValue;
191 19
            $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
192
        }
193
194 48
        return $cellValue;
195
    }
196
197
    /**
198
     * Returns a cell's PHP Date value, associated to the given timestamp.
199
     * NOTE: The timestamp is a float representing the number of days since the base Excel date:
200
     *       Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
201
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
202
     *
203
     * @see ECMA-376 Part 1 - §18.17.4
204
     *
205
     * @param float $nodeValue
206
     * @param int $cellStyleId 0 being the default style
207
     * @throws InvalidValueException If the value is not a valid timestamp
208
     * @return \DateTime The value associated with the cell
209
     */
210 34
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
211
    {
212 34
        if ($this->isValidTimestampValue($nodeValue)) {
213 30
            $cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
214
        } else {
215 4
            throw new InvalidValueException($nodeValue);
216
        }
217
218 30
        return $cellValue;
219
    }
220
221
    /**
222
     * Returns whether the given timestamp is supported by SpreadsheetML
223
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
224
     *
225
     * @param float $timestampValue
226
     * @return bool
227
     */
228 34
    protected function isValidTimestampValue($timestampValue)
229
    {
230
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
231
        return (
232 34
            $this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884 ||
233 34
            !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
234
        );
235
    }
236
237
    /**
238
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
239
     * Only the time value matters. The date part is set to the base Excel date:
240
     * Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
241
     *
242
     * @param float $nodeValue
243
     * @param int $cellStyleId 0 being the default style
244
     * @return \DateTime|string The value associated with the cell
245
     */
246 30
    protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
247
    {
248 30
        $baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
249
250 30
        $daysSinceBaseDate = (int) $nodeValue;
251 30
        $timeRemainder = fmod($nodeValue, 1);
252 30
        $secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
253
254 30
        $dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
255 30
        $dateObj->modify('+' . $daysSinceBaseDate . 'days');
256 30
        $dateObj->modify('+' . $secondsRemainder . 'seconds');
257
258 30
        if ($this->shouldFormatDates) {
259 2
            $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
260 2
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
261 2
            $cellValue = $dateObj->format($phpDateFormat);
262
        } else {
263 28
            $cellValue = $dateObj;
264
        }
265
266 30
        return $cellValue;
267
    }
268
269
    /**
270
     * Returns the cell Boolean value from a specific node's Value.
271
     *
272
     * @param string $nodeValue
273
     * @return bool The value associated with the cell
274
     */
275 1
    protected function formatBooleanCellValue($nodeValue)
276
    {
277 1
        return (bool) $nodeValue;
278
    }
279
280
    /**
281
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
282
     * @see ECMA-376 Part 1 - §18.17.4
283
     *
284
     * @param string $nodeValue ISO 8601 Date string
285
     * @throws InvalidValueException If the value is not a valid date
286
     * @return \DateTime|string The value associated with the cell
287
     */
288 2
    protected function formatDateCellValue($nodeValue)
289
    {
290
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
291
        try {
292 2
            $cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
293 1
        } catch (\Exception $e) {
294 1
            throw new InvalidValueException($nodeValue);
295
        }
296
297 2
        return $cellValue;
298
    }
299
}
300