Failed Conditions
Pull Request — master (#316)
by Adrien
02:36
created

CellValueFormatter::formatBooleanCellValue()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 6
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
/**
6
 * Class CellValueFormatter
7
 * This class provides helper functions to format cell values
8
 *
9
 * @package Box\Spout\Reader\XLSX\Helper
10
 */
11
class CellValueFormatter
12
{
13
    /** Definition of all possible cell types */
14
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
15
    const CELL_TYPE_STR = 'str';
16
    const CELL_TYPE_SHARED_STRING = 's';
17
    const CELL_TYPE_BOOLEAN = 'b';
18
    const CELL_TYPE_NUMERIC = 'n';
19
    const CELL_TYPE_DATE = 'd';
20
    const CELL_TYPE_ERROR = 'e';
21
22
    /** Definition of XML nodes names used to parse data */
23
    const XML_NODE_VALUE = 'v';
24
    const XML_NODE_INLINE_STRING_VALUE = 't';
25
26
    /** Definition of XML attributes used to parse data */
27
    const XML_ATTRIBUTE_TYPE = 't';
28
    const XML_ATTRIBUTE_STYLE_ID = 's';
29
30
    /** Constants used for date formatting */
31
    const NUM_SECONDS_IN_ONE_DAY = 86400;
32
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
33
    const NUM_SECONDS_IN_ONE_MINUTE = 60;
34
35
    /**
36
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
37
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
38
     */
39
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
40
41
    /** @var SharedStringsHelper Helper to work with shared strings */
42
    protected $sharedStringsHelper;
43
44
    /** @var StyleHelper Helper to work with styles */
45
    protected $styleHelper;
46
47
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
48
    protected $shouldFormatDates;
49
50
    /** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
51
    protected $escaper;
52
53
    /**
54
     * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
55
     * @param StyleHelper $styleHelper Helper to work with styles
56
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
57
     */
58 177
    public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
59
    {
60 177
        $this->sharedStringsHelper = $sharedStringsHelper;
61 177
        $this->styleHelper = $styleHelper;
62 177
        $this->shouldFormatDates = $shouldFormatDates;
63
64
        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
65 177
        $this->escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
66 177
    }
67
68
    /**
69
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
70
     *
71
     * @param \DOMNode $node
72
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
73
     */
74 117
    public function extractAndFormatNodeValue($node)
75
    {
76
        // Default cell type is "n"
77 117
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
78 117
        $cellStyleId = intval($node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID));
79 117
        $vNodeValue = $this->getVNodeValue($node);
80
81 117
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
82 3
            return $vNodeValue;
83
        }
84
85
        switch ($cellType) {
86 117
            case self::CELL_TYPE_INLINE_STRING:
87 21
                return $this->formatInlineStringCellValue($node);
88 99
            case self::CELL_TYPE_SHARED_STRING:
89 48
                return $this->formatSharedStringCellValue($vNodeValue);
90 63
            case self::CELL_TYPE_STR:
91 3
                return $this->formatStrCellValue($vNodeValue);
92 63
            case self::CELL_TYPE_BOOLEAN:
93 3
                return $this->formatBooleanCellValue($vNodeValue);
94 63
            case self::CELL_TYPE_NUMERIC:
95 63
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
96 6
            case self::CELL_TYPE_DATE:
97 6
                return $this->formatDateCellValue($vNodeValue);
98 3
            default:
99 3
                return null;
100 3
        }
101
    }
102
103
    /**
104
     * Returns the cell's string value from a node's nested value node
105
     *
106
     * @param \DOMNode $node
107
     * @return string The value associated with the cell
108
     */
109 117
    protected function getVNodeValue($node)
110
    {
111
        // for cell types having a "v" tag containing the value.
112
        // if not, the returned value should be empty string.
113 117
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
114 117
        return ($vNode !== null) ? $vNode->nodeValue : '';
115
    }
116
117
    /**
118
     * Returns the cell String value where string is inline.
119
     *
120
     * @param \DOMNode $node
121
     * @return string The value associated with the cell (null when the cell has an error)
122
     */
123 33
    protected function formatInlineStringCellValue($node)
124
    {
125
        // inline strings are formatted this way:
126
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
127 33
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
128 33
        $cellValue = $this->escaper->unescape($tNode->nodeValue);
129 33
        return $cellValue;
130
    }
131
132
    /**
133
     * Returns the cell String value from shared-strings file using nodeValue index.
134
     *
135
     * @param string $nodeValue
136
     * @return string The value associated with the cell (null when the cell has an error)
137
     */
138 48
    protected function formatSharedStringCellValue($nodeValue)
139
    {
140
        // shared strings are formatted this way:
141
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
142 48
        $sharedStringIndex = intval($nodeValue);
143 48
        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
144 48
        $cellValue = $this->escaper->unescape($escapedCellValue);
145 48
        return $cellValue;
146
    }
147
148
    /**
149
     * Returns the cell String value, where string is stored in value node.
150
     *
151
     * @param string $nodeValue
152
     * @return string The value associated with the cell (null when the cell has an error)
153
     */
154 3
    protected function formatStrCellValue($nodeValue)
155
    {
156 3
        $escapedCellValue = trim($nodeValue);
157 3
        $cellValue = $this->escaper->unescape($escapedCellValue);
158 3
        return $cellValue;
159
    }
160
161
    /**
162
     * Returns the cell Numeric value from string of nodeValue.
163
     * The value can also represent a timestamp and a DateTime will be returned.
164
     *
165
     * @param string $nodeValue
166
     * @param int $cellStyleId 0 being the default style
167
     * @return int|float|\DateTime|null The value associated with the cell
168
     */
169 102
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
170
    {
171
        // Numeric values can represent numbers as well as timestamps.
172
        // We need to look at the style of the cell to determine whether it is one or the other.
173 102
        $shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);
174
175 102
        if ($shouldFormatAsDate) {
176 33
            return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId);
177
        } else {
178 69
            $nodeIntValue = intval($nodeValue);
179 69
            return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
180
        }
181
    }
182
183
    /**
184
     * Returns a cell's PHP Date value, associated to the given timestamp.
185
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
186
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
187
     *
188
     * @param float $nodeValue
189
     * @param int $cellStyleId 0 being the default style
190
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
191
     */
192 33
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
193
    {
194
        // Fix for the erroneous leap year in Excel
195 33
        if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
196 9
            --$nodeValue;
197 9
        }
198
199 33
        if ($nodeValue >= 1) {
200
            // Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
201 15
            return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
202 18
        } else if ($nodeValue >= 0) {
203
            // Values between 0 and 1 represent "times".
204 15
            return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
205
        } else {
206
            // invalid date
207 3
            return null;
208
        }
209
    }
210
211
    /**
212
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
213
     * Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
214
     *
215
     * @param float $nodeValue
216
     * @param int $cellStyleId 0 being the default style
217
     * @return \DateTime|string The value associated with the cell
218
     */
219 15
    protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
220
    {
221 15
        $time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
222 15
        $hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR);
223 15
        $minutes = floor($time / self::NUM_SECONDS_IN_ONE_MINUTE) - ($hours * self::NUM_SECONDS_IN_ONE_MINUTE);
224 15
        $seconds = $time - ($hours * self::NUM_SECONDS_IN_ONE_HOUR) - ($minutes * self::NUM_SECONDS_IN_ONE_MINUTE);
225
226
        // using the base Excel date (Jan 1st, 1900) - not relevant here
227 15
        $dateObj = new \DateTime('1900-01-01');
228 15
        $dateObj->setTime($hours, $minutes, $seconds);
229
230 15
        if ($this->shouldFormatDates) {
231
            $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
232
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
233
            return $dateObj->format($phpDateFormat);
234
        } else {
235 15
            return $dateObj;
236
        }
237
    }
238
239
    /**
240
     * Returns a cell's PHP Date value, associated to the given timestamp.
241
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
242
     *
243
     * @param float $nodeValue
244
     * @param int $cellStyleId 0 being the default style
245
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
246
     */
247 15
    protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
248
    {
249
        // Do not use any unix timestamps for calculation to prevent
250
        // issues with numbers exceeding 2^31.
251 15
        $secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
252 15
        $secondsRemainder = round($secondsRemainder, 0);
253
254
        try {
255 15
            $dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
256 15
            $dateObj->modify('+' . intval($nodeValue) . 'days');
257 15
            $dateObj->modify('+' . $secondsRemainder . 'seconds');
258
259 15
            if ($this->shouldFormatDates) {
260
                $styleNumberFormatCode = $this->styleHelper->getNumberFormatCode($cellStyleId);
261
                $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
262
                return $dateObj->format($phpDateFormat);
263
            } else {
264 15
                return $dateObj;
265
            }
266
        } catch (\Exception $e) {
267
            return null;
268
        }
269
    }
270
271
    /**
272
     * Returns the cell Boolean value from a specific node's Value.
273
     *
274
     * @param string $nodeValue
275
     * @return bool The value associated with the cell
276
     */
277 3
    protected function formatBooleanCellValue($nodeValue)
278
    {
279
        // !! is similar to boolval()
280 3
        $cellValue = !!$nodeValue;
281 3
        return $cellValue;
282
    }
283
284
    /**
285
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
286
     * @see ECMA-376 Part 1 - §18.17.4
287
     *
288
     * @param string $nodeValue ISO 8601 Date string
289
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
290
     */
291 6
    protected function formatDateCellValue($nodeValue)
292
    {
293
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
294
        try {
295 6
            return ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
296 3
        } catch (\Exception $e) {
297 3
            return null;
298
        }
299
    }
300
}
301