Failed Conditions
Push — perf-tests ( 50942d...2fc93e )
by Adrien
14:53
created

CellValueFormatter::extractAndFormatNodeValue()   D

Complexity

Conditions 10
Paths 16

Size

Total Lines 28
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 28
rs 4.8196
cc 10
eloc 21
nc 16
nop 1

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
/**
6
 * Class CellValueFormatter
7
 * This class provides helper functions to format cell values
8
 *
9
 * @package Box\Spout\Reader\XLSX\Helper
10
 */
11
class CellValueFormatter
12
{
13
    /** Definition of all possible cell types */
14
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
15
    const CELL_TYPE_STR = 'str';
16
    const CELL_TYPE_SHARED_STRING = 's';
17
    const CELL_TYPE_BOOLEAN = 'b';
18
    const CELL_TYPE_NUMERIC = 'n';
19
    const CELL_TYPE_DATE = 'd';
20
    const CELL_TYPE_ERROR = 'e';
21
22
    /** Definition of XML nodes names used to parse data */
23
    const XML_NODE_VALUE = 'v';
24
    const XML_NODE_INLINE_STRING_VALUE = 't';
25
26
    /** Definition of XML attributes used to parse data */
27
    const XML_ATTRIBUTE_TYPE = 't';
28
    const XML_ATTRIBUTE_STYLE_ID = 's';
29
30
    /** Constants used for date formatting */
31
    const NUM_SECONDS_IN_ONE_DAY = 86400;
32
    const NUM_SECONDS_IN_ONE_HOUR = 3600;
33
    const NUM_SECONDS_IN_ONE_MINUTE = 60;
34
35
    /**
36
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
37
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
38
     */
39
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
40
41
    /** @var SharedStringsHelper Helper to work with shared strings */
42
    protected $sharedStringsHelper;
43
44
    /** @var StyleHelper Helper to work with styles */
45
    protected $styleHelper;
46
47
    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
48
    protected $shouldFormatDates;
49
50
    /** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
51
    protected $escaper;
52
53
    /**
54
     * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
55
     * @param StyleHelper $styleHelper Helper to work with styles
56
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
57
     */
58
    public function __construct($sharedStringsHelper, $styleHelper, $shouldFormatDates)
59
    {
60
        $this->sharedStringsHelper = $sharedStringsHelper;
61
        $this->styleHelper = $styleHelper;
62
        $this->shouldFormatDates = $shouldFormatDates;
63
64
        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
65
        $this->escaper = \Box\Spout\Common\Escaper\XLSX::getInstance();
66
    }
67
68
    /**
69
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
70
     *
71
     * @param \DOMNode $node
72
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
73
     */
74
    public function extractAndFormatNodeValue($node)
75
    {
76
        // Default cell type is "n"
77
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
78
        $cellStyleId = intval($node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID));
79
        $vNodeValue = $this->getVNodeValue($node);
80
81
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
82
            return $vNodeValue;
83
        }
84
85
        switch ($cellType) {
86
            case self::CELL_TYPE_INLINE_STRING:
87
                return $this->formatInlineStringCellValue($node);
88
            case self::CELL_TYPE_SHARED_STRING:
89
                return $this->formatSharedStringCellValue($vNodeValue);
90
            case self::CELL_TYPE_STR:
91
                return $this->formatStrCellValue($vNodeValue);
92
            case self::CELL_TYPE_BOOLEAN:
93
                return $this->formatBooleanCellValue($vNodeValue);
94
            case self::CELL_TYPE_NUMERIC:
95
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
96
            case self::CELL_TYPE_DATE:
97
                return $this->formatDateCellValue($vNodeValue);
98
            default:
99
                return null;
100
        }
101
    }
102
103
    /**
104
     * Returns the cell's string value from a node's nested value node
105
     *
106
     * @param \DOMNode $node
107
     * @return string The value associated with the cell
108
     */
109
    protected function getVNodeValue($node)
110
    {
111
        // for cell types having a "v" tag containing the value.
112
        // if not, the returned value should be empty string.
113
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
114
        return ($vNode !== null) ? $vNode->nodeValue : '';
115
    }
116
117
    /**
118
     * Returns the cell String value where string is inline.
119
     *
120
     * @param \DOMNode $node
121
     * @return string The value associated with the cell (null when the cell has an error)
122
     */
123
    protected function formatInlineStringCellValue($node)
124
    {
125
        // inline strings are formatted this way:
126
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
127
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
128
        $cellValue = $this->escaper->unescape($tNode->nodeValue);
129
        return $cellValue;
130
    }
131
132
    /**
133
     * Returns the cell String value from shared-strings file using nodeValue index.
134
     *
135
     * @param string $nodeValue
136
     * @return string The value associated with the cell (null when the cell has an error)
137
     */
138
    protected function formatSharedStringCellValue($nodeValue)
139
    {
140
        // shared strings are formatted this way:
141
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
142
        $sharedStringIndex = intval($nodeValue);
143
        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
144
        $cellValue = $this->escaper->unescape($escapedCellValue);
145
        return $cellValue;
146
    }
147
148
    /**
149
     * Returns the cell String value, where string is stored in value node.
150
     *
151
     * @param string $nodeValue
152
     * @return string The value associated with the cell (null when the cell has an error)
153
     */
154
    protected function formatStrCellValue($nodeValue)
155
    {
156
        $escapedCellValue = trim($nodeValue);
157
        $cellValue = $this->escaper->unescape($escapedCellValue);
158
        return $cellValue;
159
    }
160
161
    /**
162
     * Returns the cell Numeric value from string of nodeValue.
163
     * The value can also represent a timestamp and a DateTime will be returned.
164
     *
165
     * @param string $nodeValue
166
     * @param int $cellStyleId 0 being the default style
167
     * @return int|float|\DateTime|null The value associated with the cell
168
     */
169
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
170
    {
171
        // Numeric values can represent numbers as well as timestamps.
172
        // We need to look at the style of the cell to determine whether it is one or the other.
173
        $shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);
174
175
        if ($shouldFormatAsDate) {
176
            return $this->formatExcelTimestampValue(floatval($nodeValue), $cellStyleId);
177
        } else {
178
            $nodeIntValue = intval($nodeValue);
179
            return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
180
        }
181
    }
182
183
    /**
184
     * Returns a cell's PHP Date value, associated to the given timestamp.
185
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
186
     * NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
187
     *
188
     * @param float $nodeValue
189
     * @param int $cellStyleId 0 being the default style
190
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
191
     */
192
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
193
    {
194
        // Fix for the erroneous leap year in Excel
195
        if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
196
            --$nodeValue;
197
        }
198
199
        if ($nodeValue >= 1) {
200
            // Values greater than 1 represent "dates". The value 1.0 representing the "base" date: 1900-01-01.
201
            return $this->formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId);
202
        } else if ($nodeValue >= 0) {
203
            // Values between 0 and 1 represent "times".
204
            return $this->formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId);
205
        } else {
206
            // invalid date
207
            return null;
208
        }
209
    }
210
211
    /**
212
     * Returns a cell's PHP DateTime value, associated to the given timestamp.
213
     * Only the time value matters. The date part is set to Jan 1st, 1900 (base Excel date).
214
     *
215
     * @param float $nodeValue
216
     * @param int $cellStyleId 0 being the default style
217
     * @return \DateTime|string The value associated with the cell
218
     */
219
    protected function formatExcelTimestampValueAsTimeValue($nodeValue, $cellStyleId)
220
    {
221
        $time = round($nodeValue * self::NUM_SECONDS_IN_ONE_DAY);
222
        $hours = floor($time / self::NUM_SECONDS_IN_ONE_HOUR);
223
        $minutes = floor($time / self::NUM_SECONDS_IN_ONE_MINUTE) - ($hours * self::NUM_SECONDS_IN_ONE_MINUTE);
224
        $seconds = $time - ($hours * self::NUM_SECONDS_IN_ONE_HOUR) - ($minutes * self::NUM_SECONDS_IN_ONE_MINUTE);
225
226
        // using the base Excel date (Jan 1st, 1900) - not relevant here
227
        $dateObj = new \DateTime('1900-01-01');
228
        $dateObj->setTime($hours, $minutes, $seconds);
229
230
        if ($this->shouldFormatDates) {
231
            $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
232
            $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
233
            return $dateObj->format($phpDateFormat);
234
        } else {
235
            return $dateObj;
236
        }
237
    }
238
239
    /**
240
     * Returns a cell's PHP Date value, associated to the given timestamp.
241
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
242
     *
243
     * @param float $nodeValue
244
     * @param int $cellStyleId 0 being the default style
245
     * @return \DateTime|string|null The value associated with the cell or NULL if invalid date value
246
     */
247
    protected function formatExcelTimestampValueAsDateValue($nodeValue, $cellStyleId)
248
    {
249
        // Do not use any unix timestamps for calculation to prevent
250
        // issues with numbers exceeding 2^31.
251
        $secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
252
        $secondsRemainder = round($secondsRemainder, 0);
253
254
        try {
255
            $dateObj = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
256
            $dateObj->modify('+' . intval($nodeValue) . 'days');
257
            $dateObj->modify('+' . $secondsRemainder . 'seconds');
258
259
            if ($this->shouldFormatDates) {
260
                $styleNumberFormat = $this->styleHelper->getNumberFormat($cellStyleId);
261
                $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormat);
262
                return $dateObj->format($phpDateFormat);
263
            } else {
264
                return $dateObj;
265
            }
266
        } catch (\Exception $e) {
267
            return null;
268
        }
269
    }
270
271
    /**
272
     * Returns the cell Boolean value from a specific node's Value.
273
     *
274
     * @param string $nodeValue
275
     * @return bool The value associated with the cell
276
     */
277
    protected function formatBooleanCellValue($nodeValue)
278
    {
279
        // !! is similar to boolval()
280
        $cellValue = !!$nodeValue;
281
        return $cellValue;
282
    }
283
284
    /**
285
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
286
     *
287
     * @param string $nodeValue
288
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
289
     */
290
    protected function formatDateCellValue($nodeValue)
291
    {
292
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
293
        try {
294
            $cellValue = new \DateTime($nodeValue);
295
            return $cellValue;
296
        } catch (\Exception $e) {
297
            return null;
298
        }
299
    }
300
}
301