Completed
Push — master ( c48c07...209372 )
by Adrien
02:50
created

CellValueFormatter::extractAndFormatNodeValue()   D

Complexity

Conditions 10
Paths 16

Size

Total Lines 28
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 10

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 28
rs 4.8196
ccs 21
cts 21
cp 1
cc 10
eloc 21
nc 16
nop 1
crap 10

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Box\Spout\Reader\XLSX\Helper;
4
5
/**
6
 * Class CellValueFormatter
7
 * This class provides helper functions to format cell values
8
 *
9
 * @package Box\Spout\Reader\XLSX\Helper
10
 */
11
class CellValueFormatter
12
{
13
    /** Definition of all possible cell types */
14
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
15
    const CELL_TYPE_STR = 'str';
16
    const CELL_TYPE_SHARED_STRING = 's';
17
    const CELL_TYPE_BOOLEAN = 'b';
18
    const CELL_TYPE_NUMERIC = 'n';
19
    const CELL_TYPE_DATE = 'd';
20
    const CELL_TYPE_ERROR = 'e';
21
22
    /** Definition of XML nodes names used to parse data */
23
    const XML_NODE_VALUE = 'v';
24
    const XML_NODE_INLINE_STRING_VALUE = 't';
25
26
    /** Definition of XML attributes used to parse data */
27
    const XML_ATTRIBUTE_TYPE = 't';
28
    const XML_ATTRIBUTE_STYLE_ID = 's';
29
30
    /** Constants used for date formatting */
31
    const NUM_SECONDS_IN_ONE_DAY = 86400;
32
33
    /**
34
     * February 29th, 1900 is NOT a leap year but Excel thinks it is...
35
     * @see https://en.wikipedia.org/wiki/Year_1900_problem#Microsoft_Excel
36
     */
37
    const ERRONEOUS_EXCEL_LEAP_YEAR_DAY = 60;
38
39
    /** @var SharedStringsHelper Helper to work with shared strings */
40
    protected $sharedStringsHelper;
41
42
    /** @var StyleHelper Helper to work with styles */
43
    protected $styleHelper;
44
45
    /** @var \Box\Spout\Common\Escaper\XLSX Used to unescape XML data */
46
    protected $escaper;
47
48
    /**
49
     * @param SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
50
     * @param StyleHelper $styleHelper Helper to work with styles
51
     */
52 126
    public function __construct($sharedStringsHelper, $styleHelper)
53
    {
54 126
        $this->sharedStringsHelper = $sharedStringsHelper;
55 126
        $this->styleHelper = $styleHelper;
56
57
        /** @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
58 126
        $this->escaper = new \Box\Spout\Common\Escaper\XLSX();
59 126
    }
60
61
    /**
62
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
63
     *
64
     * @param \DOMNode $node
65
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
66
     */
67 78
    public function extractAndFormatNodeValue($node)
68
    {
69
        // Default cell type is "n"
70 78
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
71 78
        $cellStyleId = intval($node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID));
72 78
        $vNodeValue = $this->getVNodeValue($node);
73
74 78
        if (($vNodeValue === '') && ($cellType !== self::CELL_TYPE_INLINE_STRING)) {
75 3
            return $vNodeValue;
76
        }
77
78
        switch ($cellType) {
79 78
            case self::CELL_TYPE_INLINE_STRING:
80 15
                return $this->formatInlineStringCellValue($node);
81 66
            case self::CELL_TYPE_SHARED_STRING:
82 33
                return $this->formatSharedStringCellValue($vNodeValue);
83 39
            case self::CELL_TYPE_STR:
84 3
                return $this->formatStrCellValue($vNodeValue);
85 39
            case self::CELL_TYPE_BOOLEAN:
86 3
                return $this->formatBooleanCellValue($vNodeValue);
87 39
            case self::CELL_TYPE_NUMERIC:
88 39
                return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
89 3
            case self::CELL_TYPE_DATE:
90 3
                return $this->formatDateCellValue($vNodeValue);
91 3
            default:
92 3
                return null;
93 3
        }
94
    }
95
96
    /**
97
     * Returns the cell's string value from a node's nested value node
98
     *
99
     * @param \DOMNode $node
100
     * @return string The value associated with the cell
101
     */
102 78
    protected function getVNodeValue($node)
103
    {
104
        // for cell types having a "v" tag containing the value.
105
        // if not, the returned value should be empty string.
106 78
        $vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
107 78
        return ($vNode !== null) ? $vNode->nodeValue : '';
108
    }
109
110
    /**
111
     * Returns the cell String value where string is inline.
112
     *
113
     * @param \DOMNode $node
114
     * @return string The value associated with the cell (null when the cell has an error)
115
     */
116 15
    protected function formatInlineStringCellValue($node)
117
    {
118
        // inline strings are formatted this way:
119
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t></is></c>
120 15
        $tNode = $node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE)->item(0);
121 15
        $escapedCellValue = trim($tNode->nodeValue);
122 15
        $cellValue = $this->escaper->unescape($escapedCellValue);
123 15
        return $cellValue;
124
    }
125
126
    /**
127
     * Returns the cell String value from shared-strings file using nodeValue index.
128
     *
129
     * @param string $nodeValue
130
     * @return string The value associated with the cell (null when the cell has an error)
131
     */
132 33
    protected function formatSharedStringCellValue($nodeValue)
133
    {
134
        // shared strings are formatted this way:
135
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
136 33
        $sharedStringIndex = intval($nodeValue);
137 33
        $escapedCellValue = $this->sharedStringsHelper->getStringAtIndex($sharedStringIndex);
138 33
        $cellValue = $this->escaper->unescape($escapedCellValue);
139 33
        return $cellValue;
140
    }
141
142
    /**
143
     * Returns the cell String value, where string is stored in value node.
144
     *
145
     * @param string $nodeValue
146
     * @return string The value associated with the cell (null when the cell has an error)
147
     */
148 3
    protected function formatStrCellValue($nodeValue)
149
    {
150 3
        $escapedCellValue = trim($nodeValue);
151 3
        $cellValue = $this->escaper->unescape($escapedCellValue);
152 3
        return $cellValue;
153
    }
154
155
    /**
156
     * Returns the cell Numeric value from string of nodeValue.
157
     * The value can also represent a timestamp and a DateTime will be returned.
158
     *
159
     * @param string $nodeValue
160
     * @param int $cellStyleId 0 being the default style
161
     * @return int|float|\DateTime|null The value associated with the cell
0 ignored issues
show
Documentation introduced by
Should the return type not be null|\DateTime|false|integer|double?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
162
     */
163 78
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
164
    {
165
        // Numeric values can represent numbers as well as timestamps.
166
        // We need to look at the style of the cell to determine whether it is one or the other.
167 78
        $shouldFormatAsDate = $this->styleHelper->shouldFormatNumericValueAsDate($cellStyleId);
168
169 78
        if ($shouldFormatAsDate) {
170 30
            return $this->formatExcelTimestampValue(floatval($nodeValue));
171
        } else {
172 48
            $nodeIntValue = intval($nodeValue);
173 48
            return ($nodeIntValue == $nodeValue) ? $nodeIntValue : floatval($nodeValue);
174
        }
175
    }
176
177
    /**
178
     * Returns a cell's PHP Date value, associated to the given timestamp.
179
     * NOTE: The timestamp is a float representing the number of days since January 1st, 1900.
180
     *
181
     * @param float $nodeValue
182
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
0 ignored issues
show
Documentation introduced by
Should the return type not be null|\DateTime|false?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
183
     */
184 30
    protected function formatExcelTimestampValue($nodeValue)
185
    {
186
        // Fix for the erroneous leap year in Excel
187 30
        if (ceil($nodeValue) > self::ERRONEOUS_EXCEL_LEAP_YEAR_DAY) {
188 15
            --$nodeValue;
189 15
        }
190
191
        // The value 1.0 represents 1900-01-01. Numbers below 1.0 are not valid Excel dates.
192 30
        if ($nodeValue < 1.0) {
193 9
            return null;
194
        }
195
196
        // Do not use any unix timestamps for calculation to prevent
197
        // issues with numbers exceeding 2^31.
198 21
        $secondsRemainder = fmod($nodeValue, 1) * self::NUM_SECONDS_IN_ONE_DAY;
199 21
        $secondsRemainder = round($secondsRemainder, 0);
200
201
        try {
202 21
            $cellValue = \DateTime::createFromFormat('|Y-m-d', '1899-12-31');
203 21
            $cellValue->modify('+' . intval($nodeValue) . 'days');
204 21
            $cellValue->modify('+' . $secondsRemainder . 'seconds');
205
206 21
            return $cellValue;
207
        } catch (\Exception $e) {
208
            return null;
209
        }
210
    }
211
212
    /**
213
     * Returns the cell Boolean value from a specific node's Value.
214
     *
215
     * @param string $nodeValue
216
     * @return bool The value associated with the cell
217
     */
218 3
    protected function formatBooleanCellValue($nodeValue)
219
    {
220
        // !! is similar to boolval()
221 3
        $cellValue = !!$nodeValue;
222 3
        return $cellValue;
223
    }
224
225
    /**
226
     * Returns a cell's PHP Date value, associated to the given stored nodeValue.
227
     *
228
     * @param string $nodeValue
229
     * @return \DateTime|null The value associated with the cell or NULL if invalid date value
230
     */
231 3
    protected function formatDateCellValue($nodeValue)
232
    {
233
        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
234
        try {
235 3
            $cellValue = new \DateTime($nodeValue);
236 3
            return $cellValue;
237 3
        } catch (\Exception $e) {
238 3
            return null;
239
        }
240
    }
241
}
242