Completed
Push — master ( 9f4c09...dbdf5f )
by Adrien
01:49
created

CellValueFormatter::isWhitespaceNode()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
rs 10
c 0
b 0
f 0
ccs 2
cts 2
cp 1
cc 1
nc 1
nop 1
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS\Helper;
4
5
use Box\Spout\Reader\Exception\InvalidValueException;
6
7
/**
8
 * Class CellValueFormatter
9
 * This class provides helper functions to format cell values
10
 */
11
class CellValueFormatter
12
{
13
    /** Cell types: the values of the "office:value-type" attribute that extractAndFormatNodeValue() dispatches on */
14
    const CELL_TYPE_STRING = 'string';
15
    const CELL_TYPE_FLOAT = 'float';
16
    const CELL_TYPE_BOOLEAN = 'boolean';
17
    const CELL_TYPE_DATE = 'date';
18
    const CELL_TYPE_TIME = 'time';
19
    const CELL_TYPE_CURRENCY = 'currency';
20
    const CELL_TYPE_PERCENTAGE = 'percentage';
21
    const CELL_TYPE_VOID = 'void';
22
23
    /**
     * Names of the XML nodes used to parse data:
     *  - "p" holds the cell text
     *  - "text:a" and "text:span" are traversed recursively when extracting text
     *  - "text:s", "text:tab" and "text:line-break" are converted to whitespace characters
     */
24
    const XML_NODE_P = 'p';
25
    const XML_NODE_TEXT_A = 'text:a';
26
    const XML_NODE_TEXT_SPAN = 'text:span';
27
    const XML_NODE_TEXT_S = 'text:s';
28
    const XML_NODE_TEXT_TAB = 'text:tab';
29
    const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';
30
31
    /** Names of the XML attributes read from the cell node (or, for "text:c", from a whitespace node) */
32
    const XML_ATTRIBUTE_TYPE = 'office:value-type';
33
    const XML_ATTRIBUTE_VALUE = 'office:value';
34
    const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
35
    const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value';
36
    const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value';
37
    const XML_ATTRIBUTE_CURRENCY = 'office:currency';
38
    const XML_ATTRIBUTE_C = 'text:c';
39
40
    /** @var bool When true, date/time cells are returned as the text found in their "p" node; when false, as \DateTime / \DateInterval objects */
41
    protected $shouldFormatDates;
42
43
    /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */
44
    protected $escaper;
45
46
    /** @var array Map of whitespace XML node name => the single whitespace character it stands for */
47
    private static $WHITESPACE_XML_NODES = [
48
        self::XML_NODE_TEXT_S => ' ',
49
        self::XML_NODE_TEXT_TAB => "\t",
50
        self::XML_NODE_TEXT_LINE_BREAK => "\n",
51
    ];
52
53
    /**
     * Stores the date-handling preference and the escaper used later when reading cell values.
     *
     * @param bool $shouldFormatDates When true, date/time cells are returned as their already formatted text; when false, as PHP objects
     * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
     */
    public function __construct($shouldFormatDates, $escaper)
    {
        $this->escaper = $escaper;
        $this->shouldFormatDates = $shouldFormatDates;
    }
62
63
    /**
     * Reads the "office:value-type" attribute of the given cell node and delegates
     * to the formatter matching that type.
     * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
     *
     * @param \DOMElement $node The cell node (must support getAttribute())
     * @throws InvalidValueException If the node value is not valid
     * @return string|int|float|bool|\DateTime|\DateInterval The value associated with the cell, empty string if cell's type is void/undefined
     */
    public function extractAndFormatNodeValue($node)
    {
        $valueType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);

        if ($valueType === self::CELL_TYPE_STRING) {
            return $this->formatStringCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_FLOAT) {
            return $this->formatFloatCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_BOOLEAN) {
            return $this->formatBooleanCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_DATE) {
            return $this->formatDateCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_TIME) {
            return $this->formatTimeCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_CURRENCY) {
            return $this->formatCurrencyCellValue($node);
        }

        if ($valueType === self::CELL_TYPE_PERCENTAGE) {
            return $this->formatPercentageCellValue($node);
        }

        // "void", a missing attribute or any unknown type all yield an empty value
        return '';
    }
95
96
    /**
     * Builds the string value of a cell: the text of every "p" node found under
     * the cell is extracted, the pieces are joined with a line feed and the
     * result is unescaped.
     *
     * @param \DOMElement $node The cell node
     * @return string The value associated with the cell
     */
    protected function formatStringCellValue($node)
    {
        $paragraphTexts = array_map(
            function ($paragraphNode) {
                return $this->extractTextValueFromNode($paragraphNode);
            },
            iterator_to_array($node->getElementsByTagName(self::XML_NODE_P))
        );

        // One line per paragraph
        return $this->escaper->unescape(implode("\n", $paragraphTexts));
    }
116
117
    /**
     * Concatenates, in document order, the text carried by the children of the given node:
     *  - text nodes contribute their value
     *  - whitespace nodes contribute the whitespace they represent
     *  - "text:a" and "text:span" nodes are processed recursively
     * Any other child is ignored.
     *
     * @param \DOMNode $pNode The node whose children should be read
     * @return string
     */
    private function extractTextValueFromNode($pNode)
    {
        $nestedTextNodeNames = [self::XML_NODE_TEXT_A, self::XML_NODE_TEXT_SPAN];
        $pieces = [];

        foreach ($pNode->childNodes as $child) {
            if ($child instanceof \DOMText) {
                $pieces[] = $child->nodeValue;
                continue;
            }

            $childName = $child->nodeName;

            if ($this->isWhitespaceNode($childName)) {
                $pieces[] = $this->transformWhitespaceNode($child);
                continue;
            }

            if (in_array($childName, $nestedTextNodeNames, true)) {
                $pieces[] = $this->extractTextValueFromNode($child);
            }
        }

        return implode('', $pieces);
    }
137
138
    /**
     * Tells whether the given node name is one of the known whitespace nodes:
     *  - <text:s />
     *  - <text:tab />
     *  - <text:line-break />
     *
     * @param string $nodeName
     * @return bool
     */
    private function isWhitespaceNode($nodeName)
    {
        return array_key_exists($nodeName, self::$WHITESPACE_XML_NODES);
    }
151
152
    /**
     * Converts a whitespace XML node back into the whitespace characters it represents.
     *
     * The "<text:p>" node can contain the string value directly
     * or contain child elements. In this case, whitespaces are
     * represented by dedicated XML nodes:
     *  - space => <text:s />
     *  - tab => <text:tab />
     *  - line break => <text:line-break />
     *
     * The optional "text:c" attribute gives the number of repetitions. When it is
     * missing, a single whitespace is produced. An explicit "0" produces no
     * whitespace, and a negative count is treated as 0.
     *
     * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
     *
     * @param \DOMElement $node The XML node representing a whitespace
     * @return string The corresponding whitespace value
     */
    private function transformWhitespaceNode($node)
    {
        // getAttribute() returns '' when the attribute is absent; it is only defined for "<text:s>"
        $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C);

        // Compare against '' rather than using empty(): "0" is a legitimate count and must not
        // fall back to 1. Clamp to 0 so that str_repeat() never receives a negative multiplier.
        $numWhitespaces = ($countAttribute !== '') ? max(0, (int) $countAttribute) : 1;

        return str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
    }
172
173
    /**
     * Reads the "office:value" attribute and returns it as a number: an int
     * when the value has no fractional part, a float otherwise.
     *
     * @param \DOMElement $node The cell node
     * @return int|float The value associated with the cell
     */
    protected function formatFloatCellValue($node)
    {
        $rawValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);

        $asInt = (int) $rawValue;
        $asFloat = (float) $rawValue;

        // Both casts agree => the value is integral and can be returned as an int
        if ((float) $asInt === $asFloat) {
            return $asInt;
        }

        return $asFloat;
    }
189
190
    /**
     * Returns the cell Boolean value from the given node.
     *
     * The "office:boolean-value" attribute carries the literal text "true" or
     * "false". The text is compared explicitly because casting it with (bool)
     * would turn "false" (a non-empty string) into true.
     *
     * @param \DOMElement $node The cell node
     * @return bool True when the attribute is "true" (or "1"), false otherwise
     */
    protected function formatBooleanCellValue($node)
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);

        // Tolerate surrounding whitespace, case differences and the "1" lexical form
        $normalizedValue = strtolower(trim($nodeValue));

        return $normalizedValue === 'true' || $normalizedValue === '1';
    }
202
203
    /**
     * Returns the cell Date value from the given node.
     *
     * When dates should be formatted, the text of the first "p" node is returned
     * (empty string if the cell has no such node). Otherwise the "office:date-value"
     * attribute is parsed into a \DateTime.
     *
     * @param \DOMElement $node The cell node
     * @throws InvalidValueException If the value is missing or is not a valid date
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatDateCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
        //   <text:p>05/19/16 04:39 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The date is already formatted in the "p" tag
            $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);

            // item(0) is null when the cell has no "p" child: there is no formatted text to return
            return ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : '';
        }

        // otherwise, get it from the "date-value" attribute
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);

        // new \DateTime('') does not fail: it yields the current time. A missing/empty
        // attribute must be reported as invalid instead of silently becoming "now".
        if (trim($nodeValue) === '') {
            throw new InvalidValueException($nodeValue);
        }

        try {
            return new \DateTime($nodeValue);
        } catch (\Exception $e) {
            throw new InvalidValueException($nodeValue);
        }
    }
233
234
    /**
     * Returns the cell Time value from the given node.
     *
     * When dates should be formatted, the text of the first "p" node is returned
     * (empty string if the cell has no such node). Otherwise the "office:time-value"
     * attribute is parsed into a \DateInterval.
     *
     * @param \DOMElement $node The cell node
     * @throws InvalidValueException If the value is not a valid time
     * @return \DateInterval|string The value associated with the cell
     */
    protected function formatTimeCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
        //   <text:p>01:24:00 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The time is already formatted in the "p" tag
            $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);

            // item(0) is null when the cell has no "p" child: there is no formatted text to return
            return ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : '';
        }

        // otherwise, get it from the "time-value" attribute
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);

        try {
            return new \DateInterval($nodeValue);
        } catch (\Exception $e) {
            throw new InvalidValueException($nodeValue);
        }
    }
264
265
    /**
     * Builds the currency value of a cell by joining the "office:value" and
     * "office:currency" attributes with a single space.
     *
     * @param \DOMElement $node The cell node
     * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
     */
    protected function formatCurrencyCellValue($node)
    {
        $amount = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
        $currencyCode = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);

        return $amount . ' ' . $currencyCode;
    }
278
279
    /**
     * Returns the cell Percentage value from the given node.
     * Percentage cells are read exactly like float cells.
     *
     * @param \DOMElement $node The cell node
     * @return int|float The value associated with the cell
     */
    protected function formatPercentageCellValue($node)
    {
        $numericValue = $this->formatFloatCellValue($node);

        return $numericValue;
    }
290
}
291