Completed
Pull Request — master (#667)
by
unknown
02:19
created

CellValueFormatter::extractAndFormatNodeValue()   B

Complexity

Conditions 9
Paths 9

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 9

Importance

Changes 0
Metric Value
dl 0
loc 24
ccs 18
cts 18
cp 1
rs 8.0555
c 0
b 0
f 0
cc 9
nc 9
nop 1
crap 9
1
<?php
2
3
namespace Box\Spout\Reader\ODS\Helper;
4
5
use Box\Spout\Reader\Exception\InvalidValueException;
6
7
/**
 * Class CellValueFormatter
 * This class provides helper functions to format cell values
 *
 * Given the XML node of an ODS table cell, it reads the cell's declared
 * "office:value-type" attribute and converts the cell content into the
 * matching PHP value (string, number, boolean, date, time interval, ...).
 */
class CellValueFormatter
{
    /** Definition of all possible cell types */
    const CELL_TYPE_STRING = 'string';
    const CELL_TYPE_FLOAT = 'float';
    const CELL_TYPE_BOOLEAN = 'boolean';
    const CELL_TYPE_DATE = 'date';
    const CELL_TYPE_TIME = 'time';
    const CELL_TYPE_CURRENCY = 'currency';
    const CELL_TYPE_PERCENTAGE = 'percentage';
    const CELL_TYPE_VOID = 'void';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_P = 'p';
    const XML_NODE_S = 'text:s';
    const XML_NODE_A = 'text:a';
    const XML_NODE_SPAN = 'text:span';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_TYPE = 'office:value-type';
    const XML_ATTRIBUTE_VALUE = 'office:value';
    const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
    const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value';
    const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value';
    const XML_ATTRIBUTE_CURRENCY = 'office:currency';
    const XML_ATTRIBUTE_C = 'text:c';

    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
    protected $shouldFormatDates;

    /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */
    protected $escaper;

    /**
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
     */
    public function __construct($shouldFormatDates, $escaper)
    {
        $this->shouldFormatDates = $shouldFormatDates;
        $this->escaper = $escaper;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     * The conversion applied depends on the node's "office:value-type" attribute.
     * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the node value is not valid
     * @return string|int|float|bool|\DateTime|\DateInterval The value associated with the cell, empty string if cell's type is void/undefined
     */
    public function extractAndFormatNodeValue($node)
    {
        $cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);

        switch ($cellType) {
            case self::CELL_TYPE_STRING:
                return $this->formatStringCellValue($node);
            case self::CELL_TYPE_FLOAT:
                return $this->formatFloatCellValue($node);
            case self::CELL_TYPE_BOOLEAN:
                return $this->formatBooleanCellValue($node);
            case self::CELL_TYPE_DATE:
                return $this->formatDateCellValue($node);
            case self::CELL_TYPE_TIME:
                return $this->formatTimeCellValue($node);
            case self::CELL_TYPE_CURRENCY:
                return $this->formatCurrencyCellValue($node);
            case self::CELL_TYPE_PERCENTAGE:
                return $this->formatPercentageCellValue($node);
            case self::CELL_TYPE_VOID:
            default:
                // Void cells, a missing type attribute and unknown types all map to an empty string
                return '';
        }
    }

    /**
     * Returns the cell String value.
     * The text of every "p" node found under the cell is collected, the
     * paragraphs are joined with a line feed and the result is unescaped.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell
     */
    protected function formatStringCellValue($node)
    {
        $pNodeValues = [];
        $pNodes = $node->getElementsByTagName(self::XML_NODE_P);

        foreach ($pNodes as $pNode) {
            $pNodeValues[] = $this->extractTextFromNode($pNode);
        }

        $escapedCellValue = implode("\n", $pNodeValues);

        return $this->escaper->unescape($escapedCellValue);
    }

    /**
     * Concatenates the text carried by the direct children of the given node:
     *  - text nodes contribute their value,
     *  - "text:s" nodes contribute as many spaces as their "text:c" attribute says (1 when absent/empty),
     *  - "text:a" and "text:span" nodes are processed recursively.
     * Any other child node is ignored.
     *
     * @param \DOMNode $pNode
     * @return string
     */
    protected function extractTextFromNode($pNode)
    {
        $currentPValue = '';

        foreach ($pNode->childNodes as $childNode) {
            if ($childNode instanceof \DOMText) {
                $currentPValue .= $childNode->nodeValue;
            } elseif ($childNode->nodeName === self::XML_NODE_S) {
                $spaceAttribute = $childNode->getAttribute(self::XML_ATTRIBUTE_C);
                $numSpaces = (!empty($spaceAttribute)) ? (int) $spaceAttribute : 1;
                $currentPValue .= str_repeat(' ', $numSpaces);
            } elseif ($childNode->nodeName === self::XML_NODE_A || $childNode->nodeName === self::XML_NODE_SPAN) {
                $currentPValue .= $this->extractTextFromNode($childNode);
            }
        }

        return $currentPValue;
    }

    /**
     * Returns the cell Numeric value from the given node.
     * The "office:value" attribute is returned as an int when it has no
     * fractional part, as a float otherwise.
     *
     * @param \DOMNode $node
     * @return int|float The value associated with the cell
     */
    protected function formatFloatCellValue($node)
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);

        $nodeIntValue = (int) $nodeValue;
        $nodeFloatValue = (float) $nodeValue;

        // Only keep the int when converting it back to float loses nothing
        return ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;
    }

    /**
     * Returns the cell Boolean value from the given node.
     *
     * @param \DOMNode $node
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($node)
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);

        // The attribute holds a textual boolean. A plain (bool) cast would turn the
        // string "false" into true, since any non-empty string other than "0" is truthy.
        // FILTER_VALIDATE_BOOLEAN maps "true"/"1" to true and "false"/"0"/"" to false.
        return filter_var($nodeValue, FILTER_VALIDATE_BOOLEAN);
    }

    /**
     * Returns the cell Date value from the given node.
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the value is not a valid date
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatDateCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
        //   <text:p>05/19/16 04:39 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The date is already formatted in the "p" tag
            return $this->extractPreformattedValue($node);
        }

        // otherwise, get it from the "date-value" attribute
        // NOTE(review): getAttribute() returns '' for an absent attribute and new \DateTime('')
        // does not throw but resolves to the current time - confirm this is acceptable.
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);

        try {
            return new \DateTime($nodeValue);
        } catch (\Exception $e) {
            throw new InvalidValueException($nodeValue);
        }
    }

    /**
     * Returns the cell Time value from the given node.
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the value is not a valid time
     * @return \DateInterval|string The value associated with the cell
     */
    protected function formatTimeCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
        //   <text:p>01:24:00 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The time is already formatted in the "p" tag
            return $this->extractPreformattedValue($node);
        }

        // otherwise, get it from the "time-value" attribute
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);

        try {
            return new \DateInterval($nodeValue);
        } catch (\Exception $e) {
            throw new InvalidValueException($nodeValue);
        }
    }

    /**
     * Returns the cell Currency value from the given node.
     * The result is the "office:value" attribute, a space, then the "office:currency" attribute.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
     */
    protected function formatCurrencyCellValue($node)
    {
        $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
        $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);

        return "$value $currency";
    }

    /**
     * Returns the cell Percentage value from the given node.
     *
     * @param \DOMNode $node
     * @return int|float The value associated with the cell
     */
    protected function formatPercentageCellValue($node)
    {
        // percentages are formatted like floats
        return $this->formatFloatCellValue($node);
    }

    /**
     * Returns the text of the first "p" node found under the given cell node.
     * Used for date/time cells when the already formatted representation is requested.
     *
     * @param \DOMNode $node
     * @return string The text of the first "p" node, or an empty string if the cell contains none
     */
    private function extractPreformattedValue($node)
    {
        $pNode = $node->getElementsByTagName(self::XML_NODE_P)->item(0);

        // item(0) is null when the cell has no paragraph: avoid reading a property on null
        return ($pNode !== null) ? $pNode->nodeValue : '';
    }
}
249