Completed
Pull Request — master (#189)
by Hura
02:27
created

RowIterator::key()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 4
rs 10
ccs 2
cts 2
cp 1
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
12
/**
 * Class RowIterator
 * Forward-only iterator over the rows of one ODS sheet. Rows are read lazily
 * from the underlying XML reader; each call to next() buffers exactly one
 * non-empty row, which current() then returns.
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns supported by Excel; used to detect its trailing "filler" cell */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     */
    public function __construct($xmlReader)
    {
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = new CellValueFormatter();
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->numReadRows = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Buffer the first row so that valid()/current() reflect it right away
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows will be skipped.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $cellValue = null;
        $numColumnsRepeated = 1;
        $numCellsRead = 0;
        $hasAlreadyReadOneCell = false;
        $hasReachedStopNode = false;

        try {
            while ($this->xmlReader->read()) {
                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();

                    $node = $this->xmlReader->expand();
                    $currentCellValue = $this->getCellValue($node);

                    // Cell N is only added to the row once cell N+1 has been read: the last
                    // cell of a row needs special treatment, done when the row ends (see below)
                    if ($hasAlreadyReadOneCell) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    $cellValue = $currentCellValue;
                    $numColumnsRepeated = $currentNumColumnsRepeated;

                    $numCellsRead++;
                    $hasAlreadyReadOneCell = true;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
                    if ($isEmptyRow) {
                        // Skip empty rows: reset the per-row state and keep reading in the same
                        // loop (no recursion, so many consecutive empty rows cannot grow the stack)
                        $rowData = [];
                        $cellValue = null;
                        $numColumnsRepeated = 1;
                        $numCellsRead = 0;
                        $hasAlreadyReadOneCell = false;
                        continue;
                    }

                    // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
                    // The current count of read columns is determined by counting the values in $rowData.
                    // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
                    // with a number-columns-repeated value equals to the number of (supported columns - used columns).
                    // In Excel, the number of supported columns is 16384, but we don't want to return rows with
                    // always 16384 cells.
                    if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    // The row is returned whether or not its trailing cell was kept,
                    // so it must always be counted for key() to stay in sync
                    $this->numReadRows++;
                    $hasReachedStopNode = true;
                    break;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    // The closing "</table:table>" marks the end of the file
                    $this->hasReachedEndOfFile = true;
                    $hasReachedStopNode = true;
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        if (!$hasReachedStopNode) {
            // The reader ran out of nodes before a row or the table was closed:
            // there is nothing left to read, so iteration must stop here
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode()
    {
        $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
        return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * empty() replacement that honours 0 as a valid value.
     * Only null and strings made solely of whitespace are considered empty.
     * Any other value (numbers, booleans, date objects) is an actual cell value.
     *
     * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
     * @return bool
     */
    protected function isEmptyCellValue($value)
    {
        if ($value === null) {
            return true;
        }

        // trim() is only meaningful on strings: objects such as \DateTime cannot be
        // converted to string, and false would wrongly be turned into ''
        return (is_string($value) && trim($value) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
230