Completed
Push — master ( 5ef564...2fafb6 )
by Adrien
02:55
created

RowIterator::rewind()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 17
ccs 10
cts 10
cp 1
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 9
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
12
/**
 * Class RowIterator
 *
 * Forward-only iterator over the rows of one ODS sheet. Rows are read sequentially
 * from the given XMLReader and exposed one at a time through a single-row buffer.
 * Rows flagged with "table:number-rows-repeated" are served from that buffer the
 * required number of times, without reading further XML.
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCellValue = null;

    /** @var int Number of times the last processed row should be repeated */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
     */
    public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows)
    {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row into the buffer
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped unless the iterator
     * was configured to preserve them. When the buffered row still has repetitions
     * left, no XML is read and the buffer is kept as is.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow($this->xmlReader);
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Reads XML nodes until one full row has been collected, the closing
     * "</table:table>" node is found, or the reader has nothing left to read.
     * The collected row (possibly an empty array) is stored in the row buffer.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow($xmlReader)
    {
        $rowData = [];
        $hasStoppedOnRowOrTableEnd = false;

        try {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    $this->processRowStartingNode($xmlReader);

                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    $rowData = $this->processCellStartingNode($xmlReader, $rowData);

                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue);

                    // if the fetched row is empty and we don't want to preserve it...
                    if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
                        // ... skip it
                        continue;
                    }

                    $rowData = $this->processRowEndingNode($rowData, $isEmptyRow);

                    // at this point, we have all the data we need for the row
                    // so that we can populate the buffer
                    $hasStoppedOnRowOrTableEnd = true;
                    break;

                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    $this->processTableEndingNode();
                    $hasStoppedOnRowOrTableEnd = true;
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            // Keep the original exception attached so the root cause is not lost
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        if (!$hasStoppedOnRowOrTableEnd) {
            // The reader ran out of nodes before any closing "</table:table>" was seen.
            // Without flagging the end here, valid() would stay true forever and the
            // iteration would never terminate.
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Resets the per-row state and records how many times the row starting here is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return void
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCellValue = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
    }

    /**
     * Reads the cell the reader is positioned on and appends the PREVIOUS cell of the
     * row (with its repetitions) to the row data. The cell just read is only remembered:
     * it gets appended when the following cell or the end of the row is reached.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @param array $rowData Data of all cells read so far
     * @return array Original row data + data for the previously read cell, if any
     */
    protected function processCellStartingNode($xmlReader, $rowData)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        $node = $xmlReader->expand();
        $currentCellValue = $this->getCellValue($node);

        // Cell N is only added once cell N+1 has been read. This way the last cell of
        // the row is never added here and can get a special treatment at the end of the row.
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCellValue = $currentCellValue;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return $rowData;
    }

    /**
     * Appends the last read cell of the row (if relevant) and moves the index
     * of the next row to be processed past the repetitions of the current row.
     *
     * @param array $rowData Data of all cells read so far
     * @param bool $isEmptyRow Whether the given row is empty
     * @return array Row data, including the last cell when it was added
     */
    protected function processRowEndingNode($rowData, $isEmptyRow)
    {
        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the values in $rowData.
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to return rows with
        // always 16384 cells.
        if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        return $rowData;
    }

    /**
     * @return void
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing or not a positive integer
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountFromAttribute($xmlReader, self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing or not a positive integer
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountFromAttribute($xmlReader, self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
    }

    /**
     * Reads a "number-*-repeated" attribute of the current node as a repeat count.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on the node holding the attribute
     * @param string $attributeName Name of the attribute to read
     * @return int The attribute's value, never lower than 1
     */
    private function getRepeatCountFromAttribute($xmlReader, $attributeName)
    {
        $attributeValue = $xmlReader->getAttribute($attributeName);
        if ($attributeValue === null) {
            return 1;
        }

        // A count of 0 (or a negative / non numeric value) is not a valid repetition.
        // For rows, it would prevent the next row index from moving forward and the
        // iterator would serve the same buffered row endlessly.
        return max(1, intval($attributeValue));
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * After finishing processing each cell, a row is considered empty if it contains
     * no cells or if the value of the last read cell is null or a blank string.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param array $rowData
     * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($rowData, $lastReadCellValue)
    {
        if (count($rowData) !== 0) {
            return false;
        }

        if ($lastReadCellValue === null) {
            return true;
        }

        // Only strings can be blank. Numbers, booleans (including false) and date
        // objects are actual values and must never be passed to trim().
        return (is_string($lastReadCellValue) && trim($lastReadCellValue) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
356