Completed
Push — master ( 40b4a5...ee5dee )
by Adrien
03:43
created

RowIterator::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 13
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 10
cts 10
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 9
nc 1
nop 2
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
use Box\Spout\Reader\Common\XMLProcessor;
12
13
/**
 * Class RowIterator
 * Forward-only iterator over the rows found under a "<table:table>" element.
 * Rows are produced by an XMLProcessor on which this class registers callbacks
 * for the row, cell and table nodes.
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
    protected $currentlyProcessedRowData = [];

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCellValue = null;

    /** @var int Number of times the last processed row should be repeated (always >= 1) */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right (always >= 1) */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * Stores the reader and the options-derived settings, then registers the
     * callbacks used to process the row, cell and table nodes.
     *
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options
     */
    public function __construct($xmlReader, $options)
    {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
        $this->cellValueFormatter = new CellValueFormatter($options->shouldFormatDates());

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = new XMLProcessor($this->xmlReader);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that valid()/current() can be called right away
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element.
     * Empty rows are skipped, unless the reader options ask for them to be preserved.
     * When the current row is repeated, no new data is read: the buffered row is served again.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Reads the XML until one of the registered callbacks stops the processing,
     * then copies the collected row data into the buffer returned by current().
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowDataBuffer = $this->currentlyProcessedRowData;
    }

    /**
     * Resets the per-row state and records how many times the row is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCellValue = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Reads the value of the current cell and flushes the previously read cell into the row data.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $currentCellValue = $this->getCellValue($node);

        // Cell N is only added to the row once cell N+1 is reached: the last cell of a row
        // needs special treatment, which is applied when the row's ending node is processed.
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCellValue = $currentCellValue;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Finalizes the current row: skips it if it is empty and empty rows are not preserved,
     * otherwise adds the last read cell (if applicable) and stops the processing.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRowData, $this->lastProcessedCellValue);

        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData".
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equal to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to return rows with
        // always 16384 cells.
        if ((count($this->currentlyProcessedRowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Flags the end of the data and stops the processing.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing or not a positive integer
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountAttribute($xmlReader, self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing or not a positive integer
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountAttribute($xmlReader, self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
    }

    /**
     * Reads a "number-*-repeated" attribute of the current node as a repeat count.
     * The result is never lower than 1: a row repeat count of 0 would prevent
     * "nextRowIndexToBeProcessed" from advancing, in which case next() would stop
     * reading data and the iteration would never reach the end of the file.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on the node holding the attribute
     * @param string $attributeName Name of the attribute to read
     * @return int The attribute's value, or 1 if the attribute is missing or is not a positive integer
     */
    private function getRepeatCountAttribute($xmlReader, $attributeName)
    {
        $rawValue = $xmlReader->getAttribute($attributeName);

        if ($rawValue === null) {
            return 1;
        }

        return max(1, intval($rawValue));
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * After finishing processing each cell, a row is considered empty if it contains
     * no cells or if the value of the last read cell is null or a blank string.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param array $rowData
     * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($rowData, $lastReadCellValue)
    {
        if (count($rowData) !== 0) {
            return false;
        }

        if ($lastReadCellValue === null) {
            return true;
        }

        // Only strings can be blank. Trimming any other type is wrong: "false" would be
        // converted to an empty string (and the row dropped), and date objects cannot
        // be converted to a string at all.
        return (is_string($lastReadCellValue) && trim($lastReadCellValue) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
353