Completed
Push — master ( 73d5d0...a19231 )
by Adrien
02:53
created

RowIterator::processRowEndingNode()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 33
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 33
ccs 12
cts 12
cp 1
rs 8.439
c 0
b 0
f 0
cc 6
eloc 10
nc 5
nop 0
crap 6
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
use Box\Spout\Reader\Common\XMLProcessor;
12
13
/**
14
 * Class RowIterator
15
 *
16
 * @package Box\Spout\Reader\ODS
17
 */
18
class RowIterator implements IteratorInterface
19
{
20
    /**
     * Names of the XML nodes that are looked at when parsing the sheet data.
     */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /**
     * Names of the XML attributes that are looked at when parsing the sheet data.
     */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /**
     * Number of columns supported by Excel. Used to recognise the trailing
     * "repeated empty cell" that Excel appends to the rows it writes.
     */
    const MAX_COLUMNS_EXCEL = 16384;

    /**
     * The XMLReader object that will help read sheet's XML data.
     *
     * @var \Box\Spout\Reader\Wrapper\XMLReader
     */
    protected $xmlReader;

    /**
     * Helper object used to process XML nodes.
     *
     * @var \Box\Spout\Reader\Common\XMLProcessor
     */
    protected $xmlProcessor;

    /**
     * Whether empty rows should be returned (true) or skipped (false).
     *
     * @var bool
     */
    protected $shouldPreserveEmptyRows;

    /**
     * Helper used to format cell values.
     *
     * @var Helper\CellValueFormatter
     */
    protected $cellValueFormatter;

    /**
     * Whether the iterator has already been rewound once.
     *
     * @var bool
     */
    protected $hasAlreadyBeenRewound = false;

    /**
     * Data of the row currently being processed (key = cell index, value = cell value).
     *
     * @var array
     */
    protected $currentlyProcessedRowData = [];

    /**
     * Buffer holding the row data while checking if there are more rows to read.
     * Stays null until a row has been read.
     *
     * @var array|null
     */
    protected $rowDataBuffer;

    /**
     * Indicates whether all rows have been read.
     *
     * @var bool
     */
    protected $hasReachedEndOfFile = false;

    /**
     * Index of the last row that was processed (one-based).
     *
     * @var int
     */
    protected $lastRowIndexProcessed = 0;

    /**
     * Index of the row that will be processed next (one-based).
     *
     * @var int
     */
    protected $nextRowIndexToBeProcessed = 1;

    /**
     * Value of the last processed cell. It is kept aside because cell N is only
     * added to the row once cell N+1 (or the end of the row) has been read.
     *
     * @var mixed|null
     */
    protected $lastProcessedCellValue;

    /**
     * Number of times the last processed row should be repeated.
     *
     * @var int
     */
    protected $numRowsRepeated = 1;

    /**
     * Number of times the last cell value should be copied to the cells on its right.
     *
     * @var int
     */
    protected $numColumnsRepeated = 1;

    /**
     * Whether at least one cell has been read for the row currently being processed.
     *
     * @var bool
     */
    protected $hasAlreadyReadOneCellInCurrentRow = false;
71
72
73
    /**
     * Stores the reader, reads the options this iterator depends on and registers
     * the callbacks that handle the row, cell and table nodes.
     *
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param \Box\Spout\Reader\ODS\ReaderOptions $options Reader's current options
     */
    public function __construct($xmlReader, $options)
    {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
        $this->cellValueFormatter = new CellValueFormatter($options->shouldFormatDates());

        // One entry per callback: [node name, node type, name of the handling method].
        // The entries are registered in the order they are listed.
        $callbackDefinitions = [
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, 'processTableEndingNode'],
        ];

        $this->xmlProcessor = new XMLProcessor($this->xmlReader);

        foreach ($callbackDefinitions as $callbackDefinition) {
            list($nodeName, $nodeType, $methodName) = $callbackDefinition;
            $this->xmlProcessor->registerCallback($nodeName, $nodeType, [$this, $methodName]);
        }
    }
90
91
    /**
     * Rewinds the iterator to the first row and loads that row.
     * This is only allowed once: the XML file cannot be read backwards, so a
     * second call is rejected.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     */
    public function rewind()
    {
        // Sheet and row data both live in the same file, which can only be read
        // forwards. Rewinding a second time is therefore not supported.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;

        // Put the iteration state back to "nothing read yet"...
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->nextRowIndexToBeProcessed = 1;
        $this->lastRowIndexProcessed = 0;

        // ... and move onto the first row.
        $this->next();
    }
116
117
    /**
     * Tells whether the current position is valid, i.e. whether the end of the
     * file has not been reached yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        $hasReachedEndOfFile = $this->hasReachedEndOfFile;

        return !$hasReachedEndOfFile;
    }
127
128
    /**
     * Moves forward to the next row.
     * New data is only read from the XML file when the row to return is not a
     * repetition of the row already held in the buffer.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $mustReadNewRow = $this->doesNeedDataForNextRowToBeProcessed();

        if ($mustReadNewRow) {
            $this->readDataForNextRow();
        }

        $this->lastRowIndexProcessed += 1;
    }
144
145
    /**
     * Tells whether new data must be read from the file before the next row can be returned.
     * This is the case when:
     *   - no row has been processed yet
     *      OR
     *   - every repetition of the last read row has been processed, meaning that
     *     the row to be processed next immediately follows the last processed one
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        // Nothing processed so far: the first row has to be read
        if ($this->lastRowIndexProcessed === 0) {
            return true;
        }

        $indexPrecedingNextRowToProcess = $this->nextRowIndexToBeProcessed - 1;

        return ($this->lastRowIndexProcessed === $indexPrecedingNextRowToProcess);
    }
163
164
    /**
     * Reads the XML until the registered callbacks stop the processor, then
     * copies the collected row data into the buffer.
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        // The callbacks append to this array, so start from an empty row
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            $errorMessage = "The sheet's data cannot be read. [{$exception->getMessage()}]";

            throw new IOException($errorMessage);
        }

        $this->rowDataBuffer = $this->currentlyProcessedRowData;
    }
181
182
    /**
     * Callback run when a row starts: clears the per-row state and records how
     * many times the row is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        // No cell of the new row has been read yet
        $this->numColumnsRepeated = 1;
        $this->lastProcessedCellValue = null;
        $this->hasAlreadyReadOneCellInCurrentRow = false;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
196
197
    /**
     * Callback run when a cell starts: adds the PREVIOUS cell to the row data
     * (as many times as it is repeated) and keeps the current cell aside.
     *
     * Adding a cell is delayed by one step because the last cell of a row needs
     * special handling, which is applied when the row ends.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $numColumnsRepeatedForThisCell = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
        $valueOfThisCell = $this->getCellValue($xmlReader->expand());

        // Cell N is only added once cell N+1 is reached
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            $numCopiesLeft = $this->numColumnsRepeated;

            while ($numCopiesLeft > 0) {
                $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
                $numCopiesLeft--;
            }
        }

        // The current cell becomes the pending one
        $this->lastProcessedCellValue = $valueOfThisCell;
        $this->numColumnsRepeated = $numColumnsRepeatedForThisCell;
        $this->hasAlreadyReadOneCellInCurrentRow = true;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
221
222
    /**
     * Callback run when a row ends: decides whether the row is skipped and, if
     * not, adds the pending last cell to the row data and stops the processor so
     * that the row can be returned.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $rowIsEmpty = $this->isEmptyRow($this->currentlyProcessedRowData, $this->lastProcessedCellValue);

        // An empty row that should not be preserved is skipped: keep reading
        if ($rowIsEmpty && !$this->shouldPreserveEmptyRows) {
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // An empty row is returned with a single cell, whatever its repeat count
        $numCopiesOfLastCell = $rowIsEmpty ? 1 : $this->numColumnsRepeated;

        // Excel ends its rows with an empty "<table:table-cell>" whose
        // number-columns-repeated value equals (supported columns - used columns),
        // the number of supported columns being 16384. Adding that cell would make
        // every row 16384 cells wide, so the last cell is left out when it brings
        // the number of columns to exactly that limit. The number of columns read
        // so far is the number of values in "$this->currentlyProcessedRowData".
        $numColumnsWithLastCell = count($this->currentlyProcessedRowData) + $numCopiesOfLastCell;

        if ($numColumnsWithLastCell !== self::MAX_COLUMNS_EXCEL) {
            $numCopiesLeft = $numCopiesOfLastCell;

            while ($numCopiesLeft > 0) {
                $this->currentlyProcessedRowData[] = $this->lastProcessedCellValue;
                $numCopiesLeft--;
            }
        }

        // When processing row N, repeated M times, the next row to be read
        // from the file will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // All the data of the row has been collected: stop here so that the
        // buffer can be populated.
        return XMLProcessor::PROCESSING_STOP;
    }
258
259
    /**
     * Callback run when the table ends: flags the end of the data and stops the processor.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processTableEndingNode()
    {
        // There is nothing left to read after the closing "</table:table>"
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }
269
270
    /**
     * Reads how many times the current row is repeated.
     *
     * The returned count is never lower than 1. A zero, negative or non-numeric
     * attribute value would otherwise prevent "nextRowIndexToBeProcessed" from
     * moving forward when the row ends; the iterator would then never read
     * another row nor reach the end of the file, and iterating would not terminate.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing or lower than 1
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

        if ($numRowsRepeated === null) {
            return 1;
        }

        // a row is always present at least once
        return max(1, intval($numRowsRepeated));
    }
279
280
    /**
     * Reads how many times the current cell is repeated.
     *
     * The returned count is never lower than 1. With a zero, negative or
     * non-numeric attribute value, the cell would be added zero times to the row,
     * i.e. silently dropped, and the cells on its right would shift to the left.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing or lower than 1
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        if ($numColumnsRepeated === null) {
            return 1;
        }

        // a cell is always present at least once
        return max(1, intval($numColumnsRepeated));
    }
289
290
    /**
     * Extracts the value of the cell represented by the given XML node, by
     * delegating to the cell value formatter.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        $formattedCellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

        return $formattedCellValue;
    }
300
301
    /**
     * Tells whether the row that has just been read is empty.
     *
     * After finishing processing each cell, a row is considered empty if no cell
     * has been added to the row data AND the last read cell has no value or a
     * blank one. The last read cell is looked at separately because, at that
     * point, it is not part of the row data yet (the "num-columns-repeated"
     * attribute still needs to be applied to it).
     *
     * @param array $rowData
     * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($rowData, $lastReadCellValue)
    {
        if (count($rowData) !== 0) {
            return false;
        }

        if ($lastReadCellValue === null) {
            return true;
        }

        // Dates and intervals are objects: they are actual values and cannot be
        // passed to trim(), which only accepts strings (TypeError on PHP 8).
        if (!is_scalar($lastReadCellValue)) {
            return false;
        }

        // Scalars are compared through their string representation (so boolean
        // false, which converts to '', still counts as blank).
        return (trim((string) $lastReadCellValue) === '');
    }
318
319
    /**
     * Returns the current row, as stored in the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        $currentRowData = $this->rowDataBuffer;

        return $currentRowData;
    }
329
330
    /**
     * Returns the key of the current row, which is the (one-based) index of the
     * last processed row.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        $currentRowIndex = $this->lastRowIndexProcessed;

        return $currentRowIndex;
    }
340
341
342
    /**
     * Releases what was needed to iterate over the rows, by closing the XML reader.
     *
     * @return void
     */
    public function end()
    {
        $xmlReader = $this->xmlReader;
        $xmlReader->close();
    }
351
}
352