RowIterator::processTableEndingNode()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Entity\Cell;
6
use Box\Spout\Common\Entity\Row;
7
use Box\Spout\Common\Exception\IOException;
8
use Box\Spout\Common\Manager\OptionsManagerInterface;
9
use Box\Spout\Reader\Common\Entity\Options;
10
use Box\Spout\Reader\Common\Manager\RowManager;
11
use Box\Spout\Reader\Common\XMLProcessor;
12
use Box\Spout\Reader\Exception\InvalidValueException;
13
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
14
use Box\Spout\Reader\Exception\XMLProcessingException;
15
use Box\Spout\Reader\IteratorInterface;
16
use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
17
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
18
use Box\Spout\Reader\Wrapper\XMLReader;
19
20
/**
 * Class RowIterator
 *
 * Forward-only iterator over the rows found inside a "<table:table>" element.
 * Rows are assembled by callbacks registered on the XMLProcessor: the processor is
 * run until a callback asks it to stop, which happens either when a row is complete
 * or when the closing "</table:table>" node is reached.
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var RowManager Manages rows */
    protected $rowManager;

    /** @var InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var Row|null The currently processed row */
    protected $currentlyProcessedRow;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var Cell|null Last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCell;

    /** @var int Number of times the last processed row should be repeated */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * Stores the collaborators and registers the node callbacks on the XML processor.
     *
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param OptionsManagerInterface $optionsManager Reader's options manager
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        XMLReader $xmlReader,
        OptionsManagerInterface $optionsManager,
        CellValueFormatter $cellValueFormatter,
        XMLProcessor $xmlProcessor,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->cellValueFormatter = $cellValueFormatter;
        $this->entityFactory = $entityFactory;
        $this->rowManager = $rowManager;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @return void
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that current() has something to return
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool False once the closing table node has been reached, true otherwise
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element.
     * Empty rows are skipped unless the "should preserve empty rows" option is enabled.
     * When the buffered row is a repeated row, no new data is read: the buffer is reused.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We DO need to read data if:
     *   - we have not read any rows yet
     *      OR
     *   - the next row to be processed immediately follows the last read row
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Creates a fresh row, runs the XML processor until one of the callbacks stops it,
     * then stores the resulting row in the buffer.
     *
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Keep the original exception attached so that its stack trace is not lost.
            // NOTE(review): assumes IOException keeps the standard \Exception constructor - confirm.
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * Resets the per-row state and reads how many times the starting row is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCell = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Reads the starting cell and appends the PREVIOUS cell (with its repetitions) to the row.
     * The cell just read is only remembered; it gets appended when the next cell or the
     * end of the row is reached.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $currentCell = $this->getCell($node);

        // process cell N only after having read cell N+1
        // (the last cell of a row gets a special treatment, see processRowEndingNode)
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCell = $currentCell;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Finalizes the current row: skips it if it is empty and empty rows are not preserved,
     * otherwise appends the last read cell (unless it is a trailing filler) and stops the processor.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);

        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;
        $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells();

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the cells in "$this->currentlyProcessedRow".
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to return rows with
        // always 16384 cells.
        $isTrailingCellRepeater = (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) === self::MAX_COLUMNS_EXCEL);

        // A row element without any cell leaves "lastProcessedCell" null (it is reset when the
        // row starts and only set when a cell starts): there is nothing to append in that case.
        if ($this->lastProcessedCell !== null && !$isTrailingCellRepeater) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $this->currentlyProcessedRow->addCell($this->lastProcessedCell);
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Flags the end of the data and stops the processor.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if the attribute is missing or lower than 1
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountForCurrentNode($xmlReader, self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if the attribute is missing or lower than 1
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        return $this->getRepeatCountForCurrentNode($xmlReader, self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
    }

    /**
     * Reads a "repeated" attribute of the current node as a repeat count that is never lower than 1.
     *
     * A row repeat count of 0 (or a negative / non-numeric value, which casts to an int lower than 1)
     * would prevent "nextRowIndexToBeProcessed" from advancing: the iterator would then stop reading
     * new data and return the same buffered row forever. A column repeat count lower than 1 would
     * silently drop the cell. Such values are therefore treated as "not repeated".
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on the node to inspect
     * @param string $attributeName Name of the attribute holding the repeat count
     * @return int The repeat count, always greater than or equal to 1
     */
    private function getRepeatCountForCurrentNode($xmlReader, $attributeName)
    {
        $rawRepeatCount = $xmlReader->getAttribute($attributeName);

        if ($rawRepeatCount === null) {
            return 1;
        }

        return max(1, (int) $rawRepeatCount);
    }

    /**
     * Builds the cell associated to the given XML node.
     * If the formatter rejects the value, a cell holding the invalid value and flagged
     * with the error type is returned instead.
     *
     * @param \DOMNode $node
     * @return Cell The cell holding the formatted value of the node
     */
    protected function getCell($node)
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = $this->entityFactory->createCell($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = $this->entityFactory->createCell($exception->getInvalidValue());
            $cell->setType(Cell::TYPE_ERROR);
        }

        return $cell;
    }

    /**
     * After finishing processing each cell, a row is considered empty if it contains
     * no cells or if the last read cell is empty.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param Row $currentRow
     * @param Cell|null $lastReadCell The last read cell, or null if no cell was read
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($currentRow, $lastReadCell)
    {
        return (
            $this->rowManager->isEmpty($currentRow) &&
            (!isset($lastReadCell) || $lastReadCell->isEmpty())
        );
    }

    /**
     * Return the current element, from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row
     */
    public function current()
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
385