RowIterator   A
last analyzed

Complexity

Total Complexity 34

Size/Duplication

Total Lines 401
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 10

Test Coverage

Coverage 98.2%

Importance

Changes 0
Metric Value
wmc 34
lcom 1
cbo 10
dl 0
loc 401
ccs 109
cts 111
cp 0.982
rs 9.68
c 0
b 0
f 0

18 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 26 1
A normalizeSheetDataXMLFilePath() 0 4 1
A rewind() 0 17 2
A valid() 0 4 1
A next() 0 8 2
A doesNeedDataForNextRowToBeProcessed() 0 10 3
A readDataForNextRow() 0 12 2
A processDimensionStartingNode() 0 10 2
A processRowStartingNode() 0 21 2
A processCellStartingNode() 0 13 1
A processRowEndingNode() 0 19 4
A processWorksheetEndingNode() 0 7 1
A getRowIndex() 0 9 2
A getColumnIndex() 0 9 2
A getCell() 0 12 2
A current() 0 18 3
A key() 0 9 2
A end() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Entity\Cell;
6
use Box\Spout\Common\Entity\Row;
7
use Box\Spout\Common\Exception\IOException;
8
use Box\Spout\Reader\Common\Manager\RowManager;
9
use Box\Spout\Reader\Common\XMLProcessor;
10
use Box\Spout\Reader\Exception\InvalidValueException;
11
use Box\Spout\Reader\Exception\XMLProcessingException;
12
use Box\Spout\Reader\IteratorInterface;
13
use Box\Spout\Reader\Wrapper\XMLReader;
14
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
15
use Box\Spout\Reader\XLSX\Helper\CellHelper;
16
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
17
18
/**
19
 * Class RowIterator
20
 */
21
class RowIterator implements IteratorInterface
22
{
23
    /** Names of the XML nodes used to parse data */
24
    const XML_NODE_DIMENSION = 'dimension';
25
    const XML_NODE_WORKSHEET = 'worksheet';
26
    const XML_NODE_ROW = 'row';
27
    const XML_NODE_CELL = 'c';
28
29
    /** Definition of XML attributes used to parse data */
30
    const XML_ATTRIBUTE_REF = 'ref';
31
    const XML_ATTRIBUTE_SPANS = 'spans';
32
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
33
    const XML_ATTRIBUTE_CELL_INDEX = 'r';
34
35
    /** @var string Path of the XLSX file being read */
36
    protected $filePath;
37
38
    /** @var string Path of the sheet data XML file as in [Content_Types].xml */
39
    protected $sheetDataXMLFilePath;
40
41
    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
42
    protected $xmlReader;
43
44
    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
45
    protected $xmlProcessor;
46
47
    /** @var Helper\CellValueFormatter Helper to format cell values */
48
    protected $cellValueFormatter;
49
50
    /** @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows */
51
    protected $rowManager;
52
53
    /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
54
    protected $entityFactory;
55
56
    /**
57
     * TODO: This variable can be deleted when row indices get preserved
58
     * @var int Number of read rows
59
     */
60
    protected $numReadRows = 0;
61
62
    /** @var Row Contains the row currently processed */
63
    protected $currentlyProcessedRow;
64
65
    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
66
    protected $rowBuffer;
67
68
    /** @var bool Indicates whether all rows have been read */
69
    protected $hasReachedEndOfFile = false;
70
71
    /** @var int The number of columns the sheet has (0 meaning undefined) */
72
    protected $numColumns = 0;
73
74
    /** @var bool Whether empty rows should be returned or skipped */
75
    protected $shouldPreserveEmptyRows;
76
77
    /** @var int Last row index processed (one-based) */
78
    protected $lastRowIndexProcessed = 0;
79
80
    /** @var int Row index to be processed next (one-based) */
81
    protected $nextRowIndexToBeProcessed = 0;
82
83
    /** @var int Last column index processed (zero-based) */
84
    protected $lastColumnIndexProcessed = -1;
85
86
    /**
     * Stores the collaborators used to iterate over a sheet's rows and registers
     * this iterator's node handlers on the XML processor.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        // Location of the data: the archive, then the (normalized) entry inside it
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        // Behavior flag and collaborators
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->entityFactory = $entityFactory;
        $this->rowManager = $rowManager;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->xmlReader = $xmlReader;
        $this->xmlProcessor = $xmlProcessor;

        // Each entry is [node name, node type, handler method]; handlers are registered in this order
        $nodeHandlers = [
            [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
        ];

        foreach ($nodeHandlers as list($nodeName, $nodeType, $handlerMethod)) {
            $this->xmlProcessor->registerCallback($nodeName, $nodeType, [$this, $handlerMethod]);
        }
    }
122
123
    /**
     * Strips every leading "/" from the given sheet data path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string The same path, without any leading slash
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        $pathWithoutLeadingSlash = \ltrim($sheetDataXMLFilePath, '/');

        return $pathWithoutLeadingSlash;
    }
132
133
    /**
     * Rewind the Iterator to the first element.
     * Closes the XML reader, re-opens the sheet data XML from inside the XLSX file,
     * resets the iteration state and loads the first row.
     *
     * The #[\ReturnTypeWillChange] attribute suppresses the return-type deprecation
     * PHP 8.1+ emits for untyped \Iterator methods; older PHP versions parse that
     * line as a comment.
     * NOTE(review): assumes IteratorInterface extends \Iterator, as the @see link suggests - confirm.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        // Release whatever the reader currently has open before re-opening the sheet
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Back to the "nothing read yet" state
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        // Position the iterator on the first row
        $this->next();
    }
159
160
    /**
     * Checks if current position is valid, i.e. the end of the sheet has not been reached.
     *
     * The #[\ReturnTypeWillChange] attribute suppresses the return-type deprecation
     * PHP 8.1+ emits for untyped \Iterator methods; older PHP versions parse that
     * line as a comment.
     * NOTE(review): assumes IteratorInterface extends \Iterator, as the @see link suggests - confirm.
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return !$this->hasReachedEndOfFile;
    }
170
171
    /**
     * Move forward to next element. Reads data describing the next unprocessed row,
     * unless the row already read is still ahead of the new position.
     *
     * The #[\ReturnTypeWillChange] attribute suppresses the return-type deprecation
     * PHP 8.1+ emits for untyped \Iterator methods; older PHP versions parse that
     * line as a comment.
     * NOTE(review): assumes IteratorInterface extends \Iterator, as the @see link suggests - confirm.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }
187
188
    /**
     * Tells whether more XML must be read before the next row can be returned.
     *
     * Reading can be skipped only when all of the following hold:
     *   - at least one row has already been read
     *   - empty rows must be preserved
     *   - the last row read is not behind the row to be processed
     *     (i.e. empty rows have to be returned until the position catches up)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        // Nothing has been read yet
        if ($this->lastRowIndexProcessed === 0) {
            return true;
        }

        // Empty rows are skipped, so every step consumes a new row
        if (!$this->shouldPreserveEmptyRows) {
            return true;
        }

        // The row already read has been consumed
        return $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed;
    }
210
211
    /**
     * Creates a fresh row, lets the XML processor run until one of the registered
     * callbacks stops it, then stores the row in the buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Chain the low-level exception so its type and stack trace remain available.
            // NOTE(review): assumes IOException keeps \Exception's (message, code, previous) constructor - confirm.
            throw new IOException(
                "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]",
                0,
                $exception
            );
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }
228
229
    /**
     * Reads the sheet dimensions and derives the number of columns from them.
     * When the "ref" attribute is missing or is not a range, the column count is left untouched.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // 'A1:M13' for instance (or 'A1' for empty sheet)
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

        // Only run the regex on an actual string: passing null to preg_match() is deprecated since PHP 8.1
        if (\is_string($dimensionRef) && \preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            // $matches[1] is the cell after the colon; column indexes are zero-based, hence the + 1
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }
243
244
    /**
     * Starts a new row: resets the column cursor, records the row index and
     * pre-fills the row with empty cells.
     *
     * The number of placeholder cells comes from the row's "spans" attribute when it
     * has the "first:last" form, and from the sheet-wide column count otherwise.
     * Each placeholder is a distinct Cell instance, so mutating one empty cell later
     * cannot affect the others.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $spanBounds = \explode(':', $spans);
            // A value without ':' has no upper bound to read: keep the sheet-wide column count
            if (isset($spanBounds[1])) {
                $numberOfColumnsForRow = (int) $spanBounds[1];
            }
        }

        // One fresh cell per column; a zero or negative count simply yields no placeholder
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }
269
270
    /**
     * Builds the cell found at the reader's position and stores it in the row being processed.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $columnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $cell = $this->getCell($xmlReader->expand());

        $this->currentlyProcessedRow->setCellAtIndex($cell, $columnIndex);

        // Remember the position, for later cells that carry no index of their own
        $this->lastColumnIndexProcessed = $columnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
287
288
    /**
     * Finishes the row being processed: either skips it (empty row that must not be
     * preserved) or counts it and stops the processor so the row can be buffered.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // The emptiness check only runs when empty rows are not preserved
        $shouldSkipRow = !$this->shouldPreserveEmptyRows
            && $this->rowManager->isEmpty($this->currentlyProcessedRow);

        if ($shouldSkipRow) {
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        ++$this->numReadRows;

        // No column count is known for the sheet: let the row manager fill the missing indexes
        if ($this->numColumns === 0) {
            $filledRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
            $this->currentlyProcessedRow = $filledRow;
        }

        // All the data for the row is available: stop so that the buffer can be populated
        return XMLProcessor::PROCESSING_STOP;
    }
310
311
    /**
     * Flags the end of the file once the closing "</worksheet>" node is reached
     * and stops the processor.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        $nextAction = XMLProcessor::PROCESSING_STOP;

        // Nothing is left to read after the closing worksheet node
        $this->hasReachedEndOfFile = true;

        return $nextAction;
    }
321
322
    /**
     * Returns the index of the row the reader is positioned on: the value of its "r"
     * attribute cast to int, or the index following the last processed row when the
     * attribute is absent.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>)
        $rowIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        if ($rowIndexAttribute === null) {
            return $this->lastRowIndexProcessed + 1;
        }

        return (int) $rowIndexAttribute;
    }
336
337
    /**
     * Returns the index of the column the reader is positioned on: derived from the
     * cell's "r" attribute by CellHelper, or the index following the last processed
     * column when the attribute is absent.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>)
        $cellIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        if ($cellIndexAttribute === null) {
            return $this->lastColumnIndexProcessed + 1;
        }

        return CellHelper::getColumnIndexFromCellIndex($cellIndexAttribute);
    }
351
352
    /**
     * Builds the cell for the given XML node from its extracted and formatted value.
     * When the formatter reports an invalid value, the cell carries that invalid value
     * and is typed as an error cell.
     *
     * @param \DOMNode $node
     * @return Cell The cell holding the value associated with the node
     */
    protected function getCell($node)
    {
        try {
            $formattedValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

            return $this->entityFactory->createCell($formattedValue);
        } catch (InvalidValueException $invalidValueException) {
            $errorCell = $this->entityFactory->createCell($invalidValueException->getInvalidValue());
            $errorCell->setType(Cell::TYPE_ERROR);

            return $errorCell;
        }
    }
370
371
    /**
     * Return the current element, either an empty row or from the buffer.
     *
     * The #[\ReturnTypeWillChange] attribute suppresses the return-type deprecation
     * PHP 8.1+ emits for untyped \Iterator methods; older PHP versions parse that
     * line as a comment.
     * NOTE(review): assumes IteratorInterface extends \Iterator, as the @see link suggests - confirm.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        $rowToBeProcessed = $this->rowBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // When empty rows are preserved, the buffered row is returned only if its
            // index matches the index of the row whose value is requested.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // Mismatch: the requested row is an empty row
                $rowToBeProcessed = $this->entityFactory->createRow();
            }
        }

        return $rowToBeProcessed;
    }
395
396
    /**
     * Return the key of the current element: the index of the row to be processed when
     * empty rows are preserved, the number of rows read otherwise.
     *
     * The #[\ReturnTypeWillChange] attribute suppresses the return-type deprecation
     * PHP 8.1+ emits for untyped \Iterator methods; older PHP versions parse that
     * line as a comment.
     * NOTE(review): assumes IteratorInterface extends \Iterator, as the @see link suggests - confirm.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }
411
412
    /**
     * Ends the iteration by closing the XML reader.
     *
     * @return void
     */
    public function end()
    {
        $reader = $this->xmlReader;
        $reader->close();
    }
421
}
422