Passed
Push — develop_3.0 ( a665b9...78b663 )
by Adrien
04:18 queued 01:35
created

RowIterator   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 395
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 8

Test Coverage

Coverage 98.13%

Importance

Changes 0
Metric Value
wmc 33
lcom 1
cbo 8
dl 0
loc 395
ccs 105
cts 107
cp 0.9813
rs 9.3999
c 0
b 0
f 0

18 Methods

Rating   Name   Duplication   Size   Complexity  
B __construct() 0 26 1
A normalizeSheetDataXMLFilePath() 0 4 1
A rewind() 0 17 2
A valid() 0 4 1
A next() 0 8 2
A doesNeedDataForNextRowToBeProcessed() 0 10 3
A readDataForNextRow() 0 12 2
A processDimensionStartingNode() 0 10 2
A processRowStartingNode() 0 21 2
A processCellStartingNode() 0 13 1
A processRowEndingNode() 0 19 4
A processWorksheetEndingNode() 0 7 1
A getRowIndex() 0 9 2
A getColumnIndex() 0 9 2
A getCell() 0 6 1
A current() 0 18 3
A key() 0 9 2
A end() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Common\Entity\Cell;
7
use Box\Spout\Reader\Common\Entity\Row;
8
use Box\Spout\Reader\Common\XMLProcessor;
9
use Box\Spout\Reader\Exception\XMLProcessingException;
10
use Box\Spout\Reader\IteratorInterface;
11
use Box\Spout\Reader\Wrapper\XMLReader;
12
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
13
use Box\Spout\Reader\XLSX\Helper\CellHelper;
14
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
15
use Box\Spout\Reader\XLSX\Manager\RowManager;
16
17
/**
18
 * Class RowIterator
 * Iterates over the rows of one XLSX sheet by reading the sheet's data XML file.
19
 */
20
class RowIterator implements IteratorInterface
21
{
22
    /** Definition of XML nodes names used to parse data */
23
    const XML_NODE_DIMENSION = 'dimension';
24
    const XML_NODE_WORKSHEET = 'worksheet';
25
    const XML_NODE_ROW = 'row';
26
    const XML_NODE_CELL = 'c';
27
28
    /** Definition of XML attributes used to parse data */
29
    const XML_ATTRIBUTE_REF = 'ref';
30
    const XML_ATTRIBUTE_SPANS = 'spans';
31
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
32
    const XML_ATTRIBUTE_CELL_INDEX = 'r';
33
34
    /** @var string Path of the XLSX file being read */
35
    protected $filePath;
36
37
    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml, stored without its leading slash */
38
    protected $sheetDataXMLFilePath;
39
40
    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
41
    protected $xmlReader;
42
43
    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
44
    protected $xmlProcessor;
45
46
    /** @var Helper\CellValueFormatter Helper to format cell values */
47
    protected $cellValueFormatter;
48
49
    /** @var \Box\Spout\Reader\XLSX\Manager\RowManager Manages rows */
50
    protected $rowManager;
51
52
    /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
53
    protected $entityFactory;
54
55
    /**
56
     * TODO: This variable can be deleted when row indices get preserved
57
     * @var int Number of read rows
58
     */
59
    protected $numReadRows = 0;
60
61
    /** @var Row Contains the row currently processed */
62
    protected $currentlyProcessedRow;
63
64
    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
65
    protected $rowBuffer;
66
67
    /** @var bool Indicates whether all rows have been read */
68
    protected $hasReachedEndOfFile = false;
69
70
    /** @var int The number of columns the sheet has (0 meaning undefined) */
71
    protected $numColumns = 0;
72
73
    /** @var bool Whether empty rows should be returned or skipped */
74
    protected $shouldPreserveEmptyRows;
75
76
    /** @var int Last row index processed (one-based) */
77
    protected $lastRowIndexProcessed = 0;
78
79
    /** @var int Row index to be processed next (one-based) */
80
    protected $nextRowIndexToBeProcessed = 0;
81
82
    /** @var int Last column index processed (zero-based) */
83
    protected $lastColumnIndexProcessed = -1;
84
85
    /**
     * Stores the collaborators and registers, on the XML processor, the callbacks
     * that handle the nodes of interest while the sheet data is being read.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        // Location of the data to read (the sheet path is kept without its leading slash)
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

        // Collaborators
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->rowManager = $rowManager;
        $this->entityFactory = $entityFactory;
        $this->xmlProcessor = $xmlProcessor;

        // [node name, node type, handler method], registered in this exact order
        $nodeHandlers = [
            [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
        ];

        foreach ($nodeHandlers as list($nodeName, $nodeType, $handlerMethod)) {
            $this->xmlProcessor->registerCallback($nodeName, $nodeType, [$this, $handlerMethod]);
        }
    }
121
122
    /**
     * Strips every leading "/" from the given sheet data XML path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        $pathWithoutLeadingSlash = ltrim($sheetDataXMLFilePath, '/');

        return $pathWithoutLeadingSlash;
    }
131
132
    /**
     * Rewind the Iterator to the first element.
     * Closes the XMLReader, reopens it on the sheet data XML file inside the XLSX file,
     * resets the iteration state and loads the first row.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    public function rewind()
    {
        $this->xmlReader->close();

        $wasSheetOpened = $this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath);
        if ($wasSheetOpened === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Start again from a pristine state
        $this->hasReachedEndOfFile = false;
        $this->rowBuffer = null;
        $this->numColumns = 0;
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;

        // Position the iterator on the first row
        $this->next();
    }
158
159
    /**
     * Checks if current position is valid, i.e. the end of the sheet data has not been reached yet.
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        $isExhausted = $this->hasReachedEndOfFile;

        return !$isExhausted;
    }
169
170
    /**
     * Move forward to next element. Advances the index of the row to be processed
     * and reads the data of the next unprocessed row when it is required.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        ++$this->nextRowIndexToBeProcessed;

        if (!$this->doesNeedDataForNextRowToBeProcessed()) {
            // The row already read is still ahead of (or at) the requested index
            return;
        }

        $this->readDataForNextRow();
    }
186
187
    /**
     * Returns whether we need data for the next row to be processed.
     * Data must be read when any of the following holds:
     *   - no row has been read yet
     *   - empty rows are not preserved
     *   - the last row that was read comes before the row that needs to be processed
     * Otherwise the last row read is at or beyond the requested index and is reused.
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        // Nothing has been read so far
        if ($this->lastRowIndexProcessed === 0) {
            return true;
        }

        // Without empty rows preservation, each step consumes a row from the XML
        if (!$this->shouldPreserveEmptyRows) {
            return true;
        }

        // Only read more when the last row read is behind the requested one
        return $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed;
    }
209
210
    /**
     * Starts a new row, lets the XML processor run until one of the registered
     * callbacks stops it, then stores the resulting row in the buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow([]);

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            $errorMessage = "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]";

            throw new IOException($errorMessage);
        }

        // The callbacks may have replaced the row instance, so buffer whatever is current now
        $this->rowBuffer = $this->currentlyProcessedRow;
    }
227
228
    /**
     * Reads the sheet dimensions from the "ref" attribute and derives the number of columns from it.
     * The number of columns is left untouched when the attribute is missing or
     * does not describe a range (e.g. 'A1' for an empty sheet).
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Holds 'A1:M13' for instance (or 'A1' for an empty sheet)
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

        // The attribute may be absent: never hand a non-string subject to preg_match()
        $hasRange = is_string($dimensionRef)
            && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches) === 1;

        if ($hasRange) {
            // $matches[1] is the bottom-right cell of the range; its column index is zero-based
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }
242
243
    /**
     * Prepares the state for a new row: resets the column tracker, records the row index
     * and pre-fills the row being processed with empty cells.
     * The number of pre-filled cells is the upper bound of the "spans" attribute when it
     * is usable, otherwise the number of columns known for the sheet.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $spanBounds = explode(':', $spans);
            // Without an upper bound (no ':' in the value), keep the sheet-level column count
            if (isset($spanBounds[1])) {
                $numberOfColumnsForRow = (int) $spanBounds[1];
            }
        }

        // Create one distinct empty cell per column: objects are handles, so array_fill()
        // would store the very same Cell instance at every index.
        // A zero or negative count simply yields no cell.
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }
268
269
    /**
     * Builds the cell described by the current node and stores it in the row being
     * processed, at the column index of that node.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $columnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $cell = $this->getCell($xmlReader->expand());

        $this->currentlyProcessedRow->setCellAtIndex($cell, $columnIndex);

        // Remembered so that a following cell without index can be positioned right after
        $this->lastColumnIndexProcessed = $columnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
286
287
    /**
     * Finalizes the row that was just read. Empty rows are skipped when they should not
     * be preserved; otherwise the row is counted, completed if needed, and processing stops.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $shouldSkipEmptyRows = !$this->shouldPreserveEmptyRows;

        // An empty row that must not be preserved is ignored: keep reading
        if ($shouldSkipEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        ++$this->numReadRows;

        // If needed, we fill the empty cells (number of columns is undefined)
        $isNumColumnsUndefined = ($this->numColumns === 0);
        if ($isNumColumnsUndefined) {
            $filledRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
            $this->currentlyProcessedRow = $filledRow;
        }

        // All the data needed for the row is available: stop so the buffer can be populated
        return XMLProcessor::PROCESSING_STOP;
    }
309
310
    /**
     * Flags the end of the file and stops the processing.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        $nextAction = XMLProcessor::PROCESSING_STOP;

        // Reaching the closing "</worksheet>" means there is nothing left to read
        $this->hasReachedEndOfFile = true;

        return $nextAction;
    }
320
321
    /**
     * Returns the index of the row the reader is positioned on: the value of its "r"
     * attribute when present, otherwise the index following the last processed row.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // "r" attribute, from something like <row r="3"...>
        $rowIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        if ($rowIndexAttribute === null) {
            return $this->lastRowIndexProcessed + 1;
        }

        return (int) $rowIndexAttribute;
    }
335
336
    /**
     * Returns the index of the column the reader is positioned on: derived from the "r"
     * attribute when present, otherwise the index following the last processed column.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index
     */
    protected function getColumnIndex($xmlReader)
    {
        // "r" attribute, from something like <c r="A1"...>
        $cellIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        if ($cellIndexAttribute === null) {
            return $this->lastColumnIndexProcessed + 1;
        }

        return CellHelper::getColumnIndexFromCellIndex($cellIndexAttribute);
    }
350
351
    /**
     * Returns a cell holding the value extracted and formatted from the given XML node.
     *
     * @param \DOMNode $node
     * @return Cell The cell created with the value associated to the node
     */
    protected function getCell($node)
    {
        return $this->entityFactory->createCell(
            $this->cellValueFormatter->extractAndFormatNodeValue($node)
        );
    }
363
364
    /**
     * Return the current element, either an empty row or from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        // When empty rows are preserved, the row that was last read may not be the one
        // whose value is expected: in that case an empty row stands in for it.
        $shouldReturnEmptyRow = $this->shouldPreserveEmptyRows
            && $this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed;

        if ($shouldReturnEmptyRow) {
            return $this->entityFactory->createRow([]);
        }

        return $this->rowBuffer;
    }
388
389
    /**
     * Return the key of the current element: the index of the row to be processed when
     * empty rows are preserved, the number of rows read otherwise.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should always return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        if ($this->shouldPreserveEmptyRows) {
            return $this->nextRowIndexToBeProcessed;
        }

        return $this->numReadRows;
    }
404
405
    /**
     * Cleans up what was created to iterate over the object, by closing the XML reader.
     *
     * @return void
     */
    public function end()
    {
        $reader = $this->xmlReader;
        $reader->close();
    }
414
}
415