RowIterator::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 26
ccs 15
cts 15
cp 1
rs 9.504
c 0
b 0
f 0
cc 1
nc 1
nop 8
crap 1

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameter lists also tend to become inconsistent as soon as callers need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Entity\Cell;
6
use Box\Spout\Common\Entity\Row;
7
use Box\Spout\Common\Exception\IOException;
8
use Box\Spout\Reader\Common\Manager\RowManager;
9
use Box\Spout\Reader\Common\XMLProcessor;
10
use Box\Spout\Reader\Exception\InvalidValueException;
11
use Box\Spout\Reader\Exception\XMLProcessingException;
12
use Box\Spout\Reader\IteratorInterface;
13
use Box\Spout\Reader\Wrapper\XMLReader;
14
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
15
use Box\Spout\Reader\XLSX\Helper\CellHelper;
16
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
17
18
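/**
 * Iterates over the rows of a single XLSX sheet by streaming the sheet's XML data
 * and reacting to its "dimension", "row", "c" and "worksheet" nodes.
 */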
21
class RowIterator implements IteratorInterface
22
{
23
    /**
     * Names of the XML nodes for which this iterator registers processing callbacks.
     */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /**
     * Names of the XML attributes read from the nodes above.
     * Row and cell nodes both carry their index in an attribute named "r".
     */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /**
     * @var string Path of the XLSX file being read
     */
    protected $filePath;

    /**
     * @var string Path of the sheet data XML file as in [Content_Types].xml, stored without leading slash
     */
    protected $sheetDataXMLFilePath;

    /**
     * @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data
     */
    protected $xmlReader;

    /**
     * @var \Box\Spout\Reader\Common\XMLProcessor Helper object that dispatches XML nodes to the registered callbacks
     */
    protected $xmlProcessor;

    /**
     * @var \Box\Spout\Reader\XLSX\Helper\CellValueFormatter Helper to format cell values
     */
    protected $cellValueFormatter;

    /**
     * @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows
     */
    protected $rowManager;

    /**
     * @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities
     */
    protected $entityFactory;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     *
     * @var int Number of rows read so far (rows that were skipped are not counted)
     */
    protected $numReadRows = 0;

    /**
     * @var Row|null Row currently being populated by the callbacks; not set until data is read for the first time
     */
    protected $currentlyProcessedRow;

    /**
     * @var Row|null Buffer used to store the current row, while checking if there are more rows to read
     */
    protected $rowBuffer;

    /**
     * @var bool Indicates whether all rows have been read
     */
    protected $hasReachedEndOfFile = false;

    /**
     * @var int The number of columns the sheet has (0 meaning undefined)
     */
    protected $numColumns = 0;

    /**
     * @var bool Whether empty rows should be returned or skipped
     */
    protected $shouldPreserveEmptyRows;

    /**
     * @var int Last row index processed (one-based, 0 while no row has been processed)
     */
    protected $lastRowIndexProcessed = 0;

    /**
     * @var int Row index to be processed next (one-based, 0 before the first call to next())
     */
    protected $nextRowIndexToBeProcessed = 0;

    /**
     * @var int Last column index processed (zero-based, -1 while no cell of the row has been processed)
     */
    protected $lastColumnIndexProcessed = -1;
85
86
    /**
     * Stores the collaborators and registers, on the XML processor, the callbacks
     * that get invoked for the nodes of interest while the sheet data is read.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     *                                     (leading slashes are stripped before the path is stored)
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        // Collaborators
        $this->xmlReader = $xmlReader;
        $this->xmlProcessor = $xmlProcessor;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->rowManager = $rowManager;
        $this->entityFactory = $entityFactory;

        // Reading options
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

        // Node name, node type and name of the method handling that node.
        // The callbacks are registered in the order listed here.
        $nodeHandlers = [
            [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
        ];

        foreach ($nodeHandlers as $nodeHandler) {
            list($nodeName, $nodeType, $methodName) = $nodeHandler;
            $this->xmlProcessor->registerCallback($nodeName, $nodeType, [$this, $methodName]);
        }
    }
122
123
    /**
     * Strips every leading "/" from the given sheet data path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data, without leading slash
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        $pathWithoutLeadingSlash = \ltrim($sheetDataXMLFilePath, '/');

        return $pathWithoutLeadingSlash;
    }
132
133
    /**
     * Rewind the Iterator to the first element.
     * Closes the XMLReader, re-opens the sheet data XML inside the XLSX file,
     * resets the iteration state and reads the first row.
     * NOTE(review): the previous docblock stated that the XMLReader is configured to be
     * safe from the billion laughs attack; that configuration is not visible here.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    public function rewind()
    {
        $reader = $this->xmlReader;
        $reader->close();

        $wasOpened = $reader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath);
        if ($wasOpened === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Start over from a clean iteration state
        $this->hasReachedEndOfFile = false;
        $this->rowBuffer = null;
        $this->numColumns = 0;
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;

        // Position the iterator on the first row
        $this->next();
    }
159
160
    /**
     * Checks if current position is valid, i.e. the end of the sheet data has not been reached.
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        $isExhausted = $this->hasReachedEndOfFile;

        return !$isExhausted;
    }
170
171
    /**
     * Move forward to next element: advances the index of the row to be processed
     * and reads more sheet data only when that is required.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        ++$this->nextRowIndexToBeProcessed;

        if (!$this->doesNeedDataForNextRowToBeProcessed()) {
            // the row read last is still ahead of the requested index: nothing to read yet
            return;
        }

        $this->readDataForNextRow();
    }
187
188
    /**
     * Returns whether we need data for the next row to be processed.
     * Data must be read when any of the following holds:
     *   - no row has been read yet
     *   - empty rows do not need to be preserved
     *   - the index of the last row read is lower than the index of the row to be processed
     * Otherwise the row read last is still ahead and empty rows are returned in the meantime.
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        if ($this->lastRowIndexProcessed === 0) {
            // nothing has been read so far
            return true;
        }

        if (!$this->shouldPreserveEmptyRows) {
            return true;
        }

        return $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed;
    }
210
211
    /**
     * Creates a fresh row, lets the XML processor populate it through the registered
     * callbacks until one of them stops the processing, then stores it in the row buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Chain the original exception so that its stack trace is not lost
            throw new IOException(
                "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]",
                0,
                $exception
            );
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }
228
229
    /**
     * Reads the dimensions of the sheet and derives the number of columns from them.
     * The number of columns is left untouched when the "ref" attribute is missing
     * or does not describe a range (e.g. "A1" for an empty sheet).
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // returns 'A1:M13' for instance (or 'A1' for empty sheet)
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

        // A missing attribute must not reach preg_match(): passing null as subject is deprecated
        if (!\is_string($dimensionRef)) {
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        if (\preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches) === 1) {
            // $matches[1] is the bottom-right cell; its column index is zero-based
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }
243
244
    /**
     * Prepares the currently processed row: records its index and pre-fills it with
     * empty cells, one per expected column.
     * The expected number of columns comes from the "spans" attribute of the row when
     * it is usable, and from the sheet dimensions otherwise.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if (\is_string($spans) && \preg_match_all('/\d+:(\d+)/', $spans, $spanMatches) > 0) {
            // The attribute may list several "first:last" ranges: the highest "last" gives
            // the number of columns. A value without any range keeps the sheet-level count.
            $numberOfColumnsForRow = \max(\array_map('intval', $spanMatches[1]));
        }

        // Each position gets its own empty cell, so that changing one placeholder
        // cannot affect the others (array_fill() would share a single instance).
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }
269
270
    /**
     * Builds the cell described by the current node and stores it in the currently
     * processed row, at the column index resolved for that node.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $columnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $cell = $this->getCell($xmlReader->expand());
        $this->currentlyProcessedRow->setCellAtIndex($cell, $columnIndex);

        // Remembered so that the next cell can be positioned when it carries no index
        $this->lastColumnIndexProcessed = $columnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
287
288
    /**
     * Finalizes the currently processed row. Empty rows are skipped when they do not
     * need to be preserved; any other row is counted and stops the processing.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $isEmptyRowToSkip = !$this->shouldPreserveEmptyRows
            && $this->rowManager->isEmpty($this->currentlyProcessedRow);

        if ($isEmptyRowToSkip) {
            // keep reading: the following row will be considered instead
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // When the number of columns is unknown, the gaps between cells are filled here
        if ($this->numColumns === 0) {
            $filledRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
            $this->currentlyProcessedRow = $filledRow;
        }

        // The row is complete: stop so that the buffer can be populated with it
        return XMLProcessor::PROCESSING_STOP;
    }
310
311
    /**
     * Flags the end of the file: the closing "</worksheet>" node means there is nothing left to read.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        $nextAction = XMLProcessor::PROCESSING_STOP;
        $this->hasReachedEndOfFile = true;

        return $nextAction;
    }
321
322
    /**
     * Returns the index of the row the reader is positioned on: the value of its "r"
     * attribute when present, the index following the last processed row otherwise.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>
        $rowIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        if ($rowIndexAttribute === null) {
            return $this->lastRowIndexProcessed + 1;
        }

        return (int) $rowIndexAttribute;
    }
336
337
    /**
     * Returns the index of the column the reader is positioned on: derived from the "r"
     * attribute of the cell when present, the index following the last processed column otherwise.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $cellIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        if ($cellIndexAttribute === null) {
            return $this->lastColumnIndexProcessed + 1;
        }

        return CellHelper::getColumnIndexFromCellIndex($cellIndexAttribute);
    }
351
352
    /**
     * Returns the cell holding the formatted value extracted from the given XML node.
     * When the value is reported as invalid, the returned cell holds the invalid value
     * and is flagged with the error type.
     *
     * @param \DOMNode $node
     * @return Cell The cell created for the value of the node
     */
    protected function getCell($node)
    {
        try {
            return $this->entityFactory->createCell(
                $this->cellValueFormatter->extractAndFormatNodeValue($node)
            );
        } catch (InvalidValueException $invalidValueException) {
            $errorCell = $this->entityFactory->createCell($invalidValueException->getInvalidValue());
            $errorCell->setType(Cell::TYPE_ERROR);

            return $errorCell;
        }
    }
370
371
    /**
     * Return the current element, either an empty row or from the buffer.
     * A new empty row is returned when empty rows must be preserved and the row read
     * last is not the row whose value is expected.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        $isWaitingForBufferedRow = $this->shouldPreserveEmptyRows
            && $this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed;

        if ($isWaitingForBufferedRow) {
            // mismatch between the last processed row and the row to be returned
            return $this->entityFactory->createRow();
        }

        return $this->rowBuffer;
    }
395
396
    /**
     * Return the key of the current element: the index of the row to be processed when
     * empty rows are preserved, the number of rows read otherwise.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        if ($this->shouldPreserveEmptyRows) {
            return $this->nextRowIndexToBeProcessed;
        }

        return $this->numReadRows;
    }
411
412
    /**
     * Cleans up what was created to iterate over the object, by closing the XMLReader.
     *
     * @return void
     */
    public function end()
    {
        $reader = $this->xmlReader;
        $reader->close();
    }
421
}
422