Completed
Pull Request — master (#557)
by Adrien
03:10
created

RowIterator::__construct()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 26
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 26
ccs 15
cts 15
cp 1
rs 8.8571
c 0
b 0
f 0
cc 1
eloc 22
nc 1
nop 8
crap 1

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

There are several approaches to avoid long parameter lists:

1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Entity\Cell;
6
use Box\Spout\Common\Entity\Row;
7
use Box\Spout\Common\Exception\IOException;
8
use Box\Spout\Reader\Common\Manager\RowManager;
9
use Box\Spout\Reader\Common\XMLProcessor;
10
use Box\Spout\Reader\Exception\InvalidValueException;
11
use Box\Spout\Reader\Exception\XMLProcessingException;
12
use Box\Spout\Reader\IteratorInterface;
13
use Box\Spout\Reader\Wrapper\XMLReader;
14
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
15
use Box\Spout\Reader\XLSX\Helper\CellHelper;
16
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
17
18
/**
 * Class RowIterator
 *
 * Iterates over the rows of one sheet of an XLSX file. The sheet's XML is read
 * through an XMLProcessor: this class registers callbacks for the "dimension",
 * "row", "c" and "worksheet" nodes and builds one Row at a time from them.
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string Path of the sheet data XML file as in [Content_Types].xml, without leading slash */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows */
    protected $rowManager;

    /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var Row Contains the row currently processed */
    protected $currentlyProcessedRow;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * Stores the collaborators and registers the node callbacks on the XML processor.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->rowManager = $rowManager;
        $this->entityFactory = $entityFactory;

        // Register all callbacks to process different nodes when reading the XML file.
        // NOTE: the callbacks are protected methods, so they are deliberately passed as
        // [object, 'methodName'] pairs and left for the processor to resolve.
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Closes and re-opens the XMLReader on the sheet data, resets the iteration
     * state and reads the first row.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    public function rewind()
    {
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool False once the closing "worksheet" node has been reached
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Creates a fresh row, lets the XML processor run until one of the callbacks
     * stops it, then stores the resulting row in the buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Chain the original exception so that the root cause (and its stack trace) is not lost.
            // NOTE(review): assumes IOException keeps the standard \Exception constructor signature.
            throw new IOException(
                "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]",
                0,
                $exception
            );
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * Reads the sheet dimensions to find out how many columns the sheet has.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be missing (null); passing null to preg_match() is deprecated as of PHP 8.1.
        if ($dimensionRef !== null && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Prepares the row currently processed: resets the column cursor, records the row index
     * and pre-fills the row with empty cells.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            // A value without ':' yields 0 columns (no "undefined offset" notice),
            // and a negative upper bound is clamped to 0.
            $spanBounds = explode(':', $spans);
            $numberOfColumnsForRow = isset($spanBounds[1]) ? max(0, (int) $spanBounds[1]) : 0;
        }

        // Create one distinct Cell per column. Filling the array with a single Cell instance
        // would make every placeholder share the same object, so a change made to one empty
        // cell would show up in all the others.
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Builds the cell described by the current node and stores it in the row currently processed.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $cell = $this->getCell($node);

        $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex);
        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Decides whether the row that was just read should be returned or skipped.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // if the fetched row is empty and we don't want to preserve it..,
        if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // If needed, we fill the empty cells
        if ($this->numColumns === 0) {
            $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
        }

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index: the "r" attribute cast to int when present,
     *             otherwise the index following the last processed row
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
                (int) $currentRowIndex :
                $this->lastRowIndexProcessed + 1;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index: derived from the "r" attribute when present,
     *             otherwise the index following the last processed column
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the cell holding the formatted value extracted from the given XML node.
     * If the value is reported as invalid, the cell holds the invalid value and is
     * flagged with the error type instead.
     *
     * @param \DOMNode $node
     * @return Cell The cell created from the node's value
     */
    protected function getCell($node)
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = $this->entityFactory->createCell($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = $this->entityFactory->createCell($exception->getInvalidValue());
            $cell->setType(Cell::TYPE_ERROR);
        }

        return $cell;
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        $rowToBeProcessed = $this->rowBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowToBeProcessed = $this->entityFactory->createRow();
            }
        }

        return $rowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int The index of the row to be processed when empty rows are preserved,
     *             the number of rows read so far otherwise
     */
    public function key()
    {
        // TODO: This should always return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the number of rows read
        //       is still returned when empty rows are not preserved.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
422