Passed
Pull Request — develop_3.0 (#508)
by Adrien
02:50
created

RowIterator::__construct()   B

Complexity

Conditions 1
Paths 1

Size

Total Lines 26
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 26
ccs 15
cts 15
cp 1
rs 8.8571
c 0
b 0
f 0
cc 1
eloc 22
nc 1
nop 8
crap 1

How to fix   Many Parameters   

Many Parameters

Methods with many parameters are not only hard to understand; their parameters also often become inconsistent when you need more or different data.

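There are several approaches to avoid long parameter lists, such as introducing a parameter object, passing the whole object instead of several of its values, or replacing a parameter with a method call.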

1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Common\Entity\Cell;
7
use Box\Spout\Reader\Common\Entity\Row;
8
use Box\Spout\Reader\Common\XMLProcessor;
9
use Box\Spout\Reader\Exception\XMLProcessingException;
10
use Box\Spout\Reader\IteratorInterface;
11
use Box\Spout\Reader\Wrapper\XMLReader;
12
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;
13
use Box\Spout\Reader\XLSX\Helper\CellHelper;
14
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
15
use Box\Spout\Reader\XLSX\Manager\RowManager;
16
17
/**
 * Class RowIterator
 *
 * Iterates over the rows of one sheet of an XLSX file.
 * The sheet data XML is read through the injected XMLReader; the injected XMLProcessor
 * invokes the "process*" callbacks registered in the constructor for the nodes of interest.
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string Path of the sheet data XML file as in [Content_Types].xml, without leading slashes */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var \Box\Spout\Reader\XLSX\Manager\RowManager Manages rows */
    protected $rowManager;

    /** @var \Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var Row Contains the row currently processed */
    protected $currentlyProcessedRow;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * Stores the collaborators and registers this iterator's node callbacks on the XML processor.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->rowManager = $rowManager;
        $this->entityFactory = $entityFactory;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without any leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Closes and re-opens the XMLReader on the sheet data XML, resets the iteration state
     * and reads the first row.
     * NOTE(review): the previous documentation stated that the XMLReader is configured to be
     * safe from billion laughs attack; that configuration is not visible in this class.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    public function rewind()
    {
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Creates a fresh row, lets the XML processor run until one of the callbacks stops it,
     * then stores the resulting row in the buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow([]);

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Chain the original exception as "previous" so that its stack trace is not lost
            throw new IOException(
                "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]",
                0,
                $exception
            );
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * Derives the number of columns of the sheet from the "ref" attribute, when it describes a range.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be missing: only run the regex on an actual string
        if (is_string($dimensionRef) && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            // The column index is zero-based, hence the "+ 1" to get a count
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Prepares the row currently processed: resets the column tracking, records the row index
     * and pre-fills the row with empty cells.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $spanBounds = explode(':', $spans);
            // Without a "first:last" shape, keep the number of columns derived from the dimension
            if (isset($spanBounds[1])) {
                $numberOfColumnsForRow = (int) $spanBounds[1];
            }
        }

        // Create one distinct empty cell per column: filling the array with a single
        // instance would make every column share the same object.
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Builds the cell for the current node and stores it in the row currently processed.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $cell = $this->getCell($node);

        $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex);
        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // if the fetched row is empty and we don't want to preserve it...
        if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // If the number of columns of the sheet is unknown, we fill the empty cells
        if ($this->numColumns === 0) {
            $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
        }

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index: the value of the "r" attribute when present,
     *             otherwise the index following the last processed row
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>)
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
                (int) $currentRowIndex :
                $this->lastRowIndexProcessed + 1;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index: derived from the "r" attribute when present,
     *             otherwise the index following the last processed column
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>)
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns a cell holding the value extracted and formatted from the given XML node.
     *
     * @param \DOMNode $node
     * @return Cell The cell created with the value of the given node
     */
    protected function getCell($node)
    {
        $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

        return $this->entityFactory->createCell($cellValue);
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        $rowToBeProcessed = $this->rowBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowToBeProcessed = $this->entityFactory->createRow([]);
            }
        }

        return $rowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
415