Completed
Pull Request — develop_3.0 (#459)
by Adrien
02:46
created

RowIterator   B

Complexity

Total Complexity 36

Size/Duplication

Total Lines 382
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Test Coverage

Coverage 97.12%

Importance

Changes 0
Metric Value
wmc 36
lcom 1
cbo 4
dl 0
loc 382
ccs 101
cts 104
cp 0.9712
rs 8.8
c 0
b 0
f 0

19 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 16 1
A normalizeSheetDataXMLFilePath() 0 4 1
A rewind() 0 17 2
A valid() 0 4 1
A next() 0 8 2
A doesNeedDataForNextRowToBeProcessed() 0 10 3
A readDataForNextRow() 0 12 2
A processDimensionStartingNode() 0 10 2
A processRowStartingNode() 0 20 3
A processCellStartingNode() 0 11 1
A processRowEndingNode() 0 19 4
A processWorksheetEndingNode() 0 7 1
A getRowIndex() 0 9 2
A getColumnIndex() 0 9 2
A getCellValue() 0 4 1
A isEmptyRow() 0 4 2
A current() 0 18 3
A key() 0 9 2
A end() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Common\Entity\Options;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\Wrapper\XMLReader;
10
use Box\Spout\Reader\XLSX\Creator\EntityFactory;
11
use Box\Spout\Reader\XLSX\Creator\HelperFactory;
12
use Box\Spout\Reader\XLSX\Helper\CellHelper;
13
use Box\Spout\Reader\Common\XMLProcessor;
14
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
15
16
/**
 * Class RowIterator
 *
 * Iterates over the rows of one sheet of an XLSX file by streaming the sheet data XML.
 * Node callbacks registered on the XML processor accumulate the cells of the row being read;
 * each completed row is copied into a buffer that current() exposes.
 *
 * @package Box\Spout\Reader\XLSX
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
    protected $xmlProcessor;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
    protected $currentlyProcessedRowData = [];

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * Stores the collaborators and registers the node callbacks on the XML processor.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $shouldPreserveEmptyRows, $xmlReader, $xmlProcessor, $cellValueFormatter)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Closes and re-opens the XMLReader on the associated sheet data, resets the
     * iteration state and reads the first row.
     * NOTE(review): the original documentation states that the XMLReader is configured to be
     * safe from billion laughs attack; that configuration lives in the reader wrapper, not here.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool False once the closing worksheet node has been reached
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Runs the XML processor until one of the callbacks stops it, then copies
     * the accumulated row data into the buffer read by current().
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowDataBuffer = $this->currentlyProcessedRowData;
    }

    /**
     * Derives the number of columns of the sheet from the "ref" attribute, when it describes a range.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be missing; only a string can be matched against the pattern
        if (is_string($dimensionRef) && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Resets the per-row state and pre-fills the row data with empty values
     * when the number of columns of the row is known.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present, falling back to the sheet dimension (0 when unknown)
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        $numberOfColumnsForRow = $this->getNumberOfColumnsFromSpans($spans, $this->numColumns);

        $this->currentlyProcessedRowData = ($numberOfColumnsForRow > 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Extracts the number of columns described by a "spans" attribute value.
     * The value may hold several space-separated ranges ("1:3 5:9"); the highest
     * end column across all ranges is used.
     *
     * @param string|null $spans Raw value of the "spans" attribute, if any
     * @param int $defaultNumberOfColumns Value returned when no usable range is found
     * @return int Number of columns for the row
     */
    private function getNumberOfColumnsFromSpans($spans, $defaultNumberOfColumns)
    {
        if (!is_string($spans) || $spans === '') {
            return $defaultNumberOfColumns;
        }

        // preg_match_all returns 0 when nothing matches and false on error
        if (!preg_match_all('/\d+:(\d+)/', $spans, $matches)) {
            return $defaultNumberOfColumns;
        }

        return max(array_map('intval', $matches[1]));
    }

    /**
     * Stores the value of the current cell at its column index in the row being read.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node);
        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Finalizes the row that was just read, unless it is empty and empty rows must be skipped.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // if the fetched row is empty and we don't want to preserve it..,
        if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // If needed, we fill the empty cells
        if ($this->numColumns === 0) {
            $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
        }

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns the index of the row being read. When the "r" attribute is missing,
     * the row is assumed to directly follow the last processed row.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
                intval($currentRowIndex) :
                $this->lastRowIndexProcessed + 1;
    }

    /**
     * Returns the index of the column of the cell being read. When the "r" attribute is missing,
     * the cell is assumed to directly follow the last processed cell.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @return int Column index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * A row is considered empty when it only contains a single empty string at index 0.
     *
     * @param array $rowData
     * @return bool Whether the given row is empty
     */
    protected function isEmptyRow($rowData)
    {
        // The single element may sit at an index other than 0 (e.g. only cell "C3" is set),
        // so make sure index 0 exists before reading it.
        return (count($rowData) === 1 && isset($rowData[0]) && $rowData[0] === '');
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        $rowDataForRowToBeProcessed = $this->rowDataBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowDataForRowToBeProcessed = [''];
            }
        }

        return $rowDataForRowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
403