Failed Conditions
Push — develop_3.0 ( 80553c...f9d8ad )
by Adrien
31:09
created

RowIterator::end()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 3
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\XMLProcessingException;
7
use Box\Spout\Reader\IteratorInterface;
8
use Box\Spout\Reader\Wrapper\XMLReader;
9
use Box\Spout\Reader\XLSX\Helper\CellHelper;
10
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
11
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
12
use Box\Spout\Reader\Common\XMLProcessor;
13
14
/**
15
 * Class RowIterator
16
 *
17
 * @package Box\Spout\Reader\XLSX
18
 */
19
class RowIterator implements IteratorInterface
20
{
21
    /** Definition of XML nodes names used to parse data */
22
    const XML_NODE_DIMENSION = 'dimension';
23
    const XML_NODE_WORKSHEET = 'worksheet';
24
    const XML_NODE_ROW = 'row';
25
    const XML_NODE_CELL = 'c';
26
27
    /** Definition of XML attributes used to parse data */
28
    const XML_ATTRIBUTE_REF = 'ref';
29
    const XML_ATTRIBUTE_SPANS = 'spans';
30
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
31
    const XML_ATTRIBUTE_CELL_INDEX = 'r';
32
33
    /** @var string Path of the XLSX file being read */
34
    protected $filePath;
35
36
    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */
37
    protected $sheetDataXMLFilePath;
38
39
    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
40
    protected $xmlReader;
41
42
    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
43
    protected $xmlProcessor;
44
45
    /** @var Helper\CellValueFormatter Helper to format cell values */
46
    protected $cellValueFormatter;
47
48
    /** @var Helper\StyleHelper $styleHelper Helper to work with styles */
49
    protected $styleHelper;
50
51
    /**
52
     * TODO: This variable can be deleted when row indices get preserved
53
     * @var int Number of read rows
54
     */
55
    protected $numReadRows = 0;
56
57
    /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
58
    protected $currentlyProcessedRowData = [];
59
60
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
61
    protected $rowDataBuffer = null;
62
63
    /** @var bool Indicates whether all rows have been read */
64
    protected $hasReachedEndOfFile = false;
65
66
    /** @var int The number of columns the sheet has (0 meaning undefined) */
67
    protected $numColumns = 0;
68
69
    /** @var bool Whether empty rows should be returned or skipped */
70
    protected $shouldPreserveEmptyRows;
71
72
    /** @var int Last row index processed (one-based) */
73
    protected $lastRowIndexProcessed = 0;
74
75
    /** @var int Row index to be processed next (one-based) */
76
    protected $nextRowIndexToBeProcessed = 0;
77
78
    /** @var int Last column index processed (zero-based) */
79
    protected $lastColumnIndexProcessed = -1;
80
81
    /**
82
     * @param string $filePath Path of the XLSX file being read
83
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
84
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
85
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
86
     */
87 33
    public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
88
    {
89 33
        $this->filePath = $filePath;
90 33
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
91
92 33
        $this->xmlReader = new XMLReader();
93
94 33
        $this->styleHelper = new StyleHelper($filePath);
95 33
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates());
96
97 33
        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
98
99
        // Register all callbacks to process different nodes when reading the XML file
100 33
        $this->xmlProcessor = new XMLProcessor($this->xmlReader);
101 33
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
102 33
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
103 33
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
104 33
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
105 33
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
106 33
    }
107
108
    /**
109
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
110
     * @return string Path of the XML file containing the sheet data,
111
     *                without the leading slash.
112
     */
113 33
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
114
    {
115 33
        return ltrim($sheetDataXMLFilePath, '/');
116
    }
117
118
    /**
119
     * Rewind the Iterator to the first element.
120
     * Initializes the XMLReader object that reads the associated sheet data.
121
     * The XMLReader is configured to be safe from billion laughs attack.
122
     * @link http://php.net/manual/en/iterator.rewind.php
123
     *
124
     * @return void
125
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
126
     */
127 32
    public function rewind()
128
    {
129 32
        $this->xmlReader->close();
130
131 32
        $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
132 32
        if ($this->xmlReader->open($sheetDataFilePath) === false) {
133 1
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
134
        }
135
136 31
        $this->numReadRows = 0;
137 31
        $this->lastRowIndexProcessed = 0;
138 31
        $this->nextRowIndexToBeProcessed = 0;
139 31
        $this->rowDataBuffer = null;
140 31
        $this->hasReachedEndOfFile = false;
141 31
        $this->numColumns = 0;
142
143 31
        $this->next();
144 31
    }
145
146
    /**
147
     * Checks if current position is valid
148
     * @link http://php.net/manual/en/iterator.valid.php
149
     *
150
     * @return bool
151
     */
152 31
    public function valid()
153
    {
154 31
        return (!$this->hasReachedEndOfFile);
155
    }
156
157
    /**
158
     * Move forward to next element. Reads data describing the next unprocessed row.
159
     * @link http://php.net/manual/en/iterator.next.php
160
     *
161
     * @return void
162
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
163
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
164
     */
165 31
    public function next()
166
    {
167 31
        $this->nextRowIndexToBeProcessed++;
168
169 31
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
170 31
            $this->readDataForNextRow();
171
        }
172 31
    }
173
174
    /**
175
     * Returns whether we need data for the next row to be processed.
176
     * We don't need to read data if:
177
     *   we have already read at least one row
178
     *     AND
179
     *   we need to preserve empty rows
180
     *     AND
181
     *   the last row that was read is not the row that need to be processed
182
     *   (i.e. if we need to return empty rows)
183
     *
184
     * @return bool Whether we need data for the next row to be processed.
185
     */
186 31
    protected function doesNeedDataForNextRowToBeProcessed()
187
    {
188 31
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);
189
190
        return (
191 31
            !$hasReadAtLeastOneRow ||
192 29
            !$this->shouldPreserveEmptyRows ||
193 31
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
194
        );
195
    }
196
197
    /**
198
     * @return void
199
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
200
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
201
     */
202 31
    protected function readDataForNextRow()
203
    {
204 31
        $this->currentlyProcessedRowData = [];
205
206
        try {
207 31
            $this->xmlProcessor->readUntilStopped();
208
        } catch (XMLProcessingException $exception) {
209
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
210
        }
211
212 31
        $this->rowDataBuffer = $this->currentlyProcessedRowData;
213 31
    }
214
215
    /**
216
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
217
     * @return int A return code that indicates what action should the processor take next
218
     */
219 15
    protected function processDimensionStartingNode($xmlReader)
220
    {
221
        // Read dimensions of the sheet
222 15
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
223 15
        if (preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
224 12
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
225
        }
226
227 15
        return XMLProcessor::PROCESSING_CONTINUE;
228
    }
229
230
    /**
231
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
232
     * @return int A return code that indicates what action should the processor take next
233
     */
234 30
    protected function processRowStartingNode($xmlReader)
235
    {
236
        // Reset index of the last processed column
237 30
        $this->lastColumnIndexProcessed = -1;
238
239
        // Mark the last processed row as the one currently being read
240 30
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);
241
242
        // Read spans info if present
243 30
        $numberOfColumnsForRow = $this->numColumns;
244 30
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
245 30
        if ($spans) {
246 10
            list(, $numberOfColumnsForRow) = explode(':', $spans);
247 10
            $numberOfColumnsForRow = intval($numberOfColumnsForRow);
248
        }
249
250 30
        $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
251
252 30
        return XMLProcessor::PROCESSING_CONTINUE;
253
    }
254
255
    /**
256
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
257
     * @return int A return code that indicates what action should the processor take next
258
     */
259 30
    protected function processCellStartingNode($xmlReader)
260
    {
261 30
        $currentColumnIndex = $this->getColumnIndex($xmlReader);
262
263
        // NOTE: expand() will automatically decode all XML entities of the child nodes
264 30
        $node = $xmlReader->expand();
265 30
        $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node);
266 30
        $this->lastColumnIndexProcessed = $currentColumnIndex;
267
268 30
        return XMLProcessor::PROCESSING_CONTINUE;
269
    }
270
271
    /**
272
     * @return int A return code that indicates what action should the processor take next
273
     */
274 30
    protected function processRowEndingNode()
275
    {
276
        // if the fetched row is empty and we don't want to preserve it..,
277 30
        if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) {
278
            // ... skip it
279
            return XMLProcessor::PROCESSING_CONTINUE;
280
        }
281
282 30
        $this->numReadRows++;
283
284
        // If needed, we fill the empty cells
285 30
        if ($this->numColumns === 0) {
286 18
            $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
287
        }
288
289
        // at this point, we have all the data we need for the row
290
        // so that we can populate the buffer
291 30
        return XMLProcessor::PROCESSING_STOP;
292
    }
293
294
    /**
295
     * @return int A return code that indicates what action should the processor take next
296
     */
297 30
    protected function processWorksheetEndingNode()
298
    {
299
        // The closing "</worksheet>" marks the end of the file
300 30
        $this->hasReachedEndOfFile = true;
301
302 30
        return XMLProcessor::PROCESSING_STOP;
303
    }
304
305
    /**
306
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
307
     * @return int Row index
308
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
309
     */
310 30
    protected function getRowIndex($xmlReader)
311
    {
312
        // Get "r" attribute if present (from something like <row r="3"...>
313 30
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);
314
315 30
        return ($currentRowIndex !== null) ?
316 30
                intval($currentRowIndex) :
317 30
                $this->lastRowIndexProcessed + 1;
318
    }
319
320
    /**
321
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
322
     * @return int Column index
323
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
324
     */
325 30
    protected function getColumnIndex($xmlReader)
326
    {
327
        // Get "r" attribute if present (from something like <c r="A1"...>
328 30
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
329
330 30
        return ($currentCellIndex !== null) ?
331 29
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
332 30
                $this->lastColumnIndexProcessed + 1;
333
    }
334
335
    /**
336
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
337
     *
338
     * @param \DOMNode $node
339
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
340
     */
341 30
    protected function getCellValue($node)
342
    {
343 30
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
344
    }
345
346
    /**
347
     * @param array $rowData
348
     * @return bool Whether the given row is empty
349
     */
350 29
    protected function isEmptyRow($rowData)
351
    {
352 29
        return (count($rowData) === 1 && $rowData[0] === '');
353
    }
354
355
    /**
356
     * Return the current element, either an empty row or from the buffer.
357
     * @link http://php.net/manual/en/iterator.current.php
358
     *
359
     * @return array|null
360
     */
361 30
    public function current()
362
    {
363 30
        $rowDataForRowToBeProcessed = $this->rowDataBuffer;
364
365 30
        if ($this->shouldPreserveEmptyRows) {
366
            // when we need to preserve empty rows, we will either return
367
            // an empty row or the last row read. This depends whether the
368
            // index of last row that was read matches the index of the last
369
            // row whose value should be returned.
370 1
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
371
                // return empty row if mismatch between last processed row
372
                // and the row that needs to be returned
373 1
                $rowDataForRowToBeProcessed = [''];
374
            }
375
        }
376
377 30
        return $rowDataForRowToBeProcessed;
378
    }
379
380
    /**
381
     * Return the key of the current element. Here, the row index.
382
     * @link http://php.net/manual/en/iterator.key.php
383
     *
384
     * @return int
385
     */
386 29
    public function key()
387
    {
388
        // TODO: This should return $this->nextRowIndexToBeProcessed
389
        //       but to avoid a breaking change, the return value for
390
        //       this function has been kept as the number of rows read.
391 29
        return $this->shouldPreserveEmptyRows ?
392 1
                $this->nextRowIndexToBeProcessed :
393 29
                $this->numReadRows;
394
    }
395
396
397
    /**
398
     * Cleans up what was created to iterate over the object.
399
     *
400
     * @return void
401
     */
402 32
    public function end()
403
    {
404 32
        $this->xmlReader->close();
405 32
    }
406
}
407