Completed
Pull Request — master (#372)
by
unknown
03:34
created

RowIterator::key()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 9
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 9
ccs 4
cts 4
cp 1
rs 9.6666
c 0
b 0
f 0
cc 2
eloc 4
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\XMLProcessingException;
7
use Box\Spout\Reader\IteratorInterface;
8
use Box\Spout\Reader\Wrapper\XMLReader;
9
use Box\Spout\Reader\XLSX\Helper\CellHelper;
10
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
11
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
12
use Box\Spout\Reader\Common\XMLProcessor;
13
14
/**
15
 * Class RowIterator
16
 *
17
 * @package Box\Spout\Reader\XLSX
18
 */
19
class RowIterator implements IteratorInterface
20
{
21
    /** Definition of XML nodes names used to parse data */
22
    const XML_NODE_DIMENSION = 'dimension';
23
    const XML_NODE_WORKSHEET = 'worksheet';
24
    const XML_NODE_ROW = 'row';
25
    const XML_NODE_CELL = 'c';
26
27
    /** Definition of XML attributes used to parse data */
28
    const XML_ATTRIBUTE_REF = 'ref';
29
    const XML_ATTRIBUTE_SPANS = 'spans';
30
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
31
    const XML_ATTRIBUTE_CELL_INDEX = 'r';
32
33
    /** @var string Path of the XLSX file being read */
34
    protected $filePath;
35
36
    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */
37
    protected $sheetDataXMLFilePath;
38
39
    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
40
    protected $xmlReader;
41
42
    /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
43
    protected $xmlProcessor;
44
45
    /** @var Helper\CellValueFormatter Helper to format cell values */
46
    protected $cellValueFormatter;
47
48
    /** @var Helper\StyleHelper $styleHelper Helper to work with styles */
49
    protected $styleHelper;
50
51
    /**
52
     * TODO: This variable can be deleted when row indices get preserved
53
     * @var int Number of read rows
54
     */
55
    protected $numReadRows = 0;
56
57
    /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */
58
    protected $currentlyProcessedRowData = [];
59
60
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
61
    protected $rowDataBuffer = null;
62
63
    /** @var bool Indicates whether all rows have been read */
64
    protected $hasReachedEndOfFile = false;
65
66
    /** @var int The number of columns the sheet has (0 meaning undefined) */
67
    protected $numColumns = 0;
68
69
    /** @var bool Whether empty rows should be returned or skipped */
70
    protected $shouldPreserveEmptyRows;
71
72
    /** @var int Last row index processed (one-based) */
73
    protected $lastRowIndexProcessed = 0;
74
75
    /** @var int Row index to be processed next (one-based) */
76
    protected $nextRowIndexToBeProcessed = 0;
77
78
    /** @var int Last column index processed (zero-based) */
79
    protected $lastColumnIndexProcessed = -1;
80
81
    /**
     * Prepares an iterator over the rows of one sheet of an XLSX file.
     * Only helpers and XML callbacks are set up here; the sheet data file
     * is opened later, when rewind() is called.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        $this->xmlReader = new XMLReader();

        $this->styleHelper = new StyleHelper($filePath);
        $shouldFormatDates = $options->shouldFormatDates();
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);

        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();

        // Each entry is [node name, node type, name of the method handling that node].
        // The callbacks are registered in the order listed here.
        $callbackDefinitions = [
            [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
        ];

        $this->xmlProcessor = new XMLProcessor($this->xmlReader);
        foreach ($callbackDefinitions as $callbackDefinition) {
            list($nodeName, $nodeType, $handlerName) = $callbackDefinition;
            $this->xmlProcessor->registerCallback($nodeName, $nodeType, [$this, $handlerName]);
        }
    }
107
108
    /**
     * Strips every leading "/" from the given sheet data path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without any leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        $pathWithoutLeadingSlash = ltrim($sheetDataXMLFilePath, '/');

        return $pathWithoutLeadingSlash;
    }
117
118
    /**
     * Rewind the Iterator to the first element.
     * Closes the XMLReader, re-opens it on the sheet data entry of the XLSX
     * archive (through a "zip://" URI), resets the iteration state and then
     * reads the first row by calling next().
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        $this->xmlReader->close();

        $sheetDataUri = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
        $wasOpened = $this->xmlReader->open($sheetDataUri);
        if ($wasOpened === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Row bookkeeping starts over...
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;

        // ... and so does everything learnt from the previous pass over the sheet
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        // Position the iterator on the first row
        $this->next();
    }
145
146
    /**
     * Tells whether the iterator still points to a row, i.e. whether the end
     * of the sheet has not been reached yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        $isAtEndOfFile = $this->hasReachedEndOfFile;

        return !$isAtEndOfFile;
    }
156
157
    /**
     * Move forward to next element.
     * Increments the index of the row to be processed and, only when needed,
     * reads the data describing the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed += 1;

        if (!$this->doesNeedDataForNextRowToBeProcessed()) {
            // The row that was last read has not been returned yet: nothing to read
            return;
        }

        $this->readDataForNextRow();
    }
173
174
    /**
     * Returns whether we need data for the next row to be processed.
     * Data must be read when any of the following holds:
     *   - no row has been read yet
     *   - empty rows do not need to be preserved
     *   - the last row that was read comes before the row to be processed
     *
     * In other words, reading is only skipped while empty rows are being
     * returned ahead of a row that has already been read.
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        if ($this->lastRowIndexProcessed === 0) {
            // Nothing has been read so far
            return true;
        }

        if (!$this->shouldPreserveEmptyRows) {
            return true;
        }

        return $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed;
    }
196
197
    /**
     * Runs the XML processor until one of the registered callbacks stops it,
     * then stores the collected row data into the row buffer.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        // Start from a clean slate; the callbacks fill this while processing
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            $message = "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]";
            throw new IOException($message);
        }

        $this->rowDataBuffer = $this->currentlyProcessedRowData;
    }
214
215
    /**
     * Reads the sheet dimensions and derives the number of columns from them.
     * The number of columns is left untouched when the "ref" attribute is
     * missing or does not describe a range (e.g. 'A1' for an empty sheet).
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be absent: only hand an actual string over to preg_match()
        if (is_string($dimensionRef) && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            // $matches[1] is the bottom-right cell of the range; column indexes are zero-based
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }
229
230
    /**
     * Starts the processing of a new row: resets the column tracking, records
     * the index of the row and pre-fills the row data with empty strings when
     * the number of columns is known (from the row's "spans" attribute or,
     * failing that, from the sheet dimensions).
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            // The attribute may hold several whitespace-separated ranges ('1:3 5:7'):
            // the row is as wide as the largest upper bound found.
            $numberOfColumnsForRow = 0;
            foreach (preg_split('/\s+/', $spans, -1, PREG_SPLIT_NO_EMPTY) as $span) {
                $spanBounds = explode(':', $span);
                if (isset($spanBounds[1])) {
                    $numberOfColumnsForRow = max($numberOfColumnsForRow, intval($spanBounds[1]));
                }
            }
        }

        // A non-positive count means "unknown": cells will be added as they get read
        $this->currentlyProcessedRowData = ($numberOfColumnsForRow > 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

        return XMLProcessor::PROCESSING_CONTINUE;
    }
254
255
    /**
     * Extracts the value of the current cell and stores it in the row data,
     * at the position given by the cell's column index.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $columnIndex = $this->getColumnIndex($xmlReader);

        $cellNode = $xmlReader->expand();
        $cellValue = $this->getCellValue($cellNode);

        $this->currentlyProcessedRowData[$columnIndex] = $cellValue;
        $this->lastColumnIndexProcessed = $columnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
269
270
    /**
     * Finalizes the row that has just been read. Empty rows are skipped
     * unless they must be preserved; any other row stops the processor so
     * that the row data can be moved to the buffer.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $shouldSkipRow = (
            !$this->shouldPreserveEmptyRows &&
            $this->isEmptyRow($this->currentlyProcessedRowData)
        );

        if ($shouldSkipRow) {
            // The row is empty and must not be preserved: keep reading
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // Without a known number of columns the row was not pre-filled,
        // so the gaps between the cells that were read get filled now
        if ($this->numColumns === 0) {
            $filledRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
            $this->currentlyProcessedRowData = $filledRowData;
        }

        // All the data needed for the row is now available
        // and the buffer can be populated
        return XMLProcessor::PROCESSING_STOP;
    }
292
293
    /**
     * Flags the end of the file once the closing "</worksheet>" node is
     * reached and stops the processor.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // Nothing follows the closing "</worksheet>" node
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }
303
304
    /**
     * Returns the index of the current row, taken from its "r" attribute.
     * When the attribute is missing, the row is assumed to directly follow
     * the last processed row.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>)
        $rowIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        if ($rowIndexAttribute === null) {
            return $this->lastRowIndexProcessed + 1;
        }

        return intval($rowIndexAttribute);
    }
318
319
    /**
     * Returns the index of the column the current cell belongs to, derived
     * from its "r" attribute. When the attribute is missing, the cell is
     * assumed to directly follow the last processed column.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @return int Column index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>)
        $cellIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        if ($cellIndexAttribute === null) {
            return $this->lastColumnIndexProcessed + 1;
        }

        return CellHelper::getColumnIndexFromCellIndex($cellIndexAttribute);
    }
333
334
    /**
     * Returns the value of the cell described by the given XML node.
     * The extraction and formatting are delegated to the cell value formatter.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        $formattedValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

        return $formattedValue;
    }
344
345
    /**
     * Returns whether the given row is empty. A row is considered empty only
     * when it holds exactly one cell, stored at index 0, whose value is the
     * empty string.
     *
     * @param array $rowData Row data (key = cell index, value = cell value)
     * @return bool Whether the given row is empty
     */
    protected function isEmptyRow($rowData)
    {
        // The row data may be sparse at this point (e.g. [2 => 'value']),
        // so index 0 must be checked for existence before being read.
        return (
            count($rowData) === 1 &&
            array_key_exists(0, $rowData) &&
            $rowData[0] === ''
        );
    }
353
354
    /**
     * Return the current element, either an empty row or from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        // When empty rows are preserved, the buffer may hold a row located
        // further down the sheet than the row to be returned. Until the
        // indexes match, an empty row is returned in its place.
        $isBufferedRowAhead = (
            $this->shouldPreserveEmptyRows &&
            $this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed
        );

        if ($isBufferedRowAhead) {
            return [''];
        }

        return $this->rowDataBuffer;
    }
378
379
    /**
     * Return the key of the current element.
     * When empty rows are preserved, this is the index of the row being
     * processed; otherwise it is the number of rows read so far.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        if ($this->shouldPreserveEmptyRows) {
            return $this->nextRowIndexToBeProcessed;
        }

        // TODO: This should return $this->nextRowIndexToBeProcessed as well
        //       but to avoid a breaking change, the return value for
        //       this case has been kept as the number of rows read.
        return $this->numReadRows;
    }
394
395
396
    /**
     * Cleans up what was created to iterate over the object,
     * by closing the underlying XMLReader.
     *
     * @return void
     */
    public function end()
    {
        $reader = $this->xmlReader;
        $reader->close();
    }
405
}
406