Completed
Push — master ( 606103...99816b )
by Adrien
02:35
created

RowIterator::processWorksheetEndingNode()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 7
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\XMLProcessingException;
7
use Box\Spout\Reader\IteratorInterface;
8
use Box\Spout\Reader\Wrapper\XMLReader;
9
use Box\Spout\Reader\XLSX\Helper\CellHelper;
10
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
11
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
12
use Box\Spout\Reader\Common\XMLProcessor;
13
14
/**
15
 * Class RowIterator
16
 *
17
 * @package Box\Spout\Reader\XLSX
18
 */
19
class RowIterator implements IteratorInterface
20
{
21
    /**
     * Names of the XML nodes this iterator reacts to while parsing sheet data.
     */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /**
     * Names of the XML attributes read while parsing sheet data.
     * Row nodes and cell nodes both expose their index through an attribute named "r".
     */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /**
     * @var string Path of the XLSX file being read
     */
    protected $filePath;

    /**
     * @var string Path of the sheet data XML file as in [Content_Types].xml, stored without leading slash
     */
    protected $sheetDataXMLFilePath;

    /**
     * @var \Box\Spout\Reader\Wrapper\XMLReader Reader used to walk through the sheet's XML data
     */
    protected $xmlReader;

    /**
     * @var \Box\Spout\Reader\Common\XMLProcessor Dispatches XML nodes to the callbacks registered in the constructor
     */
    protected $xmlProcessor;

    /**
     * @var Helper\CellValueFormatter Helper turning a cell node into its formatted value
     */
    protected $cellValueFormatter;

    /**
     * @var Helper\StyleHelper Helper to work with styles
     */
    protected $styleHelper;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     *
     * @var int Number of rows read so far (rows skipped because they are empty are not counted)
     */
    protected $numReadRows = 0;

    /**
     * @var array Data of the row currently being parsed (key = column index, value = cell value)
     */
    protected $currentlyProcessedRowData = [];

    /**
     * @var array|null Data of the last fully read row, kept until it is returned by current()
     */
    protected $rowDataBuffer = null;

    /**
     * @var bool Set to true once the closing "</worksheet>" node has been reached
     */
    protected $hasReachedEndOfFile = false;

    /**
     * @var int Number of columns of the sheet, taken from its "<dimension>" node (0 meaning undefined)
     */
    protected $numColumns = 0;

    /**
     * @var bool Whether empty rows should be returned (true) or skipped (false)
     */
    protected $shouldPreserveEmptyRows;

    /**
     * @var int Index of the last row read from the XML (one-based, 0 when no row has been read yet)
     */
    protected $lastRowIndexProcessed = 0;

    /**
     * @var int Index of the row the iterator currently points to (one-based)
     */
    protected $nextRowIndexToBeProcessed = 0;

    /**
     * @var int Index of the last column read in the current row (zero-based, -1 when no cell has been read yet)
     */
    protected $lastColumnIndexProcessed = -1;
80
81
    /**
     * Stores the location of the sheet data, creates the helpers needed to read it
     * and registers the callbacks invoked for each XML node of interest.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        $this->xmlReader = new XMLReader();

        $this->styleHelper = new StyleHelper($filePath);
        $shouldFormatDates = $options->shouldFormatDates();
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);

        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();

        // Each entry reads: node name, node type, name of the method handling that node.
        // The entries are registered in the order they are listed.
        $nodeCallbacks = [
            [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
            [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
            [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
            [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
        ];

        $this->xmlProcessor = new XMLProcessor($this->xmlReader);
        foreach ($nodeCallbacks as $nodeCallback) {
            $this->xmlProcessor->registerCallback($nodeCallback[0], $nodeCallback[1], [$this, $nodeCallback[2]]);
        }
    }
107
108
    /**
     * Strips every leading slash from the given sheet data path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string The same path, without any leading slash
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        $pathWithoutLeadingSlash = ltrim($sheetDataXMLFilePath, '/');

        return $pathWithoutLeadingSlash;
    }
117
118
    /**
     * Rewind the Iterator to the first element.
     * Closes the XMLReader, re-opens it on the sheet data XML file contained in
     * the XLSX archive, resets the iteration state and loads the first row.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        $this->xmlReader->close();

        $openResult = $this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath);
        if ($openResult === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Counters and indices start over...
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->numColumns = 0;

        // ... and so do the buffer and the end-of-file marker
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Position the iterator on the first row
        $this->next();
    }
144
145
    /**
     * Checks if current position is valid, i.e. if the end of the sheet data has not been reached yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        $hasReachedEnd = $this->hasReachedEndOfFile;

        return !$hasReachedEnd;
    }
155
156
    /**
     * Move forward to next element: advances the index of the row to be processed
     * and, only when required, reads the data of the next row from the XML file.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed += 1;

        if (!$this->doesNeedDataForNextRowToBeProcessed()) {
            // The row already in the buffer is still ahead of the current position
            return;
        }

        $this->readDataForNextRow();
    }
172
173
    /**
     * Returns whether data must be read from the XML file for the next row to be processed.
     * Reading is needed when any of the following holds:
     *   - no row has been read yet
     *   - empty rows are not preserved
     *   - the index of the last row read is lower than the index of the row to be processed
     *
     * In other words, reading is skipped only while empty rows are preserved and
     * the last row read is not behind the row to be processed.
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $noRowReadYet = ($this->lastRowIndexProcessed === 0);
        if ($noRowReadYet) {
            return true;
        }

        if (!$this->shouldPreserveEmptyRows) {
            return true;
        }

        return ($this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed);
    }
195
196
    /**
     * Lets the XML processor run until one of the registered callbacks stops it,
     * then copies the row data collected meanwhile into the row buffer.
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow()
    {
        // Start from a clean slate: the callbacks will fill this array
        $this->currentlyProcessedRowData = [];

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            // Surface XML processing failures as I/O failures
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
        }

        $collectedRowData = $this->currentlyProcessedRowData;
        $this->rowDataBuffer = $collectedRowData;
    }
213
214
    /**
     * Derives the number of columns of the sheet from the "ref" attribute of the dimension node.
     * The number of columns is left untouched when the attribute does not describe a range of cells.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Something like 'A1:M13' (or 'A1' for an empty sheet)
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

        $captures = [];
        $describesRange = preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $captures);

        if ($describesRange) {
            // The cell closing the range tells which column is the last one (zero-based index)
            $lastCellIndex = $captures[1];
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }
228
229
    /**
     * Prepares the state needed to read the cells of a new row: resets the column
     * tracking, records the index of the row and pre-fills the row data with empty
     * values when the number of columns is known.
     *
     * The number of columns comes from the "spans" attribute of the row when it can
     * be parsed, and from the sheet dimension otherwise. The "spans" attribute is a
     * space-separated list of "start:end" ranges (for instance '1:5' or '1:3 5:7');
     * the highest range end is used, so that rows made of several ranges are not
     * truncated to their first range.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Default to the number of columns of the sheet (0 meaning undefined)
        $numberOfColumnsForRow = $this->numColumns;

        // Read spans info if present and well-formed; malformed values are ignored
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        $spanMatches = [];
        if ($spans && preg_match_all('/\d+:(\d+)/', $spans, $spanMatches) > 0) {
            $numberOfColumnsForRow = max(array_map('intval', $spanMatches[1]));
        }

        $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

        return XMLProcessor::PROCESSING_CONTINUE;
    }
253
254
    /**
     * Stores the value of the cell the reader is positioned on at the matching
     * column index of the current row data, and remembers that column index.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $columnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $cellNode = $xmlReader->expand();
        $cellValue = $this->getCellValue($cellNode);

        $this->currentlyProcessedRowData[$columnIndex] = $cellValue;
        $this->lastColumnIndexProcessed = $columnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }
269
270
    /**
     * Finalizes the row that has just been read. Empty rows are skipped when they
     * should not be preserved; any other row is counted, completed if needed and
     * the processing is stopped so that the row can be buffered.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        $shouldSkipRow = (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData));
        if ($shouldSkipRow) {
            // Empty row that does not need to be preserved: keep reading
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows += 1;

        // When the number of columns of the sheet is unknown, the row was not
        // pre-filled: the indexes that are missing get filled now.
        $isNumColumnsUndefined = ($this->numColumns === 0);
        if ($isNumColumnsUndefined) {
            $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
        }

        // All the data of the row is now available: stop here so that the buffer can be populated
        return XMLProcessor::PROCESSING_STOP;
    }
292
293
    /**
     * Flags the end of the sheet data and stops the processing.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // Reaching the closing "</worksheet>" node means there is nothing left to read
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }
303
304
    /**
     * Returns the index of the row the reader is positioned on: the integer value
     * of its "r" attribute when present, the index following the last processed
     * row otherwise.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>)
        $rowIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        if ($rowIndexAttribute === null) {
            return $this->lastRowIndexProcessed + 1;
        }

        return intval($rowIndexAttribute);
    }
318
319
    /**
     * Returns the index of the column of the cell the reader is positioned on:
     * computed from its "r" attribute when present, the index following the last
     * processed column otherwise.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @return int Column index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>)
        $cellIndexAttribute = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        if ($cellIndexAttribute === null) {
            return $this->lastColumnIndexProcessed + 1;
        }

        return CellHelper::getColumnIndexFromCellIndex($cellIndexAttribute);
    }
333
334
    /**
     * Returns the value of the cell described by the given XML node,
     * as extracted and formatted by the cell value formatter.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        $formattedValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

        return $formattedValue;
    }
344
345
    /**
     * Returns whether the given row is empty, that is whether it is made of
     * exactly one value, stored at index 0, that is the empty string.
     *
     * The existence of index 0 is checked before reading it: a row holding a single
     * value at another index (for instance [1 => 'foo'], which happens when the only
     * cell of the row is not in the first column) is not empty and must not trigger
     * an "undefined offset" error.
     *
     * @param array $rowData
     * @return bool Whether the given row is empty
     */
    protected function isEmptyRow($rowData)
    {
        return (
            count($rowData) === 1 &&
            array_key_exists(0, $rowData) &&
            $rowData[0] === ''
        );
    }
353
354
    /**
     * Return the current element, either an empty row or from the buffer.
     * An empty row is only returned when empty rows are preserved and the last row
     * read from the XML file is not the row the iterator currently points to.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        if (!$this->shouldPreserveEmptyRows) {
            return $this->rowDataBuffer;
        }

        // When empty rows are preserved, the buffered row must only be returned once
        // the iterator has caught up with its index. Until then, the rows in between
        // are the empty ones.
        $isBufferedRowTheCurrentOne = ($this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed);
        if ($isBufferedRowTheCurrentOne) {
            return $this->rowDataBuffer;
        }

        return [''];
    }
378
379
    /**
     * Return the key of the current element: the index of the row to be processed
     * when empty rows are preserved, the number of rows read otherwise.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should always return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the number of rows read
        //       is still returned when empty rows are not preserved.
        if ($this->shouldPreserveEmptyRows) {
            return $this->nextRowIndexToBeProcessed;
        }

        return $this->numReadRows;
    }
394
395
396
    /**
     * Cleans up what was created to iterate over the object,
     * by closing the XMLReader used to read the sheet data.
     *
     * @return void
     */
    public function end()
    {
        $reader = $this->xmlReader;
        $reader->close();
    }
405
}
406