Completed
Pull Request — master (#331)
by Adrien
02:57
created

RowIterator::processRowStartingNode()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 10
cts 10
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 9
nc 4
nop 1
crap 3
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\XMLProcessingException;
7
use Box\Spout\Reader\IteratorInterface;
8
use Box\Spout\Reader\Wrapper\XMLReader;
9
use Box\Spout\Reader\XLSX\Helper\CellHelper;
10
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
11
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
12
13
/**
 * Class RowIterator
 * Iterates over the rows of a single sheet of an XLSX file, by streaming
 * the sheet's XML data through an XMLReader.
 *
 * @package Box\Spout\Reader\XLSX
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_ROW_INDEX = 'r';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var Helper\StyleHelper $styleHelper Helper to work with styles */
    protected $styleHelper;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates, $shouldPreserveEmptyRows)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        $this->xmlReader = new XMLReader();

        $this->styleHelper = new StyleHelper($filePath);
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);

        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Re-opens the XMLReader on the sheet data stored inside the XLSX archive,
     * resets all the iteration state and reads the first row.
     * The XMLReader is configured to be safe from billion laughs attack.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        $this->xmlReader->close();

        $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
        if ($this->xmlReader->open($sheetDataFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Bring every cursor back to its initial value, so that
        // a second iteration starts from a clean state
        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->lastColumnIndexProcessed = -1;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow($this->xmlReader);
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is located at or after the row that needs
     *   to be processed (i.e. empty rows must be returned until the row
     *   to be processed catches up with the last row read)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Reads the XML nodes until a full row has been read (or the end of the
     * worksheet has been reached) and stores the resulting data in the buffer.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow($xmlReader)
    {
        $rowData = [];

        try {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
                    $this->processDimensionStartingNode($xmlReader);

                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    $rowData = $this->processRowStartingNode($xmlReader);

                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    $rowData = $this->processCellStartingNode($xmlReader, $rowData);

                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // if the fetched row is empty and we don't want to preserve it...
                    if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($rowData)) {
                        // ... skip it
                        continue;
                    }

                    $rowData = $this->processRowEndingNode($rowData);

                    // at this point, we have all the data we need for the row
                    // so that we can populate the buffer
                    break;

                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
                    $this->processWorksheetEndingNode();
                    break;
                }
            }

        } catch (XMLProcessingException $exception) {
            // Chain the original exception so that the root cause stays available to callers
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Reads the sheet dimensions and derives the number of columns from them.
     * The number of columns is left untouched when the "ref" attribute is
     * missing or does not describe a range.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return void
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be absent: don't hand a non-string value to preg_match
        if (is_string($dimensionRef) && preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
            $lastCellIndex = $matches[1];
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
        }
    }

    /**
     * Starts the processing of a new row: resets the column cursor, records
     * the index of the row and pre-fills the row data with empty values when
     * the number of columns is known (from the "spans" attribute if present,
     * from the sheet dimensions otherwise).
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return array
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $numberOfColumnsForRow = $this->getNumberOfColumnsFromSpans($spans, $numberOfColumnsForRow);
        }

        return ($numberOfColumnsForRow > 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];
    }

    /**
     * Extracts the number of columns of a row from its "spans" attribute.
     * The attribute holds one or more "first:last" ranges separated by spaces
     * ('1:5' or '1:3 5:8' for instance); the highest "last" value is used.
     *
     * @param string $spans Value of the "spans" attribute
     * @param int $defaultNumberOfColumns Value to return when no range can be extracted
     * @return int Number of columns described by the spans, or the default value if the spans are malformed
     */
    protected function getNumberOfColumnsFromSpans($spans, $defaultNumberOfColumns)
    {
        // preg_match_all returns 0 when nothing matches and false on error
        if (!preg_match_all('/\d+:(\d+)/', $spans, $matches)) {
            return $defaultNumberOfColumns;
        }

        return max(array_map('intval', $matches[1]));
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @param array $rowData Data of all cells read so far (key = cell index, value = cell value)
     * @return array Original row data + data for the cell that was just read (key = cell index, value = cell value)
     */
    protected function processCellStartingNode($xmlReader, $rowData)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        $node = $xmlReader->expand();
        $rowData[$currentColumnIndex] = $this->getCellValue($node);

        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return $rowData;
    }

    /**
     * Finalizes the row that was just read and counts it as read.
     *
     * @param array $rowData Data of all cells read so far (key = cell index, value = cell value)
     * @return array
     */
    protected function processRowEndingNode($rowData)
    {
        $this->numReadRows++;

        // If needed, we fill the empty cells
        return ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
    }

    /**
     * @return void
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;
    }

    /**
     * Returns the index of the row being read. When the row does not carry
     * an explicit index, it is assumed to directly follow the last processed row.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>)
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
                (int) $currentRowIndex :
                $this->lastRowIndexProcessed + 1;
    }

    /**
     * Returns the index of the column of the cell being read. When the cell
     * does not carry an explicit index, it is assumed to directly follow the
     * last processed cell.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @return int Column index
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>)
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * A row is considered empty when it only contains one cell,
     * stored at index 0, whose value is the empty string.
     *
     * @param array $rowData
     * @return bool Whether the given row is empty
     */
    protected function isEmptyRow($rowData)
    {
        // The single cell of the row is not necessarily stored at index 0
        // (e.g. a row whose only cell is "C1"), hence the isset() guard.
        return (count($rowData) === 1 && isset($rowData[0]) && $rowData[0] === '');
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        $rowDataForRowToBeProcessed = $this->rowDataBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowDataForRowToBeProcessed = [''];
            }
        }

        return $rowDataForRowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
                $this->nextRowIndexToBeProcessed :
                $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
407