Completed
Pull Request — master (#331)
by Adrien
02:57
created

RowIterator::getNumRowsRepeatedForCurrentNode()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 5
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 3
nc 2
nop 1
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
12
/**
13
 * Class RowIterator
14
 *
15
 * @package Box\Spout\Reader\ODS
16
 */
17
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns supported by Excel; used to detect the trailing "filler" cell it appends to rows */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 1;

    /** @var mixed|null Value of the last processed cell (because when reading cell at column N+1, cell N is processed) */
    protected $lastProcessedCellValue = null;

    /** @var int Number of times the last processed row should be repeated */
    protected $numRowsRepeated = 1;

    /** @var int Number of times the last cell value should be copied to the cells on its right */
    protected $numColumnsRepeated = 1;

    /** @var bool Whether at least one cell has been read for the row currently being processed */
    protected $hasAlreadyReadOneCellInCurrentRow = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
     */
    public function __construct($xmlReader, $shouldFormatDates, $shouldPreserveEmptyRows)
    {
        $this->xmlReader = $xmlReader;
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 1;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row so that valid()/current() can be called right away
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped, unless the iterator
     * was configured to preserve them.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow($this->xmlReader);
        }

        $this->lastRowIndexProcessed++;
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
        );
    }

    /**
     * Reads XML nodes until a complete row has been collected or the end of the
     * table is reached, then stores the collected cells in the row buffer.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    protected function readDataForNextRow($xmlReader)
    {
        $rowData = [];
        $hasStoppedOnRowOrTableEnd = false;

        try {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    $this->processRowStartingNode($xmlReader);
                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    $rowData = $this->processCellStartingNode($xmlReader, $rowData);
                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    $isEmptyRow = $this->isEmptyRow($rowData, $this->lastProcessedCellValue);

                    // if the fetched row is empty and we don't want to preserve it...
                    if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
                        // ... skip it
                        continue;
                    }

                    $rowData = $this->processRowEndingNode($rowData, $isEmptyRow);

                    // at this point, we have all the data we need for the row
                    // so that we can populate the buffer
                    $hasStoppedOnRowOrTableEnd = true;
                    break;
                } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    $this->processTableEndingNode();
                    $hasStoppedOnRowOrTableEnd = true;
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            // Keep the original exception as "previous" so its stack trace is not lost.
            // NOTE(review): assumes IOException keeps the standard \Exception constructor signature - confirm.
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        if (!$hasStoppedOnRowOrTableEnd) {
            // read() returned false before any "</table:table>" was seen: there is nothing
            // left to read, so flag the end of the data. Otherwise valid() would keep
            // returning true and the iteration would never terminate.
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Resets the per-row state and stores how many times the row starting here is repeated.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return void
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset data from current row
        $this->hasAlreadyReadOneCellInCurrentRow = false;
        $this->lastProcessedCellValue = null;
        $this->numColumnsRepeated = 1;
        $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader);
    }

    /**
     * Reads the value of the current cell and appends the PREVIOUS cell of the row
     * (repeated as many times as requested) to the row data. The cell that was just
     * read is only kept in memory; it gets appended when the next cell, or the end
     * of the row, is reached.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @param array $rowData Data of all cells read so far
     * @return array Original row data + data for the previously read cell, if any
     */
    protected function processCellStartingNode($xmlReader, $rowData)
    {
        $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);

        $node = $xmlReader->expand();
        $currentCellValue = $this->getCellValue($node);

        // process cell N only after having read cell N+1: the last cell of a row needs
        // a special treatment, which can only be applied once the end of the row is reached
        if ($this->hasAlreadyReadOneCellInCurrentRow) {
            for ($i = 0; $i < $this->numColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        $this->hasAlreadyReadOneCellInCurrentRow = true;
        $this->lastProcessedCellValue = $currentCellValue;
        $this->numColumnsRepeated = $currentNumColumnsRepeated;

        return $rowData;
    }

    /**
     * Appends the last read cell to the row data (unless it is Excel's trailing
     * filler cell) and updates the index of the next row to be processed.
     *
     * @param array $rowData Data of all cells read so far
     * @param bool $isEmptyRow Whether the given row is empty
     * @return array Row data, including the last read cell when applicable
     */
    protected function processRowEndingNode($rowData, $isEmptyRow)
    {
        // if the row is empty, we don't want to return more than one cell
        $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1;

        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
        // The current count of read columns is determined by counting the values in $rowData.
        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
        // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
        // always 16384 cells.
        if ((count($rowData) + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
            for ($i = 0; $i < $actualNumColumnsRepeated; $i++) {
                $rowData[] = $this->lastProcessedCellValue;
            }
        }

        // If we are processing row N and the row is repeated M times,
        // then the next row to be processed will be row (N+M).
        $this->nextRowIndexToBeProcessed += $this->numRowsRepeated;

        return $rowData;
    }

    /**
     * @return void
     */
    protected function processTableEndingNode()
    {
        // The closing "</table:table>" marks the end of the file
        $this->hasReachedEndOfFile = true;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
     * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumRowsRepeatedForCurrentNode($xmlReader)
    {
        $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);

        return ($numRowsRepeated !== null) ? intval($numRowsRepeated) : 1;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
    {
        $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * After finishing processing each cell, a row is considered empty if it contains
     * no cells or if the value of the last read cell is an empty string.
     * After finishing processing each cell, the last read cell is not part of the
     * row data yet (as we still need to apply the "num-columns-repeated" attribute).
     *
     * @param array $rowData
     * @param string|int|float|bool|\DateTime|\DateInterval|null $lastReadCellValue The value of the last read cell
     * @return bool Whether the row is empty
     */
    protected function isEmptyRow($rowData, $lastReadCellValue)
    {
        if (count($rowData) !== 0) {
            return false;
        }

        if (!isset($lastReadCellValue)) {
            return true;
        }

        // Only scalar values can be trimmed: passing a \DateTime or \DateInterval to trim()
        // triggers a warning on older PHP versions and a TypeError on PHP 8.
        // A row holding such an object is never empty.
        return (is_scalar($lastReadCellValue) && trim($lastReadCellValue) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->lastRowIndexProcessed;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
360