1 | <?php |
||
19 | class RowIterator implements IteratorInterface |
||
20 | { |
||
21 | /** Definition of XML nodes names used to parse data */ |
||
22 | const XML_NODE_DIMENSION = 'dimension'; |
||
23 | const XML_NODE_WORKSHEET = 'worksheet'; |
||
24 | const XML_NODE_ROW = 'row'; |
||
25 | const XML_NODE_CELL = 'c'; |
||
26 | |||
27 | /** Definition of XML attributes used to parse data */ |
||
28 | const XML_ATTRIBUTE_REF = 'ref'; |
||
29 | const XML_ATTRIBUTE_SPANS = 'spans'; |
||
30 | const XML_ATTRIBUTE_ROW_INDEX = 'r'; |
||
31 | const XML_ATTRIBUTE_CELL_INDEX = 'r'; |
||
32 | |||
33 | /** @var string Path of the XLSX file being read */ |
||
34 | protected $filePath; |
||
35 | |||
36 | /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */ |
||
37 | protected $sheetDataXMLFilePath; |
||
38 | |||
39 | /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ |
||
40 | protected $xmlReader; |
||
41 | |||
42 | /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ |
||
43 | protected $xmlProcessor; |
||
44 | |||
45 | /** @var Helper\CellValueFormatter Helper to format cell values */ |
||
46 | protected $cellValueFormatter; |
||
47 | |||
48 | /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ |
||
49 | protected $styleHelper; |
||
50 | |||
51 | /** |
||
52 | * TODO: This variable can be deleted when row indices get preserved |
||
53 | * @var int Number of read rows |
||
54 | */ |
||
55 | protected $numReadRows = 0; |
||
56 | |||
57 | /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ |
||
58 | protected $currentlyProcessedRowData = []; |
||
59 | |||
60 | /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ |
||
61 | protected $rowDataBuffer = null; |
||
62 | |||
63 | /** @var bool Indicates whether all rows have been read */ |
||
64 | protected $hasReachedEndOfFile = false; |
||
65 | |||
66 | /** @var int The number of columns the sheet has (0 meaning undefined) */ |
||
67 | protected $numColumns = 0; |
||
68 | |||
69 | /** @var bool Whether empty rows should be returned or skipped */ |
||
70 | protected $shouldPreserveEmptyRows; |
||
71 | |||
72 | /** @var int Last row index processed (one-based) */ |
||
73 | protected $lastRowIndexProcessed = 0; |
||
74 | |||
75 | /** @var int Row index to be processed next (one-based) */ |
||
76 | protected $nextRowIndexToBeProcessed = 0; |
||
77 | |||
78 | /** @var int Last column index processed (zero-based) */ |
||
79 | protected $lastColumnIndexProcessed = -1; |
||
80 | |||
81 | /** |
||
82 | * @param string $filePath Path of the XLSX file being read |
||
83 | * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml |
||
84 | * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options |
||
85 | * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings |
||
86 | */ |
||
87 | 33 | public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper) |
|
88 | { |
||
89 | 33 | $this->filePath = $filePath; |
|
90 | 33 | $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath); |
|
91 | |||
92 | 33 | $this->xmlReader = new XMLReader(); |
|
93 | |||
94 | 33 | $this->styleHelper = new StyleHelper($filePath); |
|
95 | 33 | $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $options->shouldFormatDates()); |
|
96 | |||
97 | 33 | $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows(); |
|
98 | |||
99 | // Register all callbacks to process different nodes when reading the XML file |
||
100 | 33 | $this->xmlProcessor = new XMLProcessor($this->xmlReader); |
|
101 | 33 | $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']); |
|
102 | 33 | $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); |
|
103 | 33 | $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); |
|
104 | 33 | $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); |
|
105 | 33 | $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']); |
|
106 | 33 | } |
|
107 | |||
108 | /** |
||
109 | * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml |
||
110 | * @return string Path of the XML file containing the sheet data, |
||
111 | * without the leading slash. |
||
112 | */ |
||
113 | 33 | protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath) |
|
114 | { |
||
115 | 33 | return ltrim($sheetDataXMLFilePath, '/'); |
|
116 | } |
||
117 | |||
118 | /** |
||
119 | * Rewind the Iterator to the first element. |
||
120 | * Initializes the XMLReader object that reads the associated sheet data. |
||
121 | * The XMLReader is configured to be safe from billion laughs attack. |
||
122 | * @link http://php.net/manual/en/iterator.rewind.php |
||
123 | * |
||
124 | * @return void |
||
125 | * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read |
||
126 | */ |
||
127 | 32 | public function rewind() |
|
128 | { |
||
129 | 32 | $this->xmlReader->close(); |
|
130 | |||
131 | 32 | $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath; |
|
132 | 32 | if ($this->xmlReader->open($sheetDataFilePath) === false) { |
|
133 | 1 | throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\"."); |
|
134 | } |
||
135 | |||
136 | 31 | $this->numReadRows = 0; |
|
137 | 31 | $this->lastRowIndexProcessed = 0; |
|
138 | 31 | $this->nextRowIndexToBeProcessed = 0; |
|
139 | 31 | $this->rowDataBuffer = null; |
|
140 | 31 | $this->hasReachedEndOfFile = false; |
|
141 | 31 | $this->numColumns = 0; |
|
142 | |||
143 | 31 | $this->next(); |
|
144 | 31 | } |
|
145 | |||
146 | /** |
||
147 | * Checks if current position is valid |
||
148 | * @link http://php.net/manual/en/iterator.valid.php |
||
149 | * |
||
150 | * @return bool |
||
151 | */ |
||
152 | 31 | public function valid() |
|
153 | { |
||
154 | 31 | return (!$this->hasReachedEndOfFile); |
|
155 | } |
||
156 | |||
157 | /** |
||
158 | * Move forward to next element. Reads data describing the next unprocessed row. |
||
159 | * @link http://php.net/manual/en/iterator.next.php |
||
160 | * |
||
161 | * @return void |
||
162 | * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found |
||
163 | * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML |
||
164 | */ |
||
165 | 31 | public function next() |
|
166 | { |
||
167 | 31 | $this->nextRowIndexToBeProcessed++; |
|
168 | |||
169 | 31 | if ($this->doesNeedDataForNextRowToBeProcessed()) { |
|
170 | 31 | $this->readDataForNextRow(); |
|
171 | } |
||
172 | 31 | } |
|
173 | |||
174 | /** |
||
175 | * Returns whether we need data for the next row to be processed. |
||
176 | * We don't need to read data if: |
||
177 | * we have already read at least one row |
||
178 | * AND |
||
179 | * we need to preserve empty rows |
||
180 | * AND |
||
181 | * the last row that was read is not the row that need to be processed |
||
182 | * (i.e. if we need to return empty rows) |
||
183 | * |
||
184 | * @return bool Whether we need data for the next row to be processed. |
||
185 | */ |
||
186 | 31 | protected function doesNeedDataForNextRowToBeProcessed() |
|
187 | { |
||
188 | 31 | $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); |
|
189 | |||
190 | return ( |
||
191 | 31 | !$hasReadAtLeastOneRow || |
|
192 | 29 | !$this->shouldPreserveEmptyRows || |
|
193 | 31 | $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed |
|
194 | ); |
||
195 | } |
||
196 | |||
197 | /** |
||
198 | * @return void |
||
199 | * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found |
||
200 | * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML |
||
201 | */ |
||
202 | 31 | protected function readDataForNextRow() |
|
203 | { |
||
204 | 31 | $this->currentlyProcessedRowData = []; |
|
205 | |||
206 | try { |
||
207 | 31 | $this->xmlProcessor->readUntilStopped(); |
|
208 | } catch (XMLProcessingException $exception) { |
||
209 | throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]"); |
||
210 | } |
||
211 | |||
212 | 31 | $this->rowDataBuffer = $this->currentlyProcessedRowData; |
|
213 | 31 | } |
|
214 | |||
215 | /** |
||
216 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node |
||
217 | * @return int A return code that indicates what action should the processor take next |
||
218 | */ |
||
219 | 15 | protected function processDimensionStartingNode($xmlReader) |
|
220 | { |
||
221 | // Read dimensions of the sheet |
||
222 | 15 | $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet) |
|
223 | 15 | if (preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) { |
|
224 | 12 | $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1; |
|
225 | } |
||
226 | |||
227 | 15 | return XMLProcessor::PROCESSING_CONTINUE; |
|
228 | } |
||
229 | |||
230 | /** |
||
231 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node |
||
232 | * @return int A return code that indicates what action should the processor take next |
||
233 | */ |
||
234 | 30 | protected function processRowStartingNode($xmlReader) |
|
235 | { |
||
236 | // Reset index of the last processed column |
||
237 | 30 | $this->lastColumnIndexProcessed = -1; |
|
238 | |||
239 | // Mark the last processed row as the one currently being read |
||
240 | 30 | $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader); |
|
241 | |||
242 | // Read spans info if present |
||
243 | 30 | $numberOfColumnsForRow = $this->numColumns; |
|
244 | 30 | $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance |
|
245 | 30 | if ($spans) { |
|
246 | 10 | list(, $numberOfColumnsForRow) = explode(':', $spans); |
|
247 | 10 | $numberOfColumnsForRow = intval($numberOfColumnsForRow); |
|
248 | } |
||
249 | |||
250 | 30 | $this->currentlyProcessedRowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : []; |
|
251 | |||
252 | 30 | return XMLProcessor::PROCESSING_CONTINUE; |
|
253 | } |
||
254 | |||
255 | /** |
||
256 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node |
||
257 | * @return int A return code that indicates what action should the processor take next |
||
258 | */ |
||
259 | 30 | protected function processCellStartingNode($xmlReader) |
|
260 | { |
||
261 | 30 | $currentColumnIndex = $this->getColumnIndex($xmlReader); |
|
262 | |||
263 | // NOTE: expand() will automatically decode all XML entities of the child nodes |
||
264 | 30 | $node = $xmlReader->expand(); |
|
265 | 30 | $this->currentlyProcessedRowData[$currentColumnIndex] = $this->getCellValue($node); |
|
266 | 30 | $this->lastColumnIndexProcessed = $currentColumnIndex; |
|
267 | |||
268 | 30 | return XMLProcessor::PROCESSING_CONTINUE; |
|
269 | } |
||
270 | |||
271 | /** |
||
272 | * @return int A return code that indicates what action should the processor take next |
||
273 | */ |
||
274 | 30 | protected function processRowEndingNode() |
|
275 | { |
||
276 | // if the fetched row is empty and we don't want to preserve it.., |
||
277 | 30 | if (!$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData)) { |
|
278 | // ... skip it |
||
279 | return XMLProcessor::PROCESSING_CONTINUE; |
||
280 | } |
||
281 | |||
282 | 30 | $this->numReadRows++; |
|
283 | |||
284 | // If needed, we fill the empty cells |
||
285 | 30 | if ($this->numColumns === 0) { |
|
286 | 18 | $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData); |
|
287 | } |
||
288 | |||
289 | // at this point, we have all the data we need for the row |
||
290 | // so that we can populate the buffer |
||
291 | 30 | return XMLProcessor::PROCESSING_STOP; |
|
292 | } |
||
293 | |||
294 | /** |
||
295 | * @return int A return code that indicates what action should the processor take next |
||
296 | */ |
||
297 | 30 | protected function processWorksheetEndingNode() |
|
298 | { |
||
299 | // The closing "</worksheet>" marks the end of the file |
||
300 | 30 | $this->hasReachedEndOfFile = true; |
|
301 | |||
302 | 30 | return XMLProcessor::PROCESSING_STOP; |
|
303 | } |
||
304 | |||
305 | /** |
||
306 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node |
||
307 | * @return int Row index |
||
308 | * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid |
||
309 | */ |
||
310 | 30 | protected function getRowIndex($xmlReader) |
|
319 | |||
320 | /** |
||
321 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node |
||
322 | * @return int Column index |
||
323 | * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid |
||
324 | */ |
||
325 | 30 | protected function getColumnIndex($xmlReader) |
|
334 | |||
335 | /** |
||
336 | * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. |
||
337 | * |
||
338 | * @param \DOMNode $node |
||
339 | * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) |
||
340 | */ |
||
341 | 30 | protected function getCellValue($node) |
|
345 | |||
346 | /** |
||
347 | * @param array $rowData |
||
348 | * @return bool Whether the given row is empty |
||
349 | */ |
||
350 | 29 | protected function isEmptyRow($rowData) |
|
354 | |||
355 | /** |
||
356 | * Return the current element, either an empty row or from the buffer. |
||
357 | * @link http://php.net/manual/en/iterator.current.php |
||
358 | * |
||
359 | * @return array|null |
||
360 | */ |
||
361 | 30 | public function current() |
|
362 | { |
||
363 | 30 | $rowDataForRowToBeProcessed = $this->rowDataBuffer; |
|
364 | |||
365 | 30 | if ($this->shouldPreserveEmptyRows) { |
|
366 | // when we need to preserve empty rows, we will either return |
||
367 | // an empty row or the last row read. This depends whether the |
||
368 | // index of last row that was read matches the index of the last |
||
369 | // row whose value should be returned. |
||
370 | 1 | if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) { |
|
371 | // return empty row if mismatch between last processed row |
||
372 | // and the row that needs to be returned |
||
373 | 1 | $rowDataForRowToBeProcessed = ['']; |
|
374 | } |
||
375 | } |
||
376 | |||
377 | 30 | return $rowDataForRowToBeProcessed; |
|
378 | } |
||
379 | |||
380 | /** |
||
381 | * Return the key of the current element. Here, the row index. |
||
382 | * @link http://php.net/manual/en/iterator.key.php |
||
383 | * |
||
384 | * @return int |
||
385 | */ |
||
386 | 29 | public function key() |
|
395 | |||
396 | |||
397 | /** |
||
398 | * Cleans up what was created to iterate over the object. |
||
399 | * |
||
400 | * @return void |
||
401 | */ |
||
402 | 32 | public function end() |
|
406 | } |
||
407 |