1 | <?php |
||
19 | class RowIterator implements IteratorInterface |
||
20 | { |
||
21 | /** Definition of XML nodes names used to parse data */ |
||
22 | const XML_NODE_DIMENSION = 'dimension'; |
||
23 | const XML_NODE_WORKSHEET = 'worksheet'; |
||
24 | const XML_NODE_ROW = 'row'; |
||
25 | const XML_NODE_CELL = 'c'; |
||
26 | |||
27 | /** Definition of XML attributes used to parse data */ |
||
28 | const XML_ATTRIBUTE_REF = 'ref'; |
||
29 | const XML_ATTRIBUTE_SPANS = 'spans'; |
||
30 | const XML_ATTRIBUTE_ROW_INDEX = 'r'; |
||
31 | const XML_ATTRIBUTE_CELL_INDEX = 'r'; |
||
32 | |||
33 | /** @var string Path of the XLSX file being read */ |
||
34 | protected $filePath; |
||
35 | |||
36 | /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml */ |
||
37 | protected $sheetDataXMLFilePath; |
||
38 | |||
39 | /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ |
||
40 | protected $xmlReader; |
||
41 | |||
42 | /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ |
||
43 | protected $xmlProcessor; |
||
44 | |||
45 | /** @var Helper\CellValueFormatter Helper to format cell values */ |
||
46 | protected $cellValueFormatter; |
||
47 | |||
48 | /** @var Helper\StyleHelper $styleHelper Helper to work with styles */ |
||
49 | protected $styleHelper; |
||
50 | |||
51 | /** |
||
52 | * TODO: This variable can be deleted when row indices get preserved |
||
53 | * @var int Number of read rows |
||
54 | */ |
||
55 | protected $numReadRows = 0; |
||
56 | |||
57 | /** @var array Contains the data for the currently processed row (key = cell index, value = cell value) */ |
||
58 | protected $currentlyProcessedRowData = []; |
||
59 | |||
60 | /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */ |
||
61 | protected $rowDataBuffer = null; |
||
62 | |||
63 | /** @var bool Indicates whether all rows have been read */ |
||
64 | protected $hasReachedEndOfFile = false; |
||
65 | |||
66 | /** @var int The number of columns the sheet has (0 meaning undefined) */ |
||
67 | protected $numColumns = 0; |
||
68 | |||
69 | /** @var bool Whether empty rows should be returned or skipped */ |
||
70 | protected $shouldPreserveEmptyRows; |
||
71 | |||
72 | /** @var int Last row index processed (one-based) */ |
||
73 | protected $lastRowIndexProcessed = 0; |
||
74 | |||
75 | /** @var int Row index to be processed next (one-based) */ |
||
76 | protected $nextRowIndexToBeProcessed = 0; |
||
77 | |||
78 | /** @var int Last column index processed (zero-based) */ |
||
79 | protected $lastColumnIndexProcessed = -1; |
||
80 | |||
/**
 * Stores the file locations, creates the XML reader together with the style
 * and cell-value helpers, and registers the callbacks that handle the XML
 * nodes of the sheet.
 *
 * @param string $filePath Path of the XLSX file being read
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 * @param \Box\Spout\Reader\XLSX\ReaderOptions $options Reader's current options
 * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
 */
public function __construct($filePath, $sheetDataXMLFilePath, $options, $sharedStringsHelper)
{
    $this->filePath = $filePath;
    // The sheet path is stored without its leading slash(es)
    $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

    $this->xmlReader = new XMLReader();

    $this->styleHelper = new StyleHelper($filePath);
    $shouldFormatDates = $options->shouldFormatDates();
    $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);

    $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();

    $this->xmlProcessor = new XMLProcessor($this->xmlReader);

    // Each entry reads: [node name, node type, name of the handling method].
    // Callbacks are registered in the order listed here.
    $callbackDefinitions = [
        [self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, 'processDimensionStartingNode'],
        [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, 'processRowStartingNode'],
        [self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, 'processCellStartingNode'],
        [self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, 'processRowEndingNode'],
        [self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, 'processWorksheetEndingNode'],
    ];

    foreach ($callbackDefinitions as $definition) {
        $this->xmlProcessor->registerCallback($definition[0], $definition[1], [$this, $definition[2]]);
    }
}
|
107 | |||
/**
 * Strips every leading "/" from the given sheet data path.
 *
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
 * @return string The same path without any leading slash
 */
protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
{
    $charactersToStrip = '/';
    $pathWithoutLeadingSlash = ltrim($sheetDataXMLFilePath, $charactersToStrip);

    return $pathWithoutLeadingSlash;
}
||
117 | |||
/**
 * Rewind the Iterator to the first element.
 * Closes the XML reader, re-opens the sheet data XML inside the XLSX file,
 * resets the iteration state and reads the first row.
 * @link http://php.net/manual/en/iterator.rewind.php
 *
 * @return void
 * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
 */
public function rewind()
{
    // Release whatever the reader had open before starting over
    $this->xmlReader->close();

    $wasSheetOpened = $this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath);
    if ($wasSheetOpened === false) {
        throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
    }

    // Back to a pristine iteration state
    $this->hasReachedEndOfFile = false;
    $this->rowDataBuffer = null;
    $this->numColumns = 0;
    $this->numReadRows = 0;
    $this->lastRowIndexProcessed = 0;
    $this->nextRowIndexToBeProcessed = 0;

    // Position the iterator on the first row
    $this->next();
}
|
144 | |||
/**
 * Checks if current position is valid, i.e. the end of the file has not been reached yet.
 * @link http://php.net/manual/en/iterator.valid.php
 *
 * @return bool
 */
public function valid()
{
    $isExhausted = $this->hasReachedEndOfFile;

    return !$isExhausted;
}
||
155 | |||
/**
 * Move forward to next element.
 * Advances the index of the row to process and, when required, reads the data of the next row.
 * @link http://php.net/manual/en/iterator.next.php
 *
 * @return void
 * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
 */
public function next()
{
    $this->nextRowIndexToBeProcessed += 1;

    if (!$this->doesNeedDataForNextRowToBeProcessed()) {
        // The data already read is enough for the row about to be processed
        return;
    }

    $this->readDataForNextRow();
}
|
172 | |||
/**
 * Returns whether we need data for the next row to be processed.
 * New data must be read when any of the following holds:
 *   - no row has been read yet
 *   - empty rows are not preserved
 *   - the last row that was read comes before the row that needs to be processed
 * Otherwise the row already read is still ahead of the requested one
 * (i.e. empty rows have to be returned in the meantime).
 *
 * @return bool Whether we need data for the next row to be processed.
 */
protected function doesNeedDataForNextRowToBeProcessed()
{
    // A last processed index of 0 means that no row was read so far
    if ($this->lastRowIndexProcessed === 0) {
        return true;
    }

    if (!$this->shouldPreserveEmptyRows) {
        return true;
    }

    return ($this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed);
}
||
195 | |||
/**
 * Reads the data of the next row into the row buffer.
 * The in-progress row is cleared, the XML processor is run (readUntilStopped)
 * and whatever the callbacks collected is then copied to the buffer.
 *
 * @return void
 * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
 * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
 */
protected function readDataForNextRow()
{
    $this->currentlyProcessedRowData = [];

    try {
        $this->xmlProcessor->readUntilStopped();
    } catch (XMLProcessingException $exception) {
        // Chain the original exception so that its type and stack trace stay available to callers.
        // NOTE(review): assumes IOException keeps the standard \Exception constructor signature - confirm.
        throw new IOException(
            "The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]",
            0,
            $exception
        );
    }

    $this->rowDataBuffer = $this->currentlyProcessedRowData;
}
|
213 | |||
/**
 * Reads the "ref" attribute of the "<dimension>" node and, when it describes a range,
 * derives the number of columns of the sheet from the last cell of that range.
 * When the attribute is missing or is not a range (e.g. 'A1'), the number of columns is left untouched.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
 * @return int A return code that indicates what action the processor should take next (always PROCESSING_CONTINUE)
 */
protected function processDimensionStartingNode($xmlReader)
{
    // Returns 'A1:M13' for instance (or 'A1' for an empty sheet)
    $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);

    // The attribute may be absent: only run the regex on an actual string,
    // as passing null to preg_match() is deprecated since PHP 8.1.
    if (is_string($dimensionRef) && preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
        // $matches[1] is the cell closing the range; one is added to the column index CellHelper returns for it
        $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
    }

    return XMLProcessor::PROCESSING_CONTINUE;
}
||
228 | |||
/**
 * Prepares the state for a new row: resets the last processed column index, records the index
 * of the row being read and pre-fills the row data with empty strings, one per expected column.
 *
 * The expected number of columns comes from the "spans" attribute of the row when it can be
 * parsed, and from the number of columns of the sheet otherwise. "spans" may contain several
 * space-separated ranges ('1:5 7:10'), in which case the highest end column is used.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
 * @return int A return code that indicates what action the processor should take next (always PROCESSING_CONTINUE)
 */
protected function processRowStartingNode($xmlReader)
{
    // Reset index of the last processed column
    $this->lastColumnIndexProcessed = -1;

    // Mark the last processed row as the one currently being read
    $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

    // Default to the sheet-wide column count (0 when unknown)
    $numberOfColumnsForRow = $this->numColumns;

    // Read spans info if present: returns '1:5' for instance
    $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS);
    if ($spans && preg_match_all('/\d+:(\d+)/', $spans, $spanMatches) > 0) {
        // Keep the right-most column over all the ranges. A value that holds no
        // "start:end" pair is ignored instead of triggering an undefined offset.
        $numberOfColumnsForRow = max(array_map('intval', $spanMatches[1]));
    }

    $this->currentlyProcessedRowData = ($numberOfColumnsForRow > 0)
        ? array_fill(0, $numberOfColumnsForRow, '')
        : [];

    return XMLProcessor::PROCESSING_CONTINUE;
}
||
253 | |||
/**
 * Stores the value of the cell under its column index in the row being processed
 * and remembers that column as the last one processed.
 *
 * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" starting node
 * @return int A return code that indicates what action the processor should take next (always PROCESSING_CONTINUE)
 */
protected function processCellStartingNode($xmlReader)
{
    $columnIndex = $this->getColumnIndex($xmlReader);

    // NOTE: expand() will automatically decode all XML entities of the child nodes
    $cellNode = $xmlReader->expand();
    $cellValue = $this->getCellValue($cellNode);

    $this->currentlyProcessedRowData[$columnIndex] = $cellValue;
    $this->lastColumnIndexProcessed = $columnIndex;

    return XMLProcessor::PROCESSING_CONTINUE;
}
||
269 | |||
/**
 * Finalizes the row that was just read.
 * An empty row is skipped (processing continues) when empty rows are not preserved.
 * Otherwise the row is counted and processing stops, so that the buffer can be populated.
 *
 * @return int A return code that indicates what action the processor should take next
 */
protected function processRowEndingNode()
{
    // The emptiness check only runs when empty rows must not be preserved
    $shouldSkipRow = !$this->shouldPreserveEmptyRows && $this->isEmptyRow($this->currentlyProcessedRowData);
    if ($shouldSkipRow) {
        return XMLProcessor::PROCESSING_CONTINUE;
    }

    $this->numReadRows += 1;

    // Without a known number of columns for the sheet, the row was not pre-filled
    // from it: let CellHelper fill in the missing indexes.
    $isNumColumnsUnknown = ($this->numColumns === 0);
    if ($isNumColumnsUnknown) {
        $this->currentlyProcessedRowData = CellHelper::fillMissingArrayIndexes($this->currentlyProcessedRowData);
    }

    // All the data needed for the row is now available
    return XMLProcessor::PROCESSING_STOP;
}
||
292 | |||
/**
 * Flags the end of the file once the closing "</worksheet>" node is reached.
 *
 * @return int A return code that indicates what action the processor should take next (always PROCESSING_STOP)
 */
protected function processWorksheetEndingNode()
{
    // Nothing is left to read after the closing "</worksheet>"
    $this->hasReachedEndOfFile = true;

    $nextAction = XMLProcessor::PROCESSING_STOP;

    return $nextAction;
}
||
303 | |||
304 | /** |
||
305 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node |
||
306 | * @return int Row index |
||
307 | * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid |
||
308 | */ |
||
309 | 90 | protected function getRowIndex($xmlReader) |
|
318 | |||
319 | /** |
||
320 | * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node |
||
321 | * @return int Column index |
||
322 | * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid |
||
323 | */ |
||
324 | 90 | protected function getColumnIndex($xmlReader) |
|
333 | |||
334 | /** |
||
335 | * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node. |
||
336 | * |
||
337 | * @param \DOMNode $node |
||
338 | * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error) |
||
339 | */ |
||
340 | 90 | protected function getCellValue($node) |
|
344 | |||
345 | /** |
||
346 | * @param array $rowData |
||
347 | * @return bool Whether the given row is empty |
||
348 | */ |
||
349 | 87 | protected function isEmptyRow($rowData) |
|
353 | |||
354 | /** |
||
355 | * Return the current element, either an empty row or from the buffer. |
||
356 | * @link http://php.net/manual/en/iterator.current.php |
||
357 | * |
||
358 | * @return array|null |
||
359 | */ |
||
360 | 90 | public function current() |
|
378 | |||
379 | /** |
||
380 | * Return the key of the current element. Here, the row index. |
||
381 | * @link http://php.net/manual/en/iterator.key.php |
||
382 | * |
||
383 | * @return int |
||
384 | */ |
||
385 | 87 | public function key() |
|
394 | |||
395 | |||
396 | /** |
||
397 | * Cleans up what was created to iterate over the object. |
||
398 | * |
||
399 | * @return void |
||
400 | */ |
||
401 | 96 | public function end() |
|
405 | } |
||
406 |