Failed Conditions
Push — add_support_for_missing_cell_r... ( 51e5f1 )
by Adrien
15:53
created

RowIterator   A

Complexity

Total Complexity 23

Size/Duplication

Total Lines 231
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 11
Bugs 0 Features 2
Metric Value
wmc 23
c 11
b 0
f 2
lcom 1
cbo 4
dl 0
loc 231
rs 10

10 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 1
A normalizeSheetDataXMLFilePath() 0 4 1
A rewind() 0 16 2
A valid() 0 4 1
C next() 0 56 12
A getCellIndex() 0 9 2
A getCellValue() 0 4 1
A current() 0 4 1
A key() 0 4 1
A end() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\XLSX;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\XMLProcessingException;
7
use Box\Spout\Reader\IteratorInterface;
8
use Box\Spout\Reader\Wrapper\XMLReader;
9
use Box\Spout\Reader\XLSX\Helper\CellHelper;
10
use Box\Spout\Reader\XLSX\Helper\CellValueFormatter;
11
use Box\Spout\Reader\XLSX\Helper\StyleHelper;
12
13
/**
 * Class RowIterator
 *
 * Iterates over the rows of one sheet of an XLSX file by streaming the
 * sheet's XML data. Each call to next() reads one row into an internal buffer
 * that current() then exposes.
 *
 * @package Box\Spout\Reader\XLSX
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_DIMENSION = 'dimension';
    const XML_NODE_WORKSHEET = 'worksheet';
    const XML_NODE_ROW = 'row';
    const XML_NODE_CELL = 'c';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_REF = 'ref';
    const XML_ATTRIBUTE_SPANS = 'spans';
    const XML_ATTRIBUTE_CELL_INDEX = 'r';

    /** @var string Path of the XLSX file being read */
    protected $filePath;

    /** @var string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml, without leading slash */
    protected $sheetDataXMLFilePath;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var Helper\StyleHelper $styleHelper Helper to work with styles */
    protected $styleHelper;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param Helper\SharedStringsHelper $sharedStringsHelper Helper to work with shared strings
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     */
    public function __construct($filePath, $sheetDataXMLFilePath, $sharedStringsHelper, $shouldFormatDates)
    {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);

        $this->xmlReader = new XMLReader();

        $this->styleHelper = new StyleHelper($filePath);
        $this->cellValueFormatter = new CellValueFormatter($sharedStringsHelper, $this->styleHelper, $shouldFormatDates);
    }

    /**
     * Strips any leading slash from the given sheet data path.
     *
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Closes any previously opened reader, re-opens the sheet data XML through
     * the "zip://" stream wrapper, resets the iteration state and reads the first row.
     * NOTE(review): an earlier version of this doc stated that the XMLReader is
     * configured to be safe from billion laughs attack; any such hardening lives
     * in the XMLReader wrapper and is not visible here - confirm.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     */
    public function rewind()
    {
        $this->xmlReader->close();

        $sheetDataFilePath = 'zip://' . $this->filePath . '#' . $this->sheetDataXMLFilePath;
        if ($this->xmlReader->open($sheetDataFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        // Reset every piece of per-iteration state so a second pass starts clean
        $this->numReadRows = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;
        $this->lastColumnIndexProcessed = -1;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean FALSE once the end of the sheet data has been reached
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element.
     * Reads XML nodes until the end of a row or the end of the sheet data is
     * reached, then stores the row that was read in the buffer.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $hasReachedStopNode = false;

        try {
            while ($this->xmlReader->read()) {
                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_DIMENSION)) {
                    // Read dimensions of the sheet: 'A1:M13' for instance (or 'A1' for empty sheet)
                    $dimensionRef = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_REF);
                    if (is_string($dimensionRef) && preg_match('/[A-Z\d]+:([A-Z\d]+)/', $dimensionRef, $matches)) {
                        $lastCellIndex = $matches[1];
                        $this->numColumns = CellHelper::getColumnIndexFromCellIndex($lastCellIndex) + 1;
                    }

                } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    // Start of the row description
                    $this->lastColumnIndexProcessed = -1;

                    // Spans info (if present) takes precedence over the sheet dimension
                    $spans = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
                    $numberOfColumnsForRow = $this->getNumColumnsFromSpans($spans, $this->numColumns);
                    $rowData = ($numberOfColumnsForRow !== 0) ? array_fill(0, $numberOfColumnsForRow, '') : [];

                } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentColumnIndex = $this->getCellIndex($this->xmlReader);

                    $node = $this->xmlReader->expand();
                    $rowData[$currentColumnIndex] = $this->getCellValue($node);

                    $this->lastColumnIndexProcessed = $currentColumnIndex;

                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    // When the number of columns is unknown, the gaps left by missing cells must be filled
                    $rowData = ($this->numColumns !== 0) ? $rowData : CellHelper::fillMissingArrayIndexes($rowData);
                    $this->numReadRows++;
                    $hasReachedStopNode = true;
                    break;

                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_WORKSHEET)) {
                    // The closing "</worksheet>" marks the end of the file
                    $this->hasReachedEndOfFile = true;
                    $hasReachedStopNode = true;
                    break;
                }
            }

        } catch (XMLProcessingException $exception) {
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
        }

        if (!$hasReachedStopNode) {
            // The reader ran out of nodes without meeting the end of a row or "</worksheet>".
            // There is nothing left to read: flag it, otherwise valid() would stay true forever.
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * Returns the number of columns described by a row's "spans" attribute.
     * The attribute may hold several space-separated "first:last" ranges
     * (one-based); the largest "last" value is used.
     *
     * @param string|null $spans Value of the "spans" attribute ('1:5' for instance), if any
     * @param int $defaultNumColumns Value to return when no usable span is found
     * @return int Number of columns for the row
     */
    protected function getNumColumnsFromSpans($spans, $defaultNumColumns)
    {
        if (!is_string($spans) || !preg_match_all('/\d+:(\d+)/', $spans, $matches)) {
            return $defaultNumColumns;
        }

        return max(array_map('intval', $matches[1]));
    }

    /**
     * Returns the zero-based column index of the cell the reader is positioned on.
     * When the cell carries no "r" attribute, the index following the last
     * processed column is used.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" tag
     * @return int
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     */
    protected function getCellIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
                CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
                $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     * The extraction itself is delegated to the cell value formatter.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|null The value associated with the cell (null when the cell has an error)
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null The buffered row data (null until a row has been read)
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int Number of rows read so far
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
249