Completed
Pull Request — master (#235)
by
unknown
02:55
created

RowIterator::key()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 4
rs 10
ccs 2
cts 2
cp 1
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
use Box\Spout\Reader\ReaderOptions;
12
13
/**
 * Class RowIterator
 * Forward-only iterator over the rows of one ODS sheet. Rows are read by streaming
 * through the XML with the given XMLReader; the row that was read last is kept in a
 * buffer and handed out by current(). Empty rows are skipped.
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns supported by Excel; used to detect its trailing empty cell repeater */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var \Box\Spout\Reader\ReaderOptions */
    protected $readerOptions;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param \Box\Spout\Reader\ReaderOptions $readerOptions Options read by this iterator:
     *                                                       date formatting and preservation of row indices
     */
    public function __construct($xmlReader, ReaderOptions $readerOptions)
    {
        $this->xmlReader = $xmlReader;
        $this->readerOptions = $readerOptions;
        $this->cellValueFormatter = new CellValueFormatter($readerOptions->shouldFormatDates());
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->numReadRows = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row into the buffer so that current() can be called right away
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean FALSE once the closing "</table:table>" node has been reached, TRUE otherwise
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows will be skipped.
     * The row that was read is stored in the buffer returned by current().
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $cellValue = null;
        $numColumnsRepeated = 1;
        $numCellsRead = 0;
        $hasAlreadyReadOneCell = false;

        try {
            while ($this->xmlReader->read()) {
                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();

                    $node = $this->xmlReader->expand();
                    $currentCellValue = $this->getCellValue($node);

                    // Cell N is only added after cell N+1 has been read: the last cell of
                    // a row needs special treatment, which is applied at the end of the row.
                    if ($hasAlreadyReadOneCell) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    $cellValue = $currentCellValue;
                    $numColumnsRepeated = $currentNumColumnsRepeated;

                    $numCellsRead++;
                    $hasAlreadyReadOneCell = true;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
                    if ($isEmptyRow) {
                        // Skip empty rows. They still count towards the row index if indices must be preserved.
                        if ($this->readerOptions->shouldPreserveRowIndices()) {
                            $this->numReadRows++;
                        }

                        // Start over with a clean state and keep reading in the same loop.
                        // Looping (instead of calling next() recursively) keeps the call stack
                        // flat, no matter how many consecutive empty rows the sheet contains.
                        $rowData = [];
                        $cellValue = null;
                        $numColumnsRepeated = 1;
                        $numCellsRead = 0;
                        $hasAlreadyReadOneCell = false;
                        continue;
                    }

                    // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
                    // The current count of read columns is determined by counting the values in $rowData.
                    // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
                    // with a number-columns-repeated value equals to the number of (supported columns - used columns).
                    // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
                    // always 16384 cells.
                    if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    // The row is returned whether or not its last cell was dropped above,
                    // so it must always be counted. Otherwise key() lags behind for such rows.
                    $this->numReadRows++;
                    break;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    // The closing "</table:table>" marks the end of the file
                    $this->hasReachedEndOfFile = true;
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode()
    {
        $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);

        return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * empty() replacement that honours 0 as a valid value.
     * NULL and scalars whose trimmed string representation is empty are considered empty.
     * Non-scalar values (such as \DateTime or \DateInterval objects) are never empty.
     *
     * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
     * @return bool
     */
    protected function isEmptyCellValue($value)
    {
        if ($value === null) {
            return true;
        }

        // Objects cannot be passed to trim(); a cell holding one always carries a value
        if (!is_scalar($value)) {
            return false;
        }

        return (trim((string) $value) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int The number of rows read so far
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
239