Completed
Pull Request — master (#235)
by
unknown
03:20
created

RowIterator::valid()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 4
rs 10
ccs 2
cts 2
cp 1
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
use Box\Spout\Reader\ReaderOptions;
12
13
/**
14
 * Class RowIterator
15
 *
16
 * @package Box\Spout\Reader\ODS
17
 */
18
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /**
     * Number of columns supported by Excel. Used to recognise the trailing empty
     * "<table:table-cell>" filler that Excel appends to pad a row up to this width.
     */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var int Key for iterator; incremented every time the start of a row node is read */
    protected $rowIndex = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var \Box\Spout\Reader\ReaderOptions Options consulted for date formatting and empty rows handling */
    protected $readerOptions;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param \Box\Spout\Reader\ReaderOptions $readerOptions
     */
    public function __construct($xmlReader, ReaderOptions $readerOptions)
    {
        $this->xmlReader = $xmlReader;
        $this->readerOptions = $readerOptions;
        $this->cellValueFormatter = new CellValueFormatter($readerOptions->shouldFormatDates());
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML while loading the first row
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->rowIndex = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row into the buffer so that current() can return it
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean FALSE once the end of the table (or of the XML data) has been reached
     */
    public function valid()
    {
        return !$this->hasReachedEndOfFile;
    }

    /**
     * Move forward to next element and store it in the row buffer.
     * Empty rows are skipped, unless the reader options ask to preserve them.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $cellValue = null;
        $numColumnsRepeated = 1;
        $numCellsRead = 0;
        $hasAlreadyReadOneCell = false;

        try {
            while (true) {
                if (!$this->xmlReader->read()) {
                    // The reader ran out of nodes without the closing "</table:table>" being seen.
                    // Treat this as the end of the data, otherwise valid() would keep returning
                    // true and iterating over this object would never terminate.
                    $this->hasReachedEndOfFile = true;
                    break;
                }

                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_ROW)) {
                    // Start of a row description
                    $this->rowIndex++;
                } elseif ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();

                    $node = $this->xmlReader->expand();
                    $currentCellValue = $this->getCellValue($node);

                    // Process cell N only after having read cell N+1: the last cell of a row
                    // needs a special treatment that can only be decided when the row ends.
                    if ($hasAlreadyReadOneCell) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    $cellValue = $currentCellValue;
                    $numColumnsRepeated = $currentNumColumnsRepeated;

                    $numCellsRead++;
                    $hasAlreadyReadOneCell = true;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));

                    if (!$isEmptyRow) {
                        // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
                        // The current count of read columns is determined by counting the values in $rowData.
                        // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
                        // with a number-columns-repeated value equals to the number of (supported columns - used columns).
                        // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
                        // always 16384 cells.
                        if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
                            for ($i = 0; $i < $numColumnsRepeated; $i++) {
                                $rowData[] = $cellValue;
                            }
                        }
                        break;
                    }

                    if ($this->readerOptions->shouldPreserveEmptyRows()) {
                        // Take number of cells from the previously read line.
                        $rowData = empty($this->rowDataBuffer) ? [] : array_fill(0, count($this->rowDataBuffer), '');
                        break;
                    }

                    // Empty row that must be skipped: start over with a clean state and keep reading.
                    // This is done in place rather than by calling next() recursively, so that the
                    // call depth does not grow with the number of consecutive empty rows.
                    $rowData = [];
                    $cellValue = null;
                    $numColumnsRepeated = 1;
                    $numCellsRead = 0;
                    $hasAlreadyReadOneCell = false;
                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    // The closing "</table:table>" marks the end of the file
                    $this->hasReachedEndOfFile = true;
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            // Keep the original exception attached, so that its stack trace is not lost
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]", 0, $exception);
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode()
    {
        $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
        return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * empty() replacement that honours 0 as a valid value.
     * Only NULL and strings made exclusively of whitespace are considered empty:
     * numbers, booleans (including FALSE) and date objects are actual values.
     *
     * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
     * @return bool
     */
    protected function isEmptyCellValue($value)
    {
        if ($value === null) {
            return true;
        }

        // trim() must only be applied to strings: it would turn FALSE into '' and
        // it cannot handle objects such as \DateTime or \DateInterval.
        return (is_string($value) && trim($value) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->rowIndex;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
241