Failed Conditions
Push — perf-tests ( 50942d...2fc93e )
by Adrien
14:53
created

RowIterator::next()   C

Complexity

Conditions 12
Paths 24

Size

Total Lines 66
Code Lines 35

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 1 Features 0
Metric Value
c 3
b 1
f 0
dl 0
loc 66
rs 5.9123
cc 12
eloc 35
nc 24
nop 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace Box\Spout\Reader\ODS;
4
5
use Box\Spout\Common\Exception\IOException;
6
use Box\Spout\Reader\Exception\IteratorNotRewindableException;
7
use Box\Spout\Reader\Exception\XMLProcessingException;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
10
use Box\Spout\Reader\Wrapper\XMLReader;
11
12
/**
 * Class RowIterator
 * Iterates over the rows of an ODS sheet by streaming the XML nodes exposed by
 * the given XMLReader. Rows are read one at a time and kept in a buffer until
 * they are requested through current().
 *
 * @package Box\Spout\Reader\ODS
 */
class RowIterator implements IteratorInterface
{
    /** Definition of XML nodes names used to parse data */
    const XML_NODE_TABLE = 'table:table';
    const XML_NODE_ROW = 'table:table-row';
    const XML_NODE_CELL = 'table:table-cell';

    /** Number of columns supported by Excel; used to detect the trailing "filler" cell Excel appends to rows */
    const MAX_COLUMNS_EXCEL = 16384;

    /** Definition of XML attribute used to parse data */
    const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var Helper\CellValueFormatter Helper to format cell values */
    protected $cellValueFormatter;

    /** @var bool Whether the iterator has already been rewound once */
    protected $hasAlreadyBeenRewound = false;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /**
     * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     */
    public function __construct($xmlReader, $shouldFormatDates)
    {
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = new CellValueFormatter($shouldFormatDates);
    }

    /**
     * Rewind the Iterator to the first element.
     * NOTE: It can only be done once, as it is not possible to read an XML file backwards.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function rewind()
    {
        // Because sheet and row data is located in the file, we can't rewind both the
        // sheet iterator and the row iterator, as XML file cannot be read backwards.
        // Therefore, rewinding the row iterator has been disabled.
        if ($this->hasAlreadyBeenRewound) {
            throw new IteratorNotRewindableException();
        }

        $this->hasAlreadyBeenRewound = true;
        $this->numReadRows = 0;
        $this->rowDataBuffer = null;
        $this->hasReachedEndOfFile = false;

        // Load the first row into the buffer, so that current() and valid() are meaningful right away.
        $this->next();
    }

    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows will be skipped.
     * The row that was read is stored in the buffer returned by current().
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     */
    public function next()
    {
        $rowData = [];
        $cellValue = null;
        $numColumnsRepeated = 1;
        $numCellsRead = 0;
        $hasFoundRow = false;

        try {
            while ($this->xmlReader->read()) {
                if ($this->xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL)) {
                    // Start of a cell description
                    $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode();

                    $node = $this->xmlReader->expand();
                    $currentCellValue = $this->getCellValue($node);

                    // Cell N is only added to the row once cell N+1 has been read. The last cell
                    // of the row is handled when the row ends, where it may be discarded.
                    if ($numCellsRead > 0) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    $cellValue = $currentCellValue;
                    $numColumnsRepeated = $currentNumColumnsRepeated;
                    $numCellsRead++;

                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_ROW)) {
                    // End of the row description
                    $isEmptyRow = ($numCellsRead <= 1 && $this->isEmptyCellValue($cellValue));
                    if ($isEmptyRow) {
                        // Skip empty rows: start over with a clean state and keep reading.
                        // This is done iteratively (not recursively) so that a long run of
                        // empty rows does not grow the call stack.
                        $rowData = [];
                        $cellValue = null;
                        $numColumnsRepeated = 1;
                        $numCellsRead = 0;
                        continue;
                    }

                    // Only add the value if the last read cell is not a trailing empty cell repeater in Excel.
                    // The current count of read columns is determined by counting the values in $rowData.
                    // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>"
                    // with a number-columns-repeated value equals to the number of (supported columns - used columns).
                    // In Excel, the number of supported columns is 16384, but we don't want to returns rows with
                    // always 16384 cells.
                    if ((count($rowData) + $numColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) {
                        for ($i = 0; $i < $numColumnsRepeated; $i++) {
                            $rowData[] = $cellValue;
                        }
                    }

                    // The row is returned whether or not its last cell was kept,
                    // so it must always be counted for key() to stay in sync.
                    $this->numReadRows++;
                    $hasFoundRow = true;
                    break;

                } elseif ($this->xmlReader->isPositionedOnEndingNode(self::XML_NODE_TABLE)) {
                    // The closing "</table:table>" marks the end of the file
                    $this->hasReachedEndOfFile = true;
                    break;
                }
            }

        } catch (XMLProcessingException $exception) {
            throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
        }

        if (!$hasFoundRow && !$this->hasReachedEndOfFile) {
            // The reader ran out of nodes before the closing "</table:table>" was found.
            // There is nothing left to read, so the iteration must stop here. Otherwise
            // valid() would keep returning true and consumers would loop forever.
            $this->hasReachedEndOfFile = true;
        }

        $this->rowDataBuffer = $rowData;
    }

    /**
     * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
     */
    protected function getNumColumnsRepeatedForCurrentNode()
    {
        $numColumnsRepeated = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
        return ($numColumnsRepeated !== null) ? intval($numColumnsRepeated) : 1;
    }

    /**
     * Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
     *
     * @param \DOMNode $node
     * @return string|int|float|bool|\DateTime|\DateInterval|null The value associated with the cell, empty string if cell's type is void/undefined, null on error
     */
    protected function getCellValue($node)
    {
        return $this->cellValueFormatter->extractAndFormatNodeValue($node);
    }

    /**
     * empty() replacement that honours 0 as a valid value.
     * Only null and blank (empty or whitespace-only) strings are considered empty.
     * Any other value (numbers, booleans, date objects) is actual cell content.
     *
     * @param string|int|float|bool|\DateTime|\DateInterval|null $value The cell value
     * @return bool
     */
    protected function isEmptyCellValue($value)
    {
        if ($value === null) {
            return true;
        }

        // trim() must only be applied to strings: date objects cannot be converted
        // to string, and trim(false) would wrongly report a boolean cell as empty.
        return (is_string($value) && trim($value) === '');
    }

    /**
     * Return the current element, from the buffer.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        $this->xmlReader->close();
    }
}
231