Completed
Pull Request — develop_3.0 (#568)
by Hura
08:27
created

RowIterator   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 265
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 5

Test Coverage

Coverage 97.47%

Importance

Changes 2
Bugs 1 Features 0
Metric Value
wmc 33
lcom 1
cbo 5
dl 0
loc 265
c 2
b 1
f 0
ccs 77
cts 79
cp 0.9747
rs 9.3999

12 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 17 1
A rewind() 0 9 1
A rewindAndSkipBom() 0 7 1
A valid() 0 4 2
A next() 0 8 2
A readDataForNextRow() 0 17 3
A shouldReadNextRow() 0 11 4
C getNextUTF8EncodedRow() 0 53 13
A isEmptyLine() 0 4 3
A current() 0 4 1
A key() 0 4 1
A end() 0 4 1
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Common\Entity\Row;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
8
use Box\Spout\Common\Manager\OptionsManagerInterface;
9
use Box\Spout\Reader\Common\Entity\Options;
10
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
11
use Box\Spout\Reader\Exception\InvalidReaderOptionValueException;
12
use Box\Spout\Reader\IteratorInterface;
13
14
/**
15
 * Class RowIterator
16
 * Iterate over CSV rows.
17
 */
18
class RowIterator implements IteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates very long lines).
     */
    const MAX_READ_BYTES_PER_LINE = 0;

    /** @var resource Pointer to the CSV file to read */
    protected $filePointer;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var string Defines the character used to delimit fields (one character only) */
    protected $fieldDelimiter;

    /** @var string Defines the character used to enclose fields (one character only) */
    protected $fieldEnclosure;

    /** @var string Encoding of the CSV file to be read */
    protected $encoding;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
    protected $encodingHelper;

    /** @var \Box\Spout\Reader\CSV\Creator\InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var OptionsManagerInterface Kept so that the column range options can be re-read for every row */
    protected $optionsManager;

    /**
     * @param resource $filePointer Pointer to the CSV file to read
     * @param OptionsManagerInterface $optionsManager
     * @param EncodingHelper $encodingHelper
     * @param InternalEntityFactory $entityFactory
     * @param GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct(
        $filePointer,
        OptionsManagerInterface $optionsManager,
        EncodingHelper $encodingHelper,
        InternalEntityFactory $entityFactory,
        GlobalFunctionsHelper $globalFunctionsHelper
    ) {
        $this->filePointer = $filePointer;
        $this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER);
        $this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE);
        $this->encoding = $optionsManager->getOption(Options::ENCODING);
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->encodingHelper = $encodingHelper;
        $this->entityFactory = $entityFactory;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->optionsManager = $optionsManager;
    }

    /**
     * Rewind the Iterator to the first element
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowBuffer = null;

        // Pre-load the first row so that valid()/current() can be called right away
        $this->next();
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return ($this->filePointer && !$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return void
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Reads rows until one can be returned (see shouldReadNextRow()) and stores it in the row buffer.
     * If no row could be fetched, the iterator is flagged as having reached the end of the file.
     *
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return void
     */
    protected function readDataForNextRow()
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if ($rowData !== false) {
            // Replace NULL cell values by empty strings. An explicit cast is used instead of the
            // former "str_replace(null, null, ...)" trick, which is deprecated as of PHP 8.1.
            $rowDataBufferAsArray = array_map(
                static function ($cellValue) {
                    return (string) $cellValue;
                },
                $rowData
            );
            $this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray);
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;
        }
    }

    /**
     * Keeps reading when the fetch failed although the end of the file has not been reached,
     * or when the fetched line is empty and empty rows should be skipped.
     *
     * @param array|bool $currentRowData
     * @return bool Whether the data for the current row can be returned or if we need to keep reading
     */
    protected function shouldReadNextRow($currentRowData)
    {
        $hasSuccessfullyFetchedRowData = ($currentRowData !== false);
        $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        return (
            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
            (!$this->shouldPreserveEmptyRows && $isEmptyLine)
        );
    }

    /**
     * Returns the next row, restricted to the configured column range and converted if necessary to UTF-8.
     * As fgetcsv() does not correctly handle encoding for non UTF-8 data,
     * we manually remove whitespace with ltrim or rtrim (depending on the order of the bytes)
     *
     * @throws InvalidReaderOptionValueException If the start/end column options are invalid
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return array|false The cells of the row, or FALSE if fgetcsv() could not read another row
     */
    protected function getNextUTF8EncodedRow()
    {
        $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if ($encodedRowData === false) {
            return false;
        }

        // The start and end column index should be able to be set after the reader has been opened,
        // which is why they are read (and validated) for every row.
        $startColumnIndex = $this->optionsManager->getOption(Options::START_COLUMN);
        $endColumnIndex = $this->optionsManager->getOption(Options::END_COLUMN);

        if ($startColumnIndex < 0) {
            throw new InvalidReaderOptionValueException(
                'The start column index has to be a non negative number'
            );
        }

        if ($endColumnIndex && $endColumnIndex <= $startColumnIndex) {
            throw new InvalidReaderOptionValueException(
                'The end column index has to be a larger number than the start index'
            );
        }

        // The range of the cells to be read is determined by the start and end column index.
        // A NULL length means "read up to the last cell of the row".
        $readerLength = $endColumnIndex ? ($endColumnIndex - $startColumnIndex) + 1 : null;
        $encodedRowData = \array_slice($encodedRowData, $startColumnIndex, $readerLength);

        // If there is an end column index - the resulting data is a fixed array
        // starting at $startColumnIndex and ending at $endColumnIndex.
        // Missing array values are filled with the empty value ''.
        // (array_pad leaves the array untouched when it is already long enough)
        if ($readerLength !== null) {
            $encodedRowData = \array_pad($encodedRowData, $readerLength, '');
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            switch ($this->encoding) {
                case EncodingHelper::ENCODING_UTF16_LE:
                case EncodingHelper::ENCODING_UTF32_LE:
                    // remove whitespace from the beginning of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = ltrim($cellValue);
                    break;

                case EncodingHelper::ENCODING_UTF16_BE:
                case EncodingHelper::ENCODING_UTF32_BE:
                    // remove whitespace from the end of the string, as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = rtrim($cellValue);
                    break;
            }
            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $encodedRowData;
    }

    /**
     * A line is considered empty when it was parsed into exactly one cell whose value is NULL.
     *
     * @param array|bool $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
    }

    /**
     * Return the current element from the buffer
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
}
283