Completed
Pull Request — master (#557)
by Adrien
03:10
created

RowIterator::rewind()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 9
ccs 6
cts 6
cp 1
rs 9.6666
c 0
b 0
f 0
cc 1
eloc 5
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Common\Entity\Row;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
8
use Box\Spout\Common\Manager\OptionsManagerInterface;
9
use Box\Spout\Reader\Common\Entity\Options;
10
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory;
11
use Box\Spout\Reader\IteratorInterface;
12
13
/**
14
 * Class RowIterator
15
 * Iterate over CSV rows.
16
 */
17
class RowIterator implements IteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates very long lines).
     */
    const MAX_READ_BYTES_PER_LINE = 0;

    /** @var resource Pointer to the CSV file to read */
    protected $filePointer;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var string Defines the character used to delimit fields (one character only) */
    protected $fieldDelimiter;

    /** @var string Defines the character used to enclose fields (one character only) */
    protected $fieldEnclosure;

    /** @var string Encoding of the CSV file to be read */
    protected $encoding;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
    protected $encodingHelper;

    /** @var \Box\Spout\Reader\CSV\Creator\InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /**
     * Stores the file pointer and collaborators, and reads the CSV related options
     * (field delimiter, field enclosure, encoding, empty rows handling) from the options manager.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param OptionsManagerInterface $optionsManager
     * @param EncodingHelper $encodingHelper
     * @param InternalEntityFactory $entityFactory
     * @param GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct(
        $filePointer,
        OptionsManagerInterface $optionsManager,
        EncodingHelper $encodingHelper,
        InternalEntityFactory $entityFactory,
        GlobalFunctionsHelper $globalFunctionsHelper
    ) {
        $this->filePointer = $filePointer;
        $this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER);
        $this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE);
        $this->encoding = $optionsManager->getOption(Options::ENCODING);
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->encodingHelper = $encodingHelper;
        $this->entityFactory = $entityFactory;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
    }

    /**
     * Rewind the Iterator to the first element.
     * Moves the file cursor back to the first row (after the BOM, if any), resets
     * the row counter and the row buffer, then loads the first row.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowBuffer = null;

        $this->next();
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return ($this->filePointer && !$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return void
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Reads rows until one can be returned (see shouldReadNextRow()) and stores it in the row buffer.
     * If no row could be fetched, the iterator is flagged as having reached the end of the file.
     *
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return void
     */
    protected function readDataForNextRow()
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if ($rowData !== false) {
            // Replace NULL cells by empty strings; array_map() keeps the keys of the row untouched.
            // An explicit cast is used instead of str_replace(null, null, ...), as passing NULL
            // to str_replace() is deprecated as of PHP 8.1.
            $rowDataBufferAsArray = array_map(
                static function ($cellValue) {
                    return (string) $cellValue;
                },
                $rowData
            );
            $this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray);
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;
        }
    }

    /**
     * Tells whether another row must be read, which is the case when:
     *  - no data could be fetched although the end of the file has not been reached yet
     *  - or the fetched row is an empty line and empty rows should not be preserved
     *
     * @param array|bool $currentRowData
     * @return bool Whether the data for the current row can be returned or if we need to keep reading
     */
    protected function shouldReadNextRow($currentRowData)
    {
        $hasSuccessfullyFetchedRowData = ($currentRowData !== false);
        $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        return (
            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
            (!$this->shouldPreserveEmptyRows && $isEmptyLine)
        );
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
     *
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     */
    protected function getNextUTF8EncodedRow()
    {
        $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if ($encodedRowData === false) {
            return false;
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            switch ($this->encoding) {
                case EncodingHelper::ENCODING_UTF16_LE:
                case EncodingHelper::ENCODING_UTF32_LE:
                    // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = ltrim($cellValue);
                    break;

                case EncodingHelper::ENCODING_UTF16_BE:
                case EncodingHelper::ENCODING_UTF32_BE:
                    // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = rtrim($cellValue);
                    break;
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $encodedRowData;
    }

    /**
     * A line is considered empty when its data is an array made of one single NULL cell.
     * Anything that is not an array (e.g. FALSE when nothing could be read) is not an empty line.
     *
     * @param array|bool $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
    }

    /**
     * Return the current element from the buffer
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    public function current()
    {
        return $this->rowBuffer;
    }

    /**
     * Return the key of the current element, which is the number of rows read so far.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
}
253