Completed
Pull Request — develop_3.0 (#457)
by Adrien
02:34
created

RowIterator::valid()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Common\Creator\HelperFactory;
6
use Box\Spout\Reader\Common\Entity\Options;
7
use Box\Spout\Reader\CSV\Creator\EntityFactory;
8
use Box\Spout\Reader\IteratorInterface;
9
use Box\Spout\Common\Helper\EncodingHelper;
10
11
/**
 * Class RowIterator
 * Iterate over CSV rows.
 *
 * Rows are read one at a time from the given file pointer, converted to UTF-8
 * and kept in a buffer so that current() and key() can be called repeatedly
 * without reading further into the file.
 *
 * @package Box\Spout\Reader\CSV
 */
class RowIterator implements IteratorInterface
{
    /**
     * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates very long lines).
     */
    const MAX_READ_BYTES_PER_LINE = 0;

    /** @var resource Pointer to the CSV file to read */
    protected $filePointer;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var string Defines the character used to delimit fields (one character only) */
    protected $fieldDelimiter;

    /** @var string Defines the character used to enclose fields (one character only) */
    protected $fieldEnclosure;

    /** @var string Encoding of the CSV file to be read */
    protected $encoding;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
    protected $encodingHelper;

    /**
     * Stores the file pointer and helpers, and reads the delimiter, enclosure, encoding
     * and "preserve empty rows" settings from the options manager.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager
     * @param \Box\Spout\Common\Helper\EncodingHelper $encodingHelper
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($filePointer, $optionsManager, $encodingHelper, $globalFunctionsHelper)
    {
        $this->filePointer = $filePointer;
        $this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER);
        $this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE);
        $this->encoding = $optionsManager->getOption(Options::ENCODING);
        $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
        $this->encodingHelper = $encodingHelper;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
    }

    /**
     * Rewind the Iterator to the first element.
     * Moves the file cursor back to the start of the data (after any BOM), resets the
     * row counter and the buffer, then loads the first row.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowDataBuffer = null;

        $this->next();
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Checks if current position is valid, i.e. a file pointer is set
     * and the end of the file has not been reached yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        return ($this->filePointer && !$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data for the next unprocessed row.
     * Nothing is read when the file pointer is already at the end of the file.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if (!$this->hasReachedEndOfFile) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Reads rows until one can be returned (see shouldReadNextRow()), then stores it in the
     * buffer and increments the row counter. If no row could be fetched, the iterator is
     * flagged as having reached the end of the file.
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function readDataForNextRow()
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if ($rowData !== false) {
            // Cast every cell to a string so that NULL cells (as found in preserved empty rows)
            // become empty strings. This is done explicitly instead of str_replace(null, null, ...)
            // because passing NULL as search/replace to str_replace() is deprecated as of PHP 8.1.
            $this->rowDataBuffer = array_map(static function ($cellValue) {
                return (string) $cellValue;
            }, $rowData);
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;
        }
    }

    /**
     * Decides whether the row that was just fetched must be discarded and another one read.
     * This is the case when nothing was fetched although the end of the file is not reached,
     * or when the row is an empty line and empty rows should not be preserved.
     *
     * @param array|bool $currentRowData
     * @return bool Whether the data for the current row can be returned or if we need to keep reading
     */
    protected function shouldReadNextRow($currentRowData)
    {
        $hasSuccessfullyFetchedRowData = ($currentRowData !== false);
        $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        return (
            (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) ||
            (!$this->shouldPreserveEmptyRows && $isEmptyLine)
        );
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if ($encodedRowData === false) {
            return false;
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            // The (string) casts below keep the previous result ('' for a NULL cell) while
            // avoiding the PHP 8.1 deprecation raised when NULL is passed to ltrim()/rtrim().
            switch ($this->encoding) {
                case EncodingHelper::ENCODING_UTF16_LE:
                case EncodingHelper::ENCODING_UTF32_LE:
                    // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = ltrim((string) $cellValue);
                    break;

                case EncodingHelper::ENCODING_UTF16_BE:
                case EncodingHelper::ENCODING_UTF32_BE:
                    // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = rtrim((string) $cellValue);
                    break;
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $encodedRowData;
    }

    /**
     * A line is considered empty when it is an array holding a single NULL cell.
     *
     * @param array|bool $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
    }

    /**
     * Return the current element from the buffer
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element, which is the number of rows read so far.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
}
242