Completed
Pull Request — master (#366)
by Alexander
02:25
created

RowIterator   A

Complexity

Total Complexity 29

Size/Duplication

Total Lines 248
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Test Coverage

Coverage 90.79%

Importance

Changes 2
Bugs 1 Features 0
Metric Value
wmc 29
lcom 1
cbo 3
dl 0
loc 248
ccs 69
cts 76
cp 0.9079
rs 10
c 2
b 1
f 0

13 Methods

Rating   Name   Duplication   Size   Complexity  
A rewind() 0 9 1
A rewindAndSkipBom() 0 7 1
A valid() 0 4 2
A next() 0 8 2
A readDataForNextRow() 0 16 3
A shouldReadNextRow() 0 11 4
C getNextUTF8EncodedRow() 0 27 7
A getEncodedEOLDelimiter() 0 8 2
A isEmptyLine() 0 4 3
A current() 0 4 1
A key() 0 4 1
A end() 0 4 1
A __construct() 0 13 1
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
15
{
16
    /**
17
     * @var int Number of bytes to read
18
     */
19
    protected $maxReadBytesPerLine;
20
21
    /** @var resource Pointer to the CSV file to read */
22
    protected $filePointer;
23
24
    /** @var int Number of read rows */
25
    protected $numReadRows = 0;
26
27
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
28
    protected $rowDataBuffer = null;
29
30
    /** @var bool Indicates whether all rows have been read */
31
    protected $hasReachedEndOfFile = false;
32
33
    /** @var string Defines the character used to delimit fields (one character only) */
34
    protected $fieldDelimiter;
35
36
    /** @var string Defines the character used to enclose fields (one character only) */
37
    protected $fieldEnclosure;
38
39
    /** @var string Encoding of the CSV file to be read */
40
    protected $encoding;
41
42
    /** @var string End of line delimiter, given by the user as input. */
43
    protected $inputEOLDelimiter;
44
45
    /** @var bool Whether empty rows should be returned or skipped */
46
    protected $shouldPreserveEmptyRows;
47
48
    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
49
    protected $globalFunctionsHelper;
50
51
    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
52
    protected $encodingHelper;
53
54
    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
55
    protected $encodedEOLDelimiter;
56
57
    /**
     * Builds the iterator: keeps the file pointer and helpers, and copies the
     * CSV settings exposed by the given options object onto this instance.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param \Box\Spout\Reader\CSV\ReaderOptions $options Provides the delimiter, enclosure, encoding, EOL character, read size and empty-row settings
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper Helper used for the file-related global functions
     */
    public function __construct($filePointer, $options, $globalFunctionsHelper)
    {
        // Collaborators
        $this->filePointer = $filePointer;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);

        // How a row is split into cells
        $this->fieldDelimiter = $options->getFieldDelimiter();
        $this->fieldEnclosure = $options->getFieldEnclosure();

        // How the raw data is decoded and read
        $this->encoding = $options->getEncoding();
        $this->inputEOLDelimiter = $options->getEndOfLineCharacter();
        $this->maxReadBytesPerLine = $options->getMaxReadBytesPerLine();

        // Whether empty rows are returned or skipped
        $this->shouldPreserveEmptyRows = $options->shouldPreserveEmptyRows();
    }
75
76
    /**
     * Rewind the Iterator to the first element: moves the file cursor back
     * (past the BOM, if any), resets the row counter and the row buffer,
     * then loads the first row.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        // Forget whatever a previous pass left behind
        $this->rowDataBuffer = null;
        $this->numReadRows = 0;

        // Position the iterator on the first row
        $this->next();
    }
91
92
    /**
     * Moves the file cursor to the first byte that follows the BOM, so that
     * the BOM itself is never read as data.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        // An offset of 0 means there is no BOM: the cursor is simply rewound
        $this->globalFunctionsHelper->fseek(
            $this->filePointer,
            $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding)
        );
    }
105
106
    /**
     * Tells whether the iterator currently points to a row, i.e. there is a
     * file pointer and the end of the file has not been flagged yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    public function valid()
    {
        if (!$this->filePointer) {
            return false;
        }

        return !$this->hasReachedEndOfFile;
    }
116
117
    /**
     * Move forward to next element. Refreshes the end-of-file flag and, when
     * the end has not been reached, reads the data for the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            // Nothing left to read
            return;
        }

        $this->readDataForNextRow();
    }
132
133
    /**
     * Reads rows until one can be returned (see shouldReadNextRow()), then
     * stores it in the row buffer and increments the number of read rows.
     * If no row could be fetched, the iterator is flagged as having reached
     * the end of the file.
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function readDataForNextRow()
    {
        do {
            $rowData = $this->getNextUTF8EncodedRow();
        } while ($this->shouldReadNextRow($rowData));

        if ($rowData !== false) {
            // Cast every cell to a string, so that NULL values become empty strings.
            // An explicit cast is used instead of str_replace(null, null, ...):
            // passing NULL to str_replace() is deprecated as of PHP 8.1.
            $this->rowDataBuffer = array_map(
                function ($cellValue) {
                    return (string) $cellValue;
                },
                $rowData
            );
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = true;
        }
    }
153
154
    /**
     * Decides whether reading must continue past the row that was just fetched.
     * Reading continues when nothing could be fetched although the end of the
     * file is not reached, or when the row is an empty line and empty rows
     * are not preserved.
     *
     * @param array|bool $currentRowData Row that was just fetched, or FALSE if nothing was fetched
     * @return bool TRUE if another row must be read, FALSE if the current row data can be returned
     */
    protected function shouldReadNextRow($currentRowData)
    {
        $fetchFailed = ($currentRowData === false);
        $isAtEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        $rowIsEmptyLine = $this->isEmptyLine($currentRowData);

        if ($fetchFailed && !$isAtEndOfFile) {
            // Nothing was fetched but there is still data ahead
            return true;
        }

        // Empty lines are skipped unless they must be preserved
        return (!$this->shouldPreserveEmptyRows && $rowIsEmptyLine);
    }
169
170
    /**
     * Reads the next CSV row from the file pointer and converts each of its
     * cells to UTF-8.
     * For UTF-16 and UTF-32 data, fgetcsv() leaves extra whitespace on the
     * cells, so it is stripped manually: from the beginning of the cell for
     * little-endian encodings, from the end for big-endian encodings.
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        $rowCells = $this->globalFunctionsHelper->fgetcsv(
            $this->filePointer,
            $this->maxReadBytesPerLine,
            $this->fieldDelimiter,
            $this->fieldEnclosure
        );

        if ($rowCells === false) {
            return false;
        }

        foreach ($rowCells as $position => $cell) {
            if ($this->encoding == EncodingHelper::ENCODING_UTF16_LE
                || $this->encoding == EncodingHelper::ENCODING_UTF32_LE
            ) {
                // little-endian: the extra whitespace is at the beginning of the cell
                $cell = ltrim($cell);
            } elseif ($this->encoding == EncodingHelper::ENCODING_UTF16_BE
                || $this->encoding == EncodingHelper::ENCODING_UTF32_BE
            ) {
                // big-endian: the extra whitespace is at the end of the cell
                $cell = rtrim($cell);
            }

            $rowCells[$position] = $this->encodingHelper->attemptConversionToUTF8($cell, $this->encoding);
        }

        return $rowCells;
    }
205
206
    /**
     * Returns the user-provided end of line delimiter, converted from UTF-8
     * to the encoding of the CSV.
     * The conversion is done on the first call only; the result is then reused.
     *
     * @return string
     */
    protected function getEncodedEOLDelimiter()
    {
        if (isset($this->encodedEOLDelimiter)) {
            // Already converted by a previous call
            return $this->encodedEOLDelimiter;
        }

        $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8(
            $this->inputEOLDelimiter,
            $this->encoding
        );

        return $this->encodedEOLDelimiter;
    }
220
221
    /**
     * Tells whether the given line data represents an empty line, that is
     * an array holding exactly one cell whose value is NULL.
     *
     * @param array|bool $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        if (!is_array($lineData)) {
            return false;
        }

        if (count($lineData) !== 1) {
            return false;
        }

        return ($lineData[0] === null);
    }
229
230
    /**
     * Return the current element, i.e. the row currently held in the buffer.
     * The buffer is NULL after a reset, until a row has been read into it.
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        $bufferedRow = $this->rowDataBuffer;

        return $bufferedRow;
    }
240
241
    /**
     * Return the key of the current element, which is the number of rows
     * read so far (reset to 0 on rewind, incremented for each buffered row).
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        $rowCount = $this->numReadRows;

        return $rowCount;
    }
251
252
    /**
     * Hook meant for cleaning up what was created to iterate over the object.
     * It is intentionally a no-op here.
     *
     * @return void
     */
    public function end()
    {
        // nothing to clean up
    }
261
}
262