Completed
Pull Request — master (#187)
by Marie
13:06
created

RowIterator::next()   B

Complexity

Conditions 6
Paths 3

Size

Total Lines 22
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6

Importance

Changes 6
Bugs 4 Features 0
Metric Value
c 6
b 4
f 0
dl 0
loc 22
ccs 15
cts 15
cp 1
rs 8.6737
cc 6
eloc 13
nc 3
nop 0
crap 6
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
{
    /**
     * Maximum number of bytes fgetcsv() may read for a single line.
     * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
     */
    const MAX_READ_BYTES_PER_LINE = 32768;

    /** @var resource Pointer to the CSV file to read */
    protected $filePointer;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var string Defines the character used to delimit fields (one character only) */
    protected $fieldDelimiter;

    /** @var string Defines the character used to enclose fields (one character only) */
    protected $fieldEnclosure;

    /** @var string Encoding of the CSV file to be read */
    protected $encoding;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
    protected $encodingHelper;

    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
    protected $encodedEOLDelimiter;

    /** @var string End of line delimiter, given by the user as input. */
    protected $inputEOLDelimiter;

    /**
     * Stores the reading options and creates the encoding helper used to convert cells.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param string $fieldDelimiter Character that delimits fields
     * @param string $fieldEnclosure Character that enclose fields
     * @param string $encoding Encoding of the CSV file to be read
     * @param string $endOfLineDelimiter End of line delimiter, as given by the user
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
    {
        $this->filePointer = $filePointer;
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->encoding = $encoding;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
    }

    /**
     * Rewind the Iterator to the first element.
     * Moves the file cursor back to the start (after any BOM), resets the row
     * counter and buffer, then loads the first row through next().
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowDataBuffer = null;

        $this->next();
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Checks if current position is valid: there must be a file pointer
     * and the end of the file must not have been reached.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return ($this->filePointer && !$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped.
     * The row that was read is stored in the buffer returned by current().
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            return;
        }

        // NOTE(review): a read that keeps returning false while feof() keeps
        // returning false would make this loop spin; confirm this cannot happen.
        do {
            $rowData = $this->getNextUTF8EncodedRow();
            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

            // A failed read (false) is retried until the end of file is reached,
            // whereas a successfully read row is only skipped when it is empty.
            // Splitting the two cases ensures isEmptyLine() only receives arrays.
            $shouldReadNextRow = ($rowData === false)
                ? !$hasNowReachedEndOfFile
                : $this->isEmptyLine($rowData);
        } while ($shouldReadNextRow);

        if ($rowData !== false) {
            $this->rowDataBuffer = $rowData;
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
        }
    }

    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        // NOTE(review): feof()/fseek() go through $this->globalFunctionsHelper but fgetcsv()
        // is called directly; confirm whether the helper exposes fgetcsv() and should be used here.
        $encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if (false === $encodedRowData) {
            return false;
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            // fgetcsv() returns a single NULL cell for a blank line. The (string) casts below
            // keep the trimmed result identical ('') while avoiding the
            // "passing null to parameter of type string" deprecation of PHP 8.1+.
            switch ($this->encoding) {
                case EncodingHelper::ENCODING_UTF16_LE:
                case EncodingHelper::ENCODING_UTF32_LE:
                    // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = ltrim((string) $cellValue);
                    break;

                case EncodingHelper::ENCODING_UTF16_BE:
                case EncodingHelper::ENCODING_UTF32_BE:
                    // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                    $cellValue = rtrim((string) $cellValue);
                    break;
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $encodedRowData;
    }

    /**
     * Returns the end of line delimiter, encoded using the same encoding as the CSV.
     * The return value is cached.
     *
     * @return string
     */
    protected function getEncodedEOLDelimiter()
    {
        if (!isset($this->encodedEOLDelimiter)) {
            $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);
        }

        return $this->encodedEOLDelimiter;
    }

    /**
     * Tells whether the given row is an empty line, i.e. an array
     * whose only cell is NULL. Any non-array value is never an empty line.
     *
     * @param array|false $lineData Array containing the cells value for the line, or FALSE if nothing was read
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
    }

    /**
     * Return the current element from the buffer
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element, which is the number of rows read so far.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
}
237