Completed
Pull Request — master (#187)
by Marie
11:53
created

RowIterator::next()   B

Complexity

Conditions 6
Paths 3

Size

Total Lines 22
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6

Importance

Changes 6
Bugs 4 Features 0
Metric Value
c 6
b 4
f 0
dl 0
loc 22
ccs 15
cts 15
cp 1
rs 8.6737
cc 6
eloc 13
nc 3
nop 0
crap 6
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
15
{
16
    /**
17
     * If no value is given to stream_get_line(), it defaults to 8192 (which may be too low).
18
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
19
     */
20
    const MAX_READ_BYTES_PER_LINE = 32768;
21
22
    /** @var resource Pointer to the CSV file to read */
23
    protected $filePointer;
24
25
    /** @var int Number of read rows */
26
    protected $numReadRows = 0;
27
28
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
29
    protected $rowDataBuffer = null;
30
31
    /** @var bool Indicates whether all rows have been read */
32
    protected $hasReachedEndOfFile = false;
33
34
    /** @var string Defines the character used to delimit fields (one character only) */
35
    protected $fieldDelimiter;
36
37
    /** @var string Defines the character used to enclose fields (one character only) */
38
    protected $fieldEnclosure;
39
40
    /** @var string Encoding of the CSV file to be read */
41
    protected $encoding;
42
43
    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
44
    protected $globalFunctionsHelper;
45
46
    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
47
    protected $encodingHelper;
48
49
    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
50
    protected $encodedEOLDelimiter;
51
52
    /** @var string End of line delimiter, given by the user as input. */
53
    protected $inputEOLDelimiter;
54
55
    /**
56
     * @param resource $filePointer Pointer to the CSV file to read
57
     * @param string $fieldDelimiter Character that delimits fields
58
     * @param string $fieldEnclosure Character that encloses fields
59
     * @param string $encoding Encoding of the CSV file to be read
     * @param string $endOfLineDelimiter End of line delimiter, given by the user as input
60
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
61
     */
62 66
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
    {
        // Collaborators: the encoding helper is built on top of the global functions helper.
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);

        // Stream to read from, and the encoding its content is expected to use.
        $this->filePointer = $filePointer;
        $this->encoding = $encoding;

        // CSV dialect: field delimiter, field enclosure and the user-provided end of line delimiter.
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
    }
73
74
    /**
75
     * Rewind the Iterator to the first element
76
     * @link http://php.net/manual/en/iterator.rewind.php
77
     *
78
     * @return void
79
     */
80 66
    public function rewind()
    {
        // Move the cursor back to the first data byte (right after the BOM, if any).
        $this->rewindAndSkipBom();

        // Forget everything that was read so far.
        $this->rowDataBuffer = null;
        $this->numReadRows = 0;

        // Pre-load the first row so that current() has something to return.
        $this->next();
    }
89
90
    /**
91
     * This rewinds and skips the BOM if inserted at the beginning of the file
92
     * by moving the file pointer after it, so that it is not read.
93
     *
94
     * @return void
95
     */
96 66
    protected function rewindAndSkipBom()
    {
        // The helper returns how many bytes must be skipped to get past the BOM;
        // an offset of 0 means there is no BOM, which makes this a plain rewind.
        $this->globalFunctionsHelper->fseek(
            $this->filePointer,
            $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding)
        );
    }
103
104
    /**
105
     * Checks if current position is valid
106
     * @link http://php.net/manual/en/iterator.valid.php
107
     *
108
     * @return boolean
109
     */
110 66
    public function valid()
    {
        // Without a usable file pointer there is nothing to iterate over.
        if (!$this->filePointer) {
            return false;
        }

        // Otherwise the position is valid as long as the end of file was not reached.
        return !$this->hasReachedEndOfFile;
    }
114
115
    /**
116
     * Move forward to next element. Empty rows are skipped.
117
     * @link http://php.net/manual/en/iterator.next.php
118
     *
119
     * @return void
120
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
121
     */
122 66
    public function next()
    {
        // Nothing to do if the previous read already consumed the whole file.
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        if ($this->hasReachedEndOfFile) {
            return;
        }

        // Keep reading until a non-empty row is found or the end of file is reached.
        do {
            $lineData = $this->getNextUTF8EncodedLine();
            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

            if ($lineData === false) {
                // Nothing could be read: retry only while there is still data left in the file.
                // FALSE is deliberately never handed to isEmptyLine(), which is documented to take an array.
                $shouldReadNextLine = !$hasNowReachedEndOfFile;
            } else {
                // A row was read: empty rows are skipped.
                $shouldReadNextLine = $this->isEmptyLine($lineData);
            }
        } while ($shouldReadNextLine);

        if ($lineData !== false) {
            $this->rowDataBuffer = $lineData;
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
        }
    }
144
145 9
    /**
146
     * Returns the next line, converted if necessary to UTF-8.
147 66
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
148
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
149
     *
150
     * @return array|false The cells of the next line for the current file pointer, each encoded in UTF-8, or FALSE if nothing to read
0 ignored issues
show
Documentation introduced by
Should the return type not be false|array?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
151
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
152
     */
153
    protected function getNextUTF8EncodedLine()
    {
        $cells = fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure);
        if ($cells === false) {
            return false;
        }

        // Loose comparisons are intentional: they mirror the semantics of a switch statement.
        $isLittleEndian = $this->encoding == EncodingHelper::ENCODING_UTF16_LE
            || $this->encoding == EncodingHelper::ENCODING_UTF32_LE;
        $isBigEndian = !$isLittleEndian
            && ($this->encoding == EncodingHelper::ENCODING_UTF16_BE
                || $this->encoding == EncodingHelper::ENCODING_UTF32_BE);

        foreach ($cells as $index => $cell) {
            // Strip the whitespace on the side that depends on the byte order of the encoding,
            // before converting the cell to UTF-8.
            if ($isLittleEndian) {
                $cell = ltrim($cell);
            } elseif ($isBigEndian) {
                $cell = rtrim($cell);
            }

            $cells[$index] = $this->encodingHelper->attemptConversionToUTF8($cell, $this->encoding);
        }

        return $cells;
    }
178 66
179 66
    /**
180 66
     * Returns the end of line delimiter, encoded using the same encoding as the CSV.
181
     * The return value is cached.
182 66
     *
183
     * @return string
184
     */
185
    protected function getEncodedEOLDelimiter()
    {
        // Serve the cached value when the conversion was already done.
        if (isset($this->encodedEOLDelimiter)) {
            return $this->encodedEOLDelimiter;
        }

        // First call: convert the user-provided delimiter from UTF-8 to the encoding of the CSV and cache it.
        $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8(
            $this->inputEOLDelimiter,
            $this->encoding
        );

        return $this->encodedEOLDelimiter;
    }
193
194
    /**
195
     * @param array|false $lineData Array containing the cell values for the line (FALSE is tolerated and is never considered an empty line)
196
     * @return bool Whether the given line is empty
197
     */
198
    protected function isEmptyLine($lineData)
    {
        // Anything that is not an array (e.g. FALSE when nothing was read) is not an empty line.
        if (!is_array($lineData)) {
            return false;
        }

        // An empty line is represented by an array holding a single null cell.
        if (count($lineData) !== 1) {
            return false;
        }

        return $lineData[0] === null;
    }
202 60
203
    /**
204
     * Return the current element from the buffer
205
     * @link http://php.net/manual/en/iterator.current.php
206
     *
207
     * @return array|null
208
     */
209
    public function current()
    {
        // The buffer is filled by next() and stays null until a row has been read.
        $bufferedRow = $this->rowDataBuffer;

        return $bufferedRow;
    }
213 39
214
    /**
215
     * Return the key of the current element
216
     * @link http://php.net/manual/en/iterator.key.php
217
     *
218
     * @return int
219
     */
220
    public function key()
    {
        // The key is the number of rows read so far, as counted by next().
        $readRowsCount = $this->numReadRows;

        return $readRowsCount;
    }
224
225
    /**
226
     * Cleans up what was created to iterate over the object.
227
     *
228
     * @return void
229
     */
230
    public function end()
    {
        // Intentionally a no-op: this iterator creates nothing that needs to be cleaned up here.
        return;
    }
234
}
235