Completed
Pull Request — master (#187)
by Marie
03:22
created

RowIterator::getNextUTF8EncodedLine()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 2

Importance

Changes 3
Bugs 1 Features 0
Metric Value
c 3
b 1
f 0
dl 0
loc 13
rs 9.4285
ccs 7
cts 7
cp 1
cc 2
eloc 7
nc 2
nop 0
crap 2
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
15
{
16
    /**
17
     * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
18
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
19
     */
20
    const MAX_READ_BYTES_PER_LINE = 32768;
21
22
    /** @var resource Pointer to the CSV file to read */
23
    protected $filePointer;
24
25
    /** @var int Number of read rows */
26
    protected $numReadRows = 0;
27
28
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
29
    protected $rowDataBuffer = null;
30
31
    /** @var bool Indicates whether all rows have been read */
32
    protected $hasReachedEndOfFile = false;
33
34
    /** @var string Defines the character used to delimit fields (one character only) */
35
    protected $fieldDelimiter;
36
37
    /** @var string Defines the character used to enclose fields (one character only) */
38
    protected $fieldEnclosure;
39
40
    /** @var string Encoding of the CSV file to be read */
41
    protected $encoding;
42
43
    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
44
    protected $globalFunctionsHelper;
45
46
    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
47
    protected $encodingHelper;
48
49
    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
50
    protected $encodedEOLDelimiter;
51
52
    /** @var string End of line delimiter, given by the user as input. */
53
    protected $inputEOLDelimiter;
54
55
    /**
     * Stores the reading options and builds the encoding helper used during iteration.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param string $fieldDelimiter Character that delimits fields
     * @param string $fieldEnclosure Character that encloses fields
     * @param string $encoding Encoding of the CSV file to be read
     * @param string $endOfLineDelimiter End of line delimiter, as given by the user
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper Helper to work with global functions
     */
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
    {
        // Collaborators first: the encoding helper is built on top of the global functions helper.
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);

        // Source of the data.
        $this->filePointer = $filePointer;
        $this->encoding = $encoding;

        // CSV dialect.
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
    }
73
74
    /**
     * Rewinds the iterator: moves the file cursor back to the first row (past any BOM),
     * clears the iteration state and loads the first row into the buffer.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        // Put the cursor back at the start of the data.
        $this->rewindAndSkipBom();

        // Forget everything about the previous pass.
        $this->rowDataBuffer = null;
        $this->numReadRows = 0;

        // Pre-load the first row so that current() has something to return.
        $this->next();
    }
89
90
    /**
     * Positions the file cursor right after the BOM, if there is one at the
     * beginning of the file, so that the BOM is never read as data.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        // The helper reports how many bytes to skip; an offset of 0 means
        // there is no BOM, in which case this is a plain rewind.
        $this->globalFunctionsHelper->fseek(
            $this->filePointer,
            $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding)
        );
    }
103
104
    /**
     * Tells whether the iterator currently points to a readable row.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean TRUE if there is a file pointer and the end of file has not been reached
     */
    public function valid()
    {
        if (!$this->filePointer) {
            return false;
        }

        return !$this->hasReachedEndOfFile;
    }
114
115
    /**
     * Move forward to next element. Empty rows are skipped.
     *
     * The row that was read is stored in the buffer returned by current() and the
     * row counter is incremented. If only empty rows remain before the end of the
     * file, the iterator is flagged as having reached the end of the file instead.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            return;
        }

        do {
            $rowData = $this->getNextUTF8EncodedRow();
            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

            // getNextUTF8EncodedRow() returns FALSE when nothing could be read, whereas
            // isEmptyLine() is documented to accept arrays only. Handle the FALSE case
            // explicitly instead of forwarding it to isEmptyLine().
            if ($rowData === false) {
                // Nothing was read: try again unless the end of the file was reached.
                $shouldReadNextRow = !$hasNowReachedEndOfFile;
            } else {
                // A row was read: keep going only if it is an empty one.
                $shouldReadNextRow = $this->isEmptyLine($rowData);
            }
        } while ($shouldReadNextRow);

        if ($rowData !== false) {
            $this->rowDataBuffer = $rowData;
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
        }
    }
144
145
    /**
     * Reads the next row with fgetcsv() and converts each of its cells to UTF-8.
     * As fgetcsv() does not correctly handle non UTF-8 data, the extra whitespace it
     * leaves in the cells is removed manually, with ltrim or rtrim depending on
     * the order of the bytes of the encoding.
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        $rowCells = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if ($rowCells === false) {
            return false;
        }

        // The side to trim only depends on the encoding, so it is worked out once for the whole row.
        $trimLeadingWhitespace = false;
        $trimTrailingWhitespace = false;

        switch ($this->encoding) {
            case EncodingHelper::ENCODING_UTF16_LE:
            case EncodingHelper::ENCODING_UTF32_LE:
                // little-endian: fgetcsv() adds extra whitespace at the beginning of the string
                $trimLeadingWhitespace = true;
                break;

            case EncodingHelper::ENCODING_UTF16_BE:
            case EncodingHelper::ENCODING_UTF32_BE:
                // big-endian: fgetcsv() adds extra whitespace at the end of the string
                $trimTrailingWhitespace = true;
                break;
        }

        foreach ($rowCells as $cellIndex => $cellValue) {
            if ($trimLeadingWhitespace) {
                $cellValue = ltrim($cellValue);
            } elseif ($trimTrailingWhitespace) {
                $cellValue = rtrim($cellValue);
            }

            $rowCells[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $rowCells;
    }
180
181
    /**
     * Gives the user-provided end of line delimiter, converted from UTF-8 to the
     * encoding of the CSV file. The conversion is done once; later calls reuse the result.
     *
     * @return string
     */
    protected function getEncodedEOLDelimiter()
    {
        // Already converted by a previous call: reuse it.
        if (isset($this->encodedEOLDelimiter)) {
            return $this->encodedEOLDelimiter;
        }

        $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);

        return $this->encodedEOLDelimiter;
    }
195
196
    /**
     * Tells whether the given line is empty, that is to say whether it is an
     * array made of exactly one cell whose value is NULL.
     *
     * @param array $lineData Array containing the cells value for the line
     * @return bool Whether the given line is empty (always FALSE for anything that is not an array)
     */
    protected function isEmptyLine($lineData)
    {
        // Only a one-cell array can qualify as an empty line.
        if (!is_array($lineData) || count($lineData) !== 1) {
            return false;
        }

        return $lineData[0] === null;
    }
204
205
    /**
     * Gives the row currently held in the buffer, i.e. the last row stored by next().
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null The buffered row, or NULL if no row has been buffered
     */
    public function current()
    {
        $bufferedRow = $this->rowDataBuffer;

        return $bufferedRow;
    }
215
216
    /**
     * Gives the key of the current element, which is the number of rows read so far.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        $readRowsCount = $this->numReadRows;

        return $readRowsCount;
    }
226
227
    /**
     * Hook called to clean up what was created to iterate over the object.
     * This iterator has nothing to release, so the method is intentionally empty.
     *
     * @return void
     */
    public function end()
    {
        // nothing to clean up
    }
236
}
237