Failed Conditions
Push — perf-tests ( 50942d...2fc93e )
by Adrien
14:53
created

RowIterator::getNextUTF8EncodedRow()   C

Complexity

Conditions 7
Paths 7

Size

Total Lines 27
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 1 Features 0
Metric Value
c 1
b 1
f 0
dl 0
loc 27
rs 6.7272
cc 7
eloc 16
nc 7
nop 0
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
15
{
16
    /**
     * Maximum number of bytes read per line by fgetcsv().
     * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
     */
    const MAX_READ_BYTES_PER_LINE = 32768;

    /**
     * @var resource Pointer to the CSV file to read
     */
    protected $filePointer;

    /**
     * @var int Number of read rows; incremented each time a row is buffered and exposed through key()
     */
    protected $numReadRows = 0;

    /**
     * @var array|null Buffer used to store the row data, while checking if there are more rows to read.
     *                 This is the value exposed through current().
     */
    protected $rowDataBuffer = null;

    /**
     * @var bool Indicates whether all rows have been read
     */
    protected $hasReachedEndOfFile = false;

    /**
     * @var string Defines the character used to delimit fields (one character only)
     */
    protected $fieldDelimiter;

    /**
     * @var string Defines the character used to enclose fields (one character only)
     */
    protected $fieldEnclosure;

    /**
     * @var string Encoding of the CSV file to be read
     */
    protected $encoding;

    /**
     * @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions
     */
    protected $globalFunctionsHelper;

    /**
     * @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings
     */
    protected $encodingHelper;

    /**
     * @var string End of line delimiter, encoded using the same encoding as the CSV.
     *             Left unset until getEncodedEOLDelimiter() computes it.
     */
    protected $encodedEOLDelimiter;

    /**
     * @var string End of line delimiter, given by the user as input.
     */
    protected $inputEOLDelimiter;
54
55
    /**
     * Stores the reading options and creates the encoding helper used to convert cells to UTF-8.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param string $fieldDelimiter Character that delimits fields
     * @param string $fieldEnclosure Character that enclose fields
     * @param string $encoding Encoding of the CSV file to be read
     * @param string $endOfLineDelimiter End of line delimiter
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
    {
        // Collaborators
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);

        // Source to read from
        $this->filePointer = $filePointer;

        // CSV format options
        $this->encoding = $encoding;
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
    }
74
75
    /**
     * Rewind the Iterator to the first element.
     * The file cursor is placed right after the BOM (if any), the row counter and the
     * row buffer are reset, and the first row is then loaded through next().
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        // Forget everything about previously read rows
        $this->rowDataBuffer = null;
        $this->numReadRows = 0;

        // Load the first row so that current() is immediately usable
        $this->next();
    }
90
91
    /**
     * Moves the file pointer back to the start of the data: right after the BOM
     * when one was inserted at the beginning of the file, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        // An offset of 0 means there is no BOM: seeking to it is then a plain rewind
        $this->globalFunctionsHelper->fseek(
            $this->filePointer,
            $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding)
        );
    }
104
105
    /**
     * Checks if current position is valid: there must be a file pointer
     * and the end of the file must not have been reached.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        if (!$this->filePointer) {
            return false;
        }

        return !$this->hasReachedEndOfFile;
    }
115
116
    /**
     * Move forward to next element. Empty rows are skipped.
     * The row that was read is stored in the row buffer and the row counter is incremented.
     * If only empty lines remain before the end of the file, nothing is buffered and
     * the iterator is flagged as having reached the end of the file.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            return;
        }

        do {
            $rowData = $this->getNextUTF8EncodedRow();
            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

            if ($rowData === false) {
                // Nothing could be read: only try again if there is still something left in the file.
                // isEmptyLine() is not called here, as it expects an array, not FALSE.
                $shouldReadAgain = !$hasNowReachedEndOfFile;
            } else {
                // A row was read: skip it if it is an empty line
                $shouldReadAgain = $this->isEmptyLine($rowData);
            }
        } while ($shouldReadAgain);

        if ($rowData !== false) {
            $this->rowDataBuffer = $rowData;
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
        }
    }
145
146
    /**
     * Returns the next row, converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes)
     * before converting each cell.
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        // NOTE(review): fgetcsv() is called directly here, whereas feof() and fseek() go through
        // $this->globalFunctionsHelper - confirm this is intentional.
        $encodedRowData = fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
        if (false === $encodedRowData) {
            return false;
        }

        // The encoding is the same for every cell of the row, so the kind of trimming
        // to apply is decided once, before iterating over the cells.
        $shouldTrimLeft = false;
        $shouldTrimRight = false;

        switch ($this->encoding) {
            case EncodingHelper::ENCODING_UTF16_LE:
            case EncodingHelper::ENCODING_UTF32_LE:
                // remove whitespace from the beginning of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                $shouldTrimLeft = true;
                break;

            case EncodingHelper::ENCODING_UTF16_BE:
            case EncodingHelper::ENCODING_UTF32_BE:
                // remove whitespace from the end of a string as fgetcsv() adds extra whitespace when it tries to explode non UTF-8 data
                $shouldTrimRight = true;
                break;
        }

        foreach ($encodedRowData as $cellIndex => $cellValue) {
            if ($shouldTrimLeft) {
                $cellValue = ltrim($cellValue);
            } elseif ($shouldTrimRight) {
                $cellValue = rtrim($cellValue);
            }

            $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $encodedRowData;
    }
181
182
    /**
     * Returns the end of line delimiter, encoded using the same encoding as the CSV.
     * The conversion from UTF-8 is done on the first call only; the result is then
     * kept and returned as is by the following calls.
     *
     * @return string
     */
    protected function getEncodedEOLDelimiter()
    {
        if (isset($this->encodedEOLDelimiter)) {
            return $this->encodedEOLDelimiter;
        }

        $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);

        return $this->encodedEOLDelimiter;
    }
196
197
    /**
     * Checks whether the given line is empty, that is to say whether it is
     * an array containing a single cell whose value is NULL.
     * Anything that is not an array (like FALSE, when no row could be read)
     * is not considered to be an empty line.
     *
     * @param array|false $lineData Array containing the cells value for the line, or FALSE if no line was read
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        if (!is_array($lineData)) {
            return false;
        }

        return (count($lineData) === 1 && $lineData[0] === null);
    }
205
206
    /**
     * Return the current element, which is the row stored in the buffer
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null The buffered row, or NULL when no row is buffered
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }
216
217
    /**
     * Return the key of the current element, which is the number of rows read so far
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }
227
228
    /**
     * Cleans up what was created to iterate over the object.
     * There is nothing to clean up for this iterator.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
237
}
238