Completed
Pull Request — master (#331)
by Adrien
02:57
created

RowIterator::current()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Box\Spout\Reader\CSV;
4
5
use Box\Spout\Reader\IteratorInterface;
6
use Box\Spout\Common\Helper\EncodingHelper;
7
8
/**
9
 * Class RowIterator
10
 * Iterate over CSV rows.
11
 *
12
 * @package Box\Spout\Reader\CSV
13
 */
14
class RowIterator implements IteratorInterface
15
{
16
    /**
17
     * If no value is given to fgetcsv(), it defaults to 8192 (which may be too low).
18
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
19
     */
20
    const MAX_READ_BYTES_PER_LINE = 32768;
21
22
    /** @var resource Pointer to the CSV file to read */
23
    protected $filePointer;
24
25
    /** @var int Number of read rows */
26
    protected $numReadRows = 0;
27
28
    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
29
    protected $rowDataBuffer = null;
30
31
    /** @var bool Indicates whether all rows have been read */
32
    protected $hasReachedEndOfFile = false;
33
34
    /** @var string Defines the character used to delimit fields (one character only) */
35
    protected $fieldDelimiter;
36
37
    /** @var string Defines the character used to enclose fields (one character only) */
38
    protected $fieldEnclosure;
39
40
    /** @var string Encoding of the CSV file to be read */
41
    protected $encoding;
42
43
    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
44
    protected $globalFunctionsHelper;
45
46
    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
47
    protected $encodingHelper;
48
49
    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
50
    protected $encodedEOLDelimiter;
51
52
    /** @var string End of line delimiter, given by the user as input. */
53
    protected $inputEOLDelimiter;
54
55
    /** @var bool Whether empty rows should be returned or skipped */
56
    protected $shouldPreserveEmptyRows;
57
58
    /**
     * Stores the CSV reading options and builds the encoding helper on top of the given global functions helper.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param string $fieldDelimiter Character that delimits fields
     * @param string $fieldEnclosure Character that encloses fields
     * @param string $endOfLineDelimiter End of line delimiter, as given by the user
     * @param string $encoding Encoding of the CSV file to be read
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be returned or skipped
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper Helper to work with global functions
     */
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $endOfLineDelimiter, $encoding, $shouldPreserveEmptyRows, $globalFunctionsHelper)
    {
        // Collaborators: the encoding helper wraps the same global functions helper
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);

        // Data source
        $this->filePointer = $filePointer;
        $this->encoding = $encoding;

        // Parsing options
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
    }
79
80
    /**
     * Rewind the Iterator to the first element: resets the reading state,
     * moves the file pointer back to the start of the data (after any BOM)
     * and loads the first row into the buffer.
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        // Forget everything that was read so far
        $this->rowDataBuffer = null;
        $this->numReadRows = 0;

        $this->rewindAndSkipBom();

        // Pre-load the first row so that current() has something to return
        $this->next();
    }
95
96
    /**
     * Positions the file pointer right after the BOM, if the file starts with one,
     * so that the BOM is never read as data.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $offsetAfterBom = $this->encodingHelper->getBytesOffsetToSkipBOM(
            $this->filePointer,
            $this->encoding
        );

        // An offset of 0 means there is no BOM: seeking to it simply rewinds the file
        $this->globalFunctionsHelper->fseek($this->filePointer, $offsetAfterBom);
    }
109
110
    /**
     * Checks if current position is valid
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return bool FALSE when there is no file pointer or when the end of the file has been reached, TRUE otherwise
     */
    public function valid()
    {
        if (!$this->filePointer) {
            return false;
        }

        return !$this->hasReachedEndOfFile;
    }
120
121
    /**
     * Move forward to next element. Records whether the end of the file has been
     * reached and, if it has not, reads the data for the next unprocessed row.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            // Nothing left to read
            return;
        }

        $this->readDataForNextRow();
    }
136
137
    /**
138
     * @return void
139
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
140
     */
141 78
    protected function readDataForNextRow()
142
    {
143
        do {
144 78
            $rowData = $this->getNextUTF8EncodedRow();
145 78
        } while ($this->shouldReadNextRow($rowData));
0 ignored issues
show
Security Bug introduced by
It seems like $rowData defined by $this->getNextUTF8EncodedRow() on line 144 can also be of type false; however, Box\Spout\Reader\CSV\Row...or::shouldReadNextRow() does only seem to accept array, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
146
147 78
        if ($rowData !== false) {
148
            // str_replace will replace NULL values by empty strings
149 72
            $this->rowDataBuffer = str_replace(null, null, $rowData);
150 72
            $this->numReadRows++;
151 72
        } else {
152
            // If we reach this point, it means end of file was reached.
153
            // This happens when the last lines are empty lines.
154 24
            $this->hasReachedEndOfFile = true;
155
        }
156 78
    }
157
158
    /**
     * Decides whether the row that was just fetched must be discarded and another one read.
     * Reading continues when the fetch failed before the end of the file,
     * or when the row is an empty line and empty rows are not preserved.
     *
     * @param array|false $currentRowData Data of the row that was just fetched, or FALSE if nothing could be fetched
     * @return bool Whether the data for the current row can be returned or if we need to keep reading
     */
    protected function shouldReadNextRow($currentRowData)
    {
        $hasFailedToFetchRowData = ($currentRowData === false);
        $isAtEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        $isEmptyLine = $this->isEmptyLine($currentRowData);

        // Nothing was fetched but the file is not exhausted yet: try again
        if ($hasFailedToFetchRowData && !$isAtEndOfFile) {
            return true;
        }

        // Otherwise, only keep reading to skip over an unwanted empty line
        return (!$this->shouldPreserveEmptyRows && $isEmptyLine);
    }
173
174
    /**
     * Reads the next row with fgetcsv() and converts each of its cells to UTF-8.
     * As fgetcsv() does not correctly handle non UTF-8 data, the extra whitespace it adds
     * is stripped manually: from the beginning of each cell for little-endian encodings,
     * from the end of each cell for big-endian encodings.
     *
     * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedRow()
    {
        $rawCells = $this->globalFunctionsHelper->fgetcsv(
            $this->filePointer,
            self::MAX_READ_BYTES_PER_LINE,
            $this->fieldDelimiter,
            $this->fieldEnclosure
        );

        if ($rawCells === false) {
            return false;
        }

        // The encoding is the same for every cell, so decide once which side needs trimming.
        // Loose comparison is used on purpose, to match what a switch statement does.
        $isLittleEndian = (
            $this->encoding == EncodingHelper::ENCODING_UTF16_LE ||
            $this->encoding == EncodingHelper::ENCODING_UTF32_LE
        );
        $isBigEndian = !$isLittleEndian && (
            $this->encoding == EncodingHelper::ENCODING_UTF16_BE ||
            $this->encoding == EncodingHelper::ENCODING_UTF32_BE
        );

        $utf8Cells = array();

        foreach ($rawCells as $cellIndex => $cellValue) {
            if ($isLittleEndian) {
                // fgetcsv() leaves extra whitespace at the beginning of the cell
                $cellValue = ltrim($cellValue);
            } elseif ($isBigEndian) {
                // fgetcsv() leaves extra whitespace at the end of the cell
                $cellValue = rtrim($cellValue);
            }

            $utf8Cells[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
        }

        return $utf8Cells;
    }
209
210
    /**
211
     * Returns the end of line delimiter, encoded using the same encoding as the CSV.
212
     * The return value is cached.
213
     *
214
     * @return string
1 ignored issue
show
Documentation introduced by
Should the return type not be string|boolean?

This check compares the return type specified in the @return annotation of a function or method doc comment with the types returned by the function and raises an issue if they mismatch.

Loading history...
215
     */
216
    protected function getEncodedEOLDelimiter()
217
    {
218
        if (!isset($this->encodedEOLDelimiter)) {
219
            $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);
1 ignored issue
show
Documentation Bug introduced by
It seems like $this->encodingHelper->a...miter, $this->encoding) can also be of type boolean. However, the property $encodedEOLDelimiter is declared as type string. Maybe add an additional type check?

Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
220
        }
221
222
        return $this->encodedEOLDelimiter;
223
    }
224
225
    /**
     * Tells whether the given line data represents an empty line,
     * that is an array holding a single cell whose value is NULL.
     *
     * @param array|false $lineData Array containing the cells value for the line (anything that is not an array is never considered empty)
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        if (!is_array($lineData) || count($lineData) !== 1) {
            return false;
        }

        return $lineData[0] === null;
    }
233
234
    /**
     * Return the current element, i.e. the row data currently held in the buffer
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null The buffered row data, or NULL if the buffer is empty
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }
244
245
    /**
     * Return the key of the current element, which is the value of the read rows counter
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int Number of rows read so far
     */
    public function key()
    {
        return $this->numReadRows;
    }
255
256
    /**
     * Cleans up what was created to iterate over the object.
     * This iterator has nothing to clean up, so the method is intentionally empty.
     *
     * @return void
     */
    public function end()
    {
        // nothing to clean up
    }
265
}
266