1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Box\Spout\Reader\CSV; |
4
|
|
|
|
5
|
|
|
use Box\Spout\Reader\IteratorInterface; |
6
|
|
|
use Box\Spout\Common\Helper\EncodingHelper; |
7
|
|
|
|
8
|
|
|
/** |
9
|
|
|
* Class RowIterator |
10
|
|
|
* Iterate over CSV rows. |
11
|
|
|
* |
12
|
|
|
* @package Box\Spout\Reader\CSV |
13
|
|
|
*/ |
14
|
|
|
class RowIterator implements IteratorInterface
{
    /**
     * If no value is given to stream_get_line(), it defaults to 8192 (which may be too low).
     * Alignment with other functions like fgets() is discussed here: https://bugs.php.net/bug.php?id=48421
     */
    const MAX_READ_BYTES_PER_LINE = 32768;

    /** @var resource Pointer to the CSV file to read */
    protected $filePointer;

    /** @var int Number of read rows */
    protected $numReadRows = 0;

    /** @var array|null Buffer used to store the row data, while checking if there are more rows to read */
    protected $rowDataBuffer = null;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var string Defines the character used to delimit fields (one character only) */
    protected $fieldDelimiter;

    /** @var string Defines the character used to enclose fields (one character only) */
    protected $fieldEnclosure;

    /** @var string Encoding of the CSV file to be read */
    protected $encoding;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */
    protected $encodingHelper;

    /** @var string End of line delimiter, encoded using the same encoding as the CSV */
    protected $encodedEOLDelimiter;

    /** @var string End of line delimiter, given by the user as input. */
    protected $inputEOLDelimiter;

    /**
     * Stores the reading options and builds the encoding helper on top of the given
     * global functions helper. Nothing is read from the file until rewind() is called.
     *
     * @param resource $filePointer Pointer to the CSV file to read
     * @param string $fieldDelimiter Character that delimits fields
     * @param string $fieldEnclosure Character that enclose fields
     * @param string $encoding Encoding of the CSV file to be read
     * @param string $endOfLineDelimiter End of line delimiter, as given by the user
     * @param \Box\Spout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
     */
    public function __construct($filePointer, $fieldDelimiter, $fieldEnclosure, $encoding, $endOfLineDelimiter, $globalFunctionsHelper)
    {
        $this->filePointer = $filePointer;
        $this->fieldDelimiter = $fieldDelimiter;
        $this->fieldEnclosure = $fieldEnclosure;
        $this->encoding = $encoding;
        $this->inputEOLDelimiter = $endOfLineDelimiter;
        $this->globalFunctionsHelper = $globalFunctionsHelper;

        $this->encodingHelper = new EncodingHelper($globalFunctionsHelper);
    }

    /**
     * Rewind the Iterator to the first element.
     * Moves the file cursor back to the start (after any BOM), resets the row counter
     * and the row buffer, then loads the first row through next().
     * @link http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    public function rewind()
    {
        $this->rewindAndSkipBom();

        $this->numReadRows = 0;
        $this->rowDataBuffer = null;

        $this->next();
    }

    /**
     * This rewinds and skips the BOM if inserted at the beginning of the file
     * by moving the file pointer after it, so that it is not read.
     *
     * @return void
     */
    protected function rewindAndSkipBom()
    {
        $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);

        // sets the cursor after the BOM (0 means no BOM, so rewind it)
        $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
    }

    /**
     * Checks if current position is valid: a file pointer is set and
     * the end of the file has not been reached yet.
     * @link http://php.net/manual/en/iterator.valid.php
     *
     * @return boolean
     */
    public function valid()
    {
        return ($this->filePointer && !$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Empty rows are skipped.
     * @link http://php.net/manual/en/iterator.next.php
     *
     * @return void
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    public function next()
    {
        $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);

        if ($this->hasReachedEndOfFile) {
            return;
        }

        // Keep reading while the read failed before the end of the file, or while the row is empty.
        // The end of file state is sampled after each read, so the loop stops once nothing is left.
        do {
            $lineData = $this->getNextUTF8EncodedLine();
            $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
        } while (($lineData === false && !$hasNowReachedEndOfFile) || $this->isEmptyLine($lineData));

        if ($lineData !== false) {
            $this->rowDataBuffer = $lineData;
            $this->numReadRows++;
        } else {
            // If we reach this point, it means end of file was reached.
            // This happens when the last lines are empty lines.
            $this->hasReachedEndOfFile = $hasNowReachedEndOfFile;
        }
    }

    /**
     * Returns the cells of the next line, each converted if necessary to UTF-8.
     * As fgetcsv() does not manage correctly encoding for non UTF-8 data,
     * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
     *
     * A blank line is reported by fgetcsv() as an array holding a single NULL cell.
     * That marker is returned untouched so that isEmptyLine() can still recognize it.
     *
     * @return array|false The cells of the next line for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
     * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
     */
    protected function getNextUTF8EncodedLine()
    {
        $encodedLineData = fgetcsv($this->filePointer, 0, $this->fieldDelimiter, $this->fieldEnclosure);
        if (false === $encodedLineData) {
            return false;
        }

        // Trimming a NULL cell turns it into an empty string (and is deprecated as of PHP 8.1),
        // which would hide the blank-line marker from isEmptyLine(). Leave such rows as they are.
        if ($this->isEmptyLine($encodedLineData)) {
            return $encodedLineData;
        }

        foreach ($encodedLineData as $i => $line) {
            switch ($this->encoding) {
                case EncodingHelper::ENCODING_UTF16_LE:
                case EncodingHelper::ENCODING_UTF32_LE:
                    $line = ltrim($line);
                    break;

                case EncodingHelper::ENCODING_UTF16_BE:
                case EncodingHelper::ENCODING_UTF32_BE:
                    $line = rtrim($line);
                    break;
            }

            $encodedLineData[$i] = $this->encodingHelper->attemptConversionToUTF8($line, $this->encoding);
        }

        return $encodedLineData;
    }

    /**
     * Returns the end of line delimiter, encoded using the same encoding as the CSV.
     * The return value is cached.
     *
     * @return string
     */
    protected function getEncodedEOLDelimiter()
    {
        if (!isset($this->encodedEOLDelimiter)) {
            $this->encodedEOLDelimiter = $this->encodingHelper->attemptConversionFromUTF8($this->inputEOLDelimiter, $this->encoding);
        }

        return $this->encodedEOLDelimiter;
    }

    /**
     * Tells whether the given line data is the blank-line marker: an array with a single NULL cell.
     * Any non-array value (such as the FALSE returned when nothing could be read) is not an empty line.
     *
     * @param array|false $lineData Array containing the cells value for the line, or FALSE if nothing was read
     * @return bool Whether the given line is empty
     */
    protected function isEmptyLine($lineData)
    {
        return (is_array($lineData) && count($lineData) === 1 && $lineData[0] === null);
    }

    /**
     * Return the current element from the buffer
     * @link http://php.net/manual/en/iterator.current.php
     *
     * @return array|null
     */
    public function current()
    {
        return $this->rowDataBuffer;
    }

    /**
     * Return the key of the current element, which is the number of rows read so far.
     * @link http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    public function key()
    {
        return $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end()
    {
        // do nothing
    }
}
235
|
|
|
|
This check looks for type mismatches where the missing type is `false`. This is usually indicative of an error condition. Consider the following example:
This function either returns a new `DateTime` object or `false`, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned `false` before passing on the value to another function or method that may not be able to handle a `false`.