1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace CSanquer\ColibriCsv; |
4
|
|
|
|
5
|
|
|
use CSanquer\ColibriCsv\Utility\Transcoder; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* Csv Reader |
9
|
|
|
* |
10
|
|
|
* @author Charles SANQUER - <[email protected]> |
11
|
|
|
*/ |
12
|
|
|
class CsvReader extends AbstractCsv implements \Iterator, \Countable |
13
|
|
|
{ |
14
|
|
|
/** |
15
|
|
|
* |
16
|
|
|
* @var int |
17
|
|
|
*/ |
18
|
|
|
private $position = 0; |
19
|
|
|
|
20
|
|
|
/** |
21
|
|
|
* |
22
|
|
|
* @var array|false|null values of the current row; false marks a skipped empty line, null the end of the file |
23
|
|
|
*/ |
24
|
|
|
private $currentValues = []; |
25
|
|
|
|
26
|
|
|
/** |
27
|
|
|
* |
28
|
|
|
* @var string |
29
|
|
|
*/ |
30
|
|
|
protected $detectedEncoding; |
31
|
|
|
|
32
|
|
|
/**
 * Build a CSV reader using the default Excel reading configuration.
 *
 * Available options:
 *  - delimiter : (default = ';')
 *  - enclosure : (default = '"')
 *  - encoding : (default = 'CP1252')
 *  - eol : (default = "\r\n")
 *  - escape : (default = "\\")
 *  - first_row_header : (default = false) use the first CSV row as header
 *  - bom : (default = false) add UTF8 BOM marker
 *  - translit : (default = 'translit') iconv translit option, possible values: 'translit', 'ignore', null
 *  - force_encoding_detect : (default = false)
 *  - skip_empty : (default = false) remove lines with empty values
 *  - trim : (default = false) trim each value on each line
 *
 * N.B.: the options 'force_encoding_detect', 'skip_empty' and 'trim'
 * significantly decrease performance.
 *
 * @param array $options Dialect options describing the CSV file parameters
 */
public function __construct($options = [])
{
    parent::__construct($options);

    // The reader opens its file in binary read-only mode.
    $this->fileHandlerMode = 'rb';
    $this->mode = self::MODE_READING;
}
60
|
|
|
|
61
|
3 |
|
/**
 * File open modes considered compatible with this reader.
 *
 * Every listed mode permits reading from the stream.
 *
 * @return string[]
 */
protected function getCompatibleFileHanderModes()
{
    $readableModes = ['rb', 'r+b', 'w+b', 'a+b', 'x+b', 'c+b'];

    return $readableModes;
}
65
|
|
|
|
66
|
|
|
/**
 * Open a CSV file for reading.
 *
 * Once the parent has opened the file, the encoding is resolved through
 * detectEncoding() and a transcoder is created for that encoding.
 *
 * @param  string|resource $file filename or stream resource, default = null
 * @return CsvReader
 */
public function open($file = null)
{
    parent::open($file);

    $this->detectEncoding();
    $encoding = $this->detectedEncoding;
    $this->transcoder = new Transcoder($encoding);

    return $this;
}
80
|
|
|
|
81
|
|
|
/**
 * Resolve the encoding used to read the current file.
 *
 * The dialect encoding is used as the starting value. When a file is opened
 * and either forced detection is enabled or no encoding is configured, a
 * sample of at most 100 lines is read from the file handler and passed to the
 * transcoder, with the dialect encoding as second argument.
 *
 * N.B.: sampling advances the file pointer; it is not repositioned here.
 */
protected function detectEncoding()
{
    $configuredEncoding = $this->dialect->getEncoding();
    $this->detectedEncoding = $configuredEncoding;

    if (!$this->isFileOpened()) {
        return;
    }

    if (!$this->dialect->getForceEncodingDetection() && !empty($configuredEncoding)) {
        return;
    }

    // Only read the first 100 lines to keep detection cheap on large files.
    $handle = $this->getFileHandler();
    $sample = '';
    for ($read = 0; $read < 100 && !feof($handle); $read++) {
        $chunk = fgets($handle);
        if ($chunk === false) {
            // Nothing more can be read (end of file or read error).
            break;
        }
        $sample .= $chunk;
    }

    $this->detectedEncoding = $this->transcoder->detectEncoding($sample, $configuredEncoding);
}
101
|
|
|
|
102
|
|
|
/**
 * Read the next CSV record from a file handler and normalise its values.
 *
 * Each value is unescaped, transcoded from the detected encoding to UTF-8 and,
 * when the dialect asks for it, trimmed. When the dialect uses the first row
 * as header and headers are known, the values are keyed by header name: rows
 * shorter than the header are padded with null and values beyond the header
 * size are dropped, so a ragged row no longer makes array_combine() fail.
 *
 * @param resource|null $fileHandler
 * @return array|false|null the row values; false when empty lines are skipped
 *                          and every value of the row is empty; null when
 *                          nothing could be read (end of file reached)
 *
 * @throws \InvalidArgumentException if $fileHandler is not a resource
 */
protected function readLine($fileHandler)
{
    $row = null;
    if (!is_resource($fileHandler)) {
        throw new \InvalidArgumentException('A valid file handler resource must be passed as parameter.');
    }

    if (!feof($fileHandler)) {
        $enclosure = $this->dialect->getEnclosure();
        $escape = $this->dialect->getEscape();
        $line = fgetcsv($fileHandler, null, $this->dialect->getDelimiter(), $enclosure, $escape);

        if ($line !== false) {
            $trim = $this->dialect->getTrim();
            $translit = $this->dialect->getTranslit();
            $detectedEncoding = $this->detectedEncoding;
            $transcoder = $this->transcoder;

            // Clean the leading field of a possible BOM while the reader is
            // still at the start of the file (position not yet above 0).
            if ($this->position <= 0) {
                $line[0] = $this->removeBom($line[0]);
            }

            $row = array_map(function ($var) use ($enclosure, $escape, $trim, $translit, $transcoder, $detectedEncoding) {
                // fgetcsv() keeps the escape character in front of an escaped
                // enclosure, so collapse it when the escape char is not the
                // double quote itself.
                if ($enclosure === '"' && $escape !== $enclosure) {
                    $var = str_replace($escape.$enclosure, $enclosure, $var);
                }

                $var = $transcoder->transcode($var, $detectedEncoding, 'UTF-8', $translit);

                return $trim ? trim($var) : $var;
            }, $line);

            // The emptiness scan is only needed when empty lines are skipped.
            if ($this->dialect->getSkipEmptyLines()) {
                $notEmptyValues = array_filter($row, function ($var) {
                    return $var !== false && $var !== null && $var !== '';
                });

                if (0 === count($notEmptyValues)) {
                    $row = false;
                }
            }
        }
    }

    if ($this->dialect->getFirstRowHeader() && !empty($this->headers) && !empty($row)) {
        // array_combine() requires both arrays to have the same size, so the
        // row is aligned on the header first.
        $headerCount = count($this->headers);
        $values = array_slice(array_pad($row, $headerCount, null), 0, $headerCount);
        $row = array_combine($this->headers, $values);
    }

    return $row;
}
158
|
|
|
|
159
|
|
|
/**
 * Return the current row, then advance the reader to the next one.
 *
 * @return array|false the current row, or false when the reader is no longer valid
 */
public function getRow()
{
    if (!$this->valid()) {
        return false;
    }

    $row = $this->current();
    $this->next();

    return $row;
}
175
|
|
|
|
176
|
|
|
/**
 * Collect every row of the CSV file into an array.
 *
 * The reader is rewound first, then read until it is no longer valid.
 *
 * N.B.: this method can consume a lot of memory on large CSV files;
 * prefer iterating over the reader instead.
 *
 * @return array all rows of the CSV file
 */
public function getRows()
{
    $allRows = [];

    for ($this->rewind(); $this->valid(); $this->next()) {
        $allRows[] = $this->current();
    }

    return $allRows;
}
197
|
|
|
|
198
|
|
|
/**
 * Restart CSV reading from the first line.
 *
 * Alias of the iterator method rewind().
 */
public function reset()
{
    // Pure delegation: all the repositioning work is done by rewind().
    $this->rewind();
}
207
|
|
|
|
208
|
|
|
/**
 * Return the values stored by the last read, without moving the reader.
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * missing return type of Iterator::current(); older PHP versions parse the
 * line as a comment.
 *
 * @return array|null the current row, null once the end of the file is reached
 */
#[\ReturnTypeWillChange]
public function current()
{
    return $this->currentValues;
}
216
|
|
|
|
217
|
|
|
/**
 * Return the position counter of the reader.
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * missing return type of Iterator::key(); older PHP versions parse the line
 * as a comment.
 *
 * @return int
 */
#[\ReturnTypeWillChange]
public function key()
{
    return $this->position;
}
225
|
|
|
|
226
|
16 |
|
/**
 * Read the next row from the file and make it the current one.
 *
 * The position is incremented for every line read. When the dialect skips
 * empty lines, reading continues until a non-empty row or the end of the file
 * is found; this is done with a loop rather than recursion so that long runs
 * of empty lines cannot exhaust the call stack.
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * return type of Iterator::next(); older PHP versions parse the line as a
 * comment.
 *
 * @return array|false|null the new current row, null at the end of the file
 */
#[\ReturnTypeWillChange]
public function next()
{
    $skipEmptyLines = $this->dialect->getSkipEmptyLines();

    do {
        $this->currentValues = $this->readLine($this->getFileHandler());
        $this->position++;
        // readLine() reports a skipped empty line as false.
    } while ($skipEmptyLines && $this->currentValues === false);

    return $this->currentValues;
}
237
|
|
|
|
238
|
16 |
|
/**
 * Move the reader back to the first data row.
 *
 * The file is (re)opened if needed and its pointer rewound. When the dialect
 * uses the first row as header, that row is read and stored as trimmed
 * header names; if no header row can be read (for instance on an empty
 * file), the headers are left empty instead of failing. The first data row
 * is then loaded with next().
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * missing return type of Iterator::rewind(); older PHP versions parse the
 * line as a comment.
 */
#[\ReturnTypeWillChange]
public function rewind()
{
    $this->openFile($this->fileHandlerMode);
    if (!$this->isFileOpened()) {
        return;
    }

    $handle = $this->getFileHandler();
    rewind($handle);

    // next() increments the position after reading, so the first row gets 0.
    $this->position = -1;

    if ($this->dialect->getFirstRowHeader()) {
        $this->position++;
        // Headers are cleared first so that readLine() returns the header
        // row as a plain list instead of combining it with stale headers.
        $this->headers = [];
        $this->currentValues = null;

        $headerRow = $this->readLine($handle);
        $this->headers = is_array($headerRow) ? array_map('trim', $headerRow) : [];
    }

    $this->next();
}
256
|
|
|
|
257
|
|
|
/**
 * Tell whether the reader currently holds a row.
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * missing return type of Iterator::valid(); older PHP versions parse the
 * line as a comment.
 *
 * @return bool false once the current values are null (nothing left to read)
 */
#[\ReturnTypeWillChange]
public function valid()
{
    return null !== $this->currentValues;
}
265
|
|
|
|
266
|
9 |
|
/**
 * Count the rows of the CSV file.
 *
 * The whole file is scanned from its beginning. When the dialect skips empty
 * lines, a row is only counted if at least one of its values contains an
 * alphanumeric character. When the first row is used as header, it is not
 * counted.
 *
 * The file pointer is restored to where it was before counting, so calling
 * count() while iterating over the reader no longer ends the iteration.
 *
 * The attribute below silences the PHP 8.1+ deprecation notice for the
 * missing return type of Countable::count(); older PHP versions parse the
 * line as a comment.
 *
 * @return int
 */
#[\ReturnTypeWillChange]
public function count()
{
    $count = 0;
    $this->openFile($this->fileHandlerMode);

    if ($this->isFileOpened()) {
        $handle = $this->getFileHandler();

        // Remember the reading offset before scanning the whole file.
        $initialOffset = ftell($handle);
        rewind($handle);

        $enclosure = $this->dialect->getEnclosure();
        $escape = $this->dialect->getEscape();
        $delimiter = $this->dialect->getDelimiter();
        $skipEmptyLines = $this->dialect->getSkipEmptyLines();

        while (!feof($handle)) {
            $line = fgetcsv($handle, null, $delimiter, $enclosure, $escape);
            if (empty($line)) {
                continue;
            }

            if ($skipEmptyLines) {
                $filledValues = array_filter($line, function ($var) {
                    // empty row pattern without alphanumeric
                    return $var !== false && $var !== null && $var !== '' && preg_match('([[:alnum:]]+)', $var);
                });

                if (0 === count($filledValues)) {
                    continue;
                }
            }

            $count++;
        }

        if ($initialOffset !== false) {
            fseek($handle, $initialOffset);
        }
    }

    if ($this->dialect->getFirstRowHeader() && $count > 0) {
        --$count;
    }

    return $count;
}
306
|
|
|
} |
307
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable `$accountId` that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the `id` property of an instance of the `Account` class. This class holds a proper account, so the id value must no longer be false.
Either this assignment is in error or a type check should be added for that assignment.