1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* League.Csv (https://csv.thephpleague.com) |
5
|
|
|
* |
6
|
|
|
* (c) Ignace Nyamagana Butera <[email protected]> |
7
|
|
|
* |
8
|
|
|
* For the full copyright and license information, please view the LICENSE |
9
|
|
|
* file that was distributed with this source code. |
10
|
|
|
*/ |
11
|
|
|
|
12
|
|
|
declare(strict_types=1); |
13
|
|
|
|
14
|
|
|
namespace League\Csv; |
15
|
|
|
|
16
|
|
|
use BadMethodCallException; |
17
|
|
|
use CallbackFilterIterator; |
18
|
|
|
use Countable; |
19
|
|
|
use Generator; |
20
|
|
|
use Iterator; |
21
|
|
|
use IteratorAggregate; |
22
|
|
|
use JsonSerializable; |
23
|
|
|
use League\Csv\Polyfill\EmptyEscapeParser; |
24
|
|
|
use SplFileObject; |
25
|
|
|
use TypeError; |
26
|
|
|
use function array_combine; |
27
|
|
|
use function array_filter; |
28
|
|
|
use function array_pad; |
29
|
|
|
use function array_slice; |
30
|
|
|
use function array_unique; |
31
|
|
|
use function count; |
32
|
|
|
use function gettype; |
33
|
|
|
use function is_array; |
34
|
|
|
use function iterator_count; |
35
|
|
|
use function iterator_to_array; |
36
|
|
|
use function mb_strlen; |
37
|
|
|
use function mb_substr; |
38
|
|
|
use function sprintf; |
39
|
|
|
use function strlen; |
40
|
|
|
use function substr; |
41
|
|
|
use const PHP_VERSION_ID; |
42
|
|
|
use const STREAM_FILTER_READ; |
43
|
|
|
|
44
|
|
|
/**
 * A class to parse and read records from a CSV document.
 *
 * @method array fetchOne(int $nth_record = 0) Returns a single record from the CSV
 * @method Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
 * @method Generator fetchPairs(string|int $offset_index = 0, string|int $value_index = 1) Fetches the next key-value pairs from the CSV document
 */
class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSerializable
{
    /**
     * header offset.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * header record.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * records count.
     *
     * A value of -1 means the count has not been computed yet.
     *
     * @var int
     */
    protected $nb_records = -1;

    /**
     * {@inheritdoc}
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * @var bool
     */
    protected $is_empty_records_included = false;

    /**
     * {@inheritdoc}
     *
     * When the path ends with the `.zip` extension (case-insensitive) the first
     * file found in the archive is extracted into a fresh temporary directory and
     * the CSV document is opened from the extracted file instead.
     *
     * The temporary directory is not removed by this method.
     *
     * @param resource|null $context
     *
     * @throws Exception If the temporary directory can not be created or the archive can not be read
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
    {
        // substr() on the lower-cased path is safe for paths shorter than the
        // extension, whereas substr_compare() with a negative offset is not.
        if ('.zip' === substr(strtolower($path), -4)) {
            $path = self::extractFirstZipEntry($path);
        }

        return parent::createFromPath($path, $open_mode, $context);
    }

    /**
     * {@inheritdoc}
     */
    protected function resetProperties()
    {
        parent::resetProperties();
        $this->nb_records = -1;
        $this->header = [];
    }

    /**
     * Returns the header offset.
     *
     * If no CSV header offset is set this method MUST return null
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }

    /**
     * Returns the CSV record used as header.
     *
     * The returned header is represented as an array of string values
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if (null === $this->header_offset) {
            return $this->header;
        }

        // The header has already been resolved for the current offset.
        if ([] !== $this->header) {
            return $this->header;
        }

        $this->header = $this->setHeader($this->header_offset);

        return $this->header;
    }

    /**
     * Determine the CSV record header.
     *
     * @throws Exception If the header offset is set and no record is found or is the empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $header = $this->seekRow($offset);
        if (false === $header || [] === $header || [null] === $header) {
            throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        // Only the very first record of the document can carry the BOM sequence.
        if (0 === $offset) {
            return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        return $header;
    }

    /**
     * Returns the row at a given offset.
     *
     * @return array|false the record, or false when no record exists at the given offset
     */
    protected function seekRow(int $offset)
    {
        foreach ($this->getDocument() as $index => $record) {
            if ($offset === $index) {
                return $record;
            }
        }

        return false;
    }

    /**
     * Returns the document as an Iterator.
     */
    protected function getDocument(): Iterator
    {
        // Before PHP 7.4 the empty escape character is handled by the polyfill parser.
        if (70400 > PHP_VERSION_ID && '' === $this->escape) {
            $this->document->setCsvControl($this->delimiter, $this->enclosure);

            return EmptyEscapeParser::parse($this->document);
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->rewind();

        return $this->document;
    }

    /**
     * Strip the BOM sequence from a record.
     *
     * @param string[] $record
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $record;
        }

        $record[0] = mb_substr($record[0], $bom_length);
        // Doubling the field lets a single substr() read its last and first
        // characters; both must be the enclosure for the field to be unwrapped.
        if ($enclosure.$enclosure != substr($record[0].$record[0], strlen($record[0]) - 1, 2)) {
            return $record;
        }

        $record[0] = substr($record[0], 1, -1);

        return $record;
    }

    /**
     * {@inheritdoc}
     */
    public function __call($method, array $arguments)
    {
        static $whitelisted = ['fetchColumn' => 1, 'fetchOne' => 1, 'fetchPairs' => 1];
        if (isset($whitelisted[$method])) {
            return (new ResultSet($this->getRecords(), $this->getHeader()))->$method(...$arguments);
        }

        throw new BadMethodCallException(sprintf('%s::%s() method does not exist', static::class, $method));
    }

    /**
     * {@inheritdoc}
     */
    public function count(): int
    {
        if (-1 === $this->nb_records) {
            $this->nb_records = iterator_count($this->getRecords());
        }

        return $this->nb_records;
    }

    /**
     * {@inheritdoc}
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }

    /**
     * {@inheritdoc}
     */
    public function jsonSerialize(): array
    {
        return iterator_to_array($this->getRecords(), false);
    }

    /**
     * Returns the path of the CSV file extracted from a compressed file.
     *
     * Only zip archives are handled: the first file found in the archive is
     * extracted into a fresh temporary directory which is not removed by this method.
     *
     * @param string $path File path of the input CSV compressed file
     *
     * @throws Exception If the temporary directory can not be created or the archive can not be read
     *
     * @return string the path of the extracted file
     */
    public function decompressCSV(string $path): string
    {
        return self::extractFirstZipEntry($path);
    }

    /**
     * Extracts the first file of a zip archive into a new temporary directory.
     *
     * @throws Exception If the temporary directory can not be created, the archive
     *                   can not be opened, or no file can be extracted from it
     *
     * @return string the path of the extracted file
     */
    private static function extractFirstZipEntry(string $path): string
    {
        $directory = tempnam(sys_get_temp_dir(), '');
        if (false === $directory) {
            throw new Exception('Temporary folder creation failed. Permission error.');
        }

        // tempnam() reserves a unique name by creating a file; swap it for a directory.
        if (file_exists($directory)) {
            unlink($directory);
        }

        if (!mkdir($directory, 0700) && !is_dir($directory)) {
            throw new Exception('Temporary folder creation failed. Permission error.');
        }

        $zip = new \ZipArchive();
        if (true !== $zip->open($path)) {
            throw new Exception(sprintf('The archive `%s` could not be opened.', $path));
        }

        try {
            for ($index = 0; $index < $zip->numFiles; ++$index) {
                $name = $zip->getNameIndex($index);
                // Skip unreadable entries and directory entries (their name ends with a slash).
                if (false === $name || '/' === substr($name, -1)) {
                    continue;
                }

                $extracted = $directory.'/'.$name;
                if (!$zip->extractTo($directory, [$name]) || !is_file($extracted)) {
                    throw new Exception(sprintf('The entry `%s` could not be extracted from `%s`.', $name, $path));
                }

                return $extracted;
            }
        } finally {
            // Only reached once the archive has been opened successfully.
            $zip->close();
        }

        throw new Exception(sprintf('The archive `%s` does not contain any file.', $path));
    }

    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     *
     * If the CSV document has a header record then each record is combined
     * to the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent. Missing record fields are
     * filled with null values while extra record fields are strip from
     * the returned object.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);
        $normalized = function ($record): bool {
            return is_array($record) && ($this->is_empty_records_included || $record != [null]);
        };

        $bom = '';
        if (!$this->is_input_bom_included) {
            $bom = $this->getInputBOM();
        }

        $document = $this->getDocument();
        $records = $this->stripBOM(new CallbackFilterIterator($document, $normalized), $bom);
        if (null !== $this->header_offset) {
            // Remove the header record from the returned records.
            $records = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
                return $offset !== $this->header_offset;
            });
        }

        if ($this->is_empty_records_included) {
            $normalized_empty_records = static function (array $record): array {
                if ([null] === $record) {
                    return [];
                }

                return $record;
            };

            return $this->combineHeader(new MapIterator($records, $normalized_empty_records), $header);
        }

        return $this->combineHeader($records, $header);
    }

    /**
     * Returns the header to be used for iteration.
     *
     * @param string[] $header
     *
     * @throws Exception If the header contains non unique column name
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        if ([] === $header) {
            $header = $this->getHeader();
        }

        // Filtering out non-string values and duplicates must leave the header untouched.
        if ($header === array_unique(array_filter($header, 'is_string'))) {
            return $header;
        }

        throw new SyntaxError('The header record must be empty or a flat array with unique string values');
    }

    /**
     * Combine the CSV header to each record if present.
     *
     * @param string[] $header
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if ([] === $header) {
            return $iterator;
        }

        $field_count = count($header);
        $mapper = static function (array $record) use ($header, $field_count): array {
            // Pad short records with null and truncate long ones to the header size.
            if (count($record) != $field_count) {
                $record = array_slice(array_pad($record, $field_count, null), 0, $field_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Strip the BOM sequence from the returned records if necessary.
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $mapper = function (array $record, int $index) use ($bom_length): array {
            // Only the record at offset 0 is stripped.
            if (0 !== $index) {
                return $record;
            }

            return $this->removeBOM($record, $bom_length, $this->enclosure);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string value.
     *
     * @param int|null $offset the header record offset
     *
     * @throws Exception if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if ($offset === $this->header_offset) {
            return $this;
        }

        if (!is_nullable_int($offset)) {
            throw new TypeError(sprintf(__METHOD__.'() expects 1 Argument to be null or an integer %s given', gettype($offset)));
        }

        if (null !== $offset && 0 > $offset) {
            throw new InvalidArgument(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }

    /**
     * Enable skipping empty records.
     */
    public function skipEmptyRecords(): self
    {
        if ($this->is_empty_records_included) {
            $this->is_empty_records_included = false;
            // The cached records count is no longer valid.
            $this->nb_records = -1;
        }

        return $this;
    }

    /**
     * Disable skipping empty records.
     */
    public function includeEmptyRecords(): self
    {
        if (!$this->is_empty_records_included) {
            $this->is_empty_records_included = true;
            // The cached records count is no longer valid.
            $this->nb_records = -1;
        }

        return $this;
    }

    /**
     * Tells whether empty records are included by the instance.
     */
    public function isEmptyRecordsIncluded(): bool
    {
        return $this->is_empty_records_included;
    }
}
485
|
|
|
|
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes. In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.