|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* This file is part of the League.csv library |
|
4
|
|
|
* |
|
5
|
|
|
* @license http://opensource.org/licenses/MIT |
|
6
|
|
|
* @link https://github.com/thephpleague/csv/ |
|
7
|
|
|
* @version 9.0.0 |
|
8
|
|
|
* @package League.csv |
|
9
|
|
|
* |
|
10
|
|
|
* For the full copyright and license information, please view the LICENSE |
|
11
|
|
|
* file that was distributed with this source code. |
|
12
|
|
|
*/ |
|
13
|
|
|
declare(strict_types=1); |
|
14
|
|
|
|
|
15
|
|
|
namespace League\Csv; |
|
16
|
|
|
|
|
17
|
|
|
use BadMethodCallException; |
|
18
|
|
|
use CallbackFilterIterator; |
|
19
|
|
|
use Countable; |
|
20
|
|
|
use Iterator; |
|
21
|
|
|
use IteratorAggregate; |
|
22
|
|
|
use JsonSerializable; |
|
23
|
|
|
use SplFileObject; |
|
24
|
|
|
|
|
25
|
|
|
/** |
|
26
|
|
|
* A class to manage records selection from a CSV document |
|
27
|
|
|
* |
|
28
|
|
|
* @package League.csv |
|
29
|
|
|
* @since 3.0.0 |
|
30
|
|
|
* |
|
31
|
|
|
* @method array fetchOne(int $nth_record = 0) Returns a single record from the CSV |
|
32
|
|
|
* @method Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field |
|
33
|
|
|
* @method Generator fetchPairs(string|int $offset_index = 0, string|int $value_index = 1) Fetches the next key-value pairs from the CSV document |
|
34
|
|
|
*/ |
|
35
|
|
|
class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSerializable |
|
36
|
|
|
{ |
|
37
|
|
|
/** |
|
38
|
|
|
* CSV Document header offset |
|
39
|
|
|
* |
|
40
|
|
|
* @var int|null |
|
41
|
|
|
*/ |
|
42
|
|
|
protected $header_offset; |
|
43
|
|
|
|
|
44
|
|
|
/** |
|
45
|
|
|
* CSV Document Header record |
|
46
|
|
|
* |
|
47
|
|
|
* @var string[] |
|
48
|
|
|
*/ |
|
49
|
|
|
protected $header = []; |
|
50
|
|
|
|
|
51
|
|
|
/** |
|
52
|
|
|
* Records count |
|
53
|
|
|
* |
|
54
|
|
|
* @var int |
|
55
|
|
|
*/ |
|
56
|
|
|
protected $nb_records = -1; |
|
57
|
|
|
|
|
58
|
|
|
/** |
|
59
|
|
|
* @inheritdoc |
|
60
|
|
|
*/ |
|
61
|
|
|
protected $stream_filter_mode = STREAM_FILTER_READ; |
|
62
|
|
|
|
|
63
|
|
|
/**
 * Creates a new instance from a URL or file location.
 *
 * The location is opened with Stream::createFromUrl() and the value it
 * returns is wrapped in a new Stream that is handed to the constructor.
 *
 * NOTE(review): neither Stream nor the constructor is visible from here —
 * confirm that wrapping the result of Stream::createFromUrl() in a second
 * Stream is intended, and that $context is validated by the callee since it
 * is forwarded without any check.
 *
 * @param string        $url       file url
 * @param string        $open_mode the file open mode flag
 * @param resource|null $context   the resource context
 *
 * @return static
 */
public static function createFromUrl(string $url, string $open_mode = 'r+', $context = null): self
{
    return new static(new Stream(Stream::createFromUrl($url, $open_mode, $context)));
}
|
77
|
|
|
|
|
78
|
|
|
/**
 * Tells which record offset is currently used as the header.
 *
 * The stored offset is returned untouched; it is null when no CSV record
 * is used as the header.
 *
 * @return int|null
 */
public function getHeaderOffset()
{
    return $this->header_offset;
}
|
89
|
|
|
|
|
90
|
|
|
/**
 * Returns the header record as an array of strings.
 *
 * The header is read from the document only when a header offset is set
 * and no header has been stored yet; the result is then kept on the
 * instance so later calls return it directly. Without a header offset the
 * stored header is returned as is.
 *
 * @return string[]
 */
public function getHeader(): array
{
    $must_load = null !== $this->header_offset && empty($this->header);
    if ($must_load) {
        $this->header = $this->setHeader($this->header_offset);
    }

    return $this->header;
}
|
111
|
|
|
|
|
112
|
|
|
/**
 * Reads the record located at the given offset to use it as the header.
 *
 * The document is switched to CSV reading mode (read-ahead, empty lines
 * skipped) with the current delimiter, enclosure and escape characters
 * before seeking to the offset. When the offset is 0 the input BOM is
 * stripped from the first field.
 *
 * @param int $offset
 *
 * @throws Exception If the record at the given offset is missing or is an empty array
 *
 * @return string[]
 */
protected function setHeader(int $offset): array
{
    $flags = SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY;
    $this->document->setFlags($flags);
    $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
    $this->document->seek($offset);

    $header = $this->document->current();
    if (empty($header)) {
        throw new Exception(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
    }

    // Only the very first record of the document can carry the BOM.
    if (0 !== $offset) {
        return $header;
    }

    return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
}
|
137
|
|
|
|
|
138
|
|
|
/**
 * Strips the BOM sequence from the first field of a record.
 *
 * Once the BOM is removed, a first field that both starts and ends with
 * the enclosure character also has that surrounding pair removed. The
 * record is returned unchanged when the BOM length is 0 or when the first
 * field is missing or is not a string.
 *
 * A field reduced to a single character is never considered enclosed: a
 * lone enclosure character cannot be both the opening and the closing
 * delimiter, so it is kept instead of being emptied.
 *
 * @param string[] $record
 * @param int      $bom_length BOM length in characters, as counted by mb_strlen()
 * @param string   $enclosure
 *
 * @return string[]
 */
protected function removeBOM(array $record, int $bom_length, string $enclosure): array
{
    if (0 === $bom_length || !isset($record[0]) || !is_string($record[0])) {
        return $record;
    }

    $field = mb_substr($record[0], $bom_length);
    $record[0] = $field;

    // At least two bytes are required to hold an opening and a closing enclosure.
    $is_enclosed = strlen($field) > 1
        && $enclosure === $field[0]
        && $enclosure === substr($field, -1);
    if ($is_enclosed) {
        $record[0] = substr($field, 1, -1);
    }

    return $record;
}
|
162
|
|
|
|
|
163
|
|
|
/**
 * Forwards fetchColumn(), fetchOne() and fetchPairs() to a ResultSet.
 *
 * The ResultSet is built from the current records and header, and the
 * requested method is invoked on it with the given arguments. Any other
 * method name is rejected.
 *
 * @param string $method
 * @param array  $arguments
 *
 * @throws BadMethodCallException If the method is not one of the forwarded methods
 *
 * @return mixed
 */
public function __call($method, array $arguments)
{
    if (!in_array($method, ['fetchColumn', 'fetchOne', 'fetchPairs'], true)) {
        throw new BadMethodCallException(sprintf('%s::%s() method does not exist', __CLASS__, $method));
    }

    $result_set = new ResultSet($this->getRecords(), $this->getHeader());

    return $result_set->$method(...$arguments);
}
|
175
|
|
|
|
|
176
|
|
|
/**
 * Returns the number of records produced by getRecords().
 *
 * The value is computed by iterating over the records once and is then
 * stored on the instance; -1 marks a count that has not been computed yet.
 *
 * @return int
 */
public function count(): int
{
    if (-1 !== $this->nb_records) {
        return $this->nb_records;
    }

    $this->nb_records = iterator_count($this->getRecords());

    return $this->nb_records;
}
|
187
|
|
|
|
|
188
|
|
|
/**
 * Makes the reader usable in a foreach loop.
 *
 * Iterating over the instance is the same as iterating over getRecords()
 * called without a custom header.
 *
 * @return Iterator
 */
public function getIterator(): Iterator
{
    return $this->getRecords();
}
|
195
|
|
|
|
|
196
|
|
|
/**
 * Returns the data to be serialized by json_encode().
 *
 * All records are loaded into a list; the iterator keys are discarded so
 * the result is indexed from 0.
 *
 * @return array
 */
public function jsonSerialize(): array
{
    $records = $this->getRecords();

    return iterator_to_array($records, false);
}
|
203
|
|
|
|
|
204
|
|
|
/**
 * Returns the CSV records as an iterator.
 *
 * Values that are not arrays, or that loosely equal [null], are filtered
 * out and the input BOM is stripped from the first record. When a header
 * offset is set, the record found at that offset is removed from the
 * iteration.
 *
 * When a header is available (the one given, or the document header
 * otherwise) each record is combined with it: missing fields are filled
 * with null and extra fields are dropped.
 *
 * @param string[] $header an optional header to use instead of the CSV document header
 *
 * @throws Exception If the header to use is not made of unique string values
 *
 * @return Iterator
 */
public function getRecords(array $header = []): Iterator
{
    // Resolve the header first: this may throw and may itself move the document cursor.
    $header = $this->computeHeader($header);
    $bom = $this->getInputBOM();

    $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
    $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

    $is_record = static function ($row): bool {
        return is_array($row) && $row != [null];
    };
    $records = $this->stripBOM(new CallbackFilterIterator($this->document, $is_record), $bom);
    if (null === $this->header_offset) {
        return $this->combineHeader($records, $header);
    }

    // The offset is read from the instance while iterating, not captured here.
    $without_header = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
        return $offset !== $this->header_offset;
    });

    return $this->combineHeader($without_header, $header);
}
|
239
|
|
|
|
|
240
|
|
|
/**
 * Resolves and validates the header used during iteration.
 *
 * An empty argument falls back to the document header. The resulting
 * header is accepted only if filtering out non-string values and
 * duplicates leaves it strictly identical.
 *
 * @param string[] $header
 *
 * @throws Exception If the header contains non-string or non-unique values
 *
 * @return string[]
 */
protected function computeHeader(array $header)
{
    $candidate = empty($header) ? $this->getHeader() : $header;

    $unique_strings = array_unique(array_filter($candidate, 'is_string'));
    if ($candidate !== $unique_strings) {
        throw new Exception('The header record must be empty or a flat array with unique string values');
    }

    return $candidate;
}
|
261
|
|
|
|
|
262
|
|
|
/**
 * Uses the header values as keys for every record of the iterator.
 *
 * With an empty header the iterator is returned untouched. Otherwise each
 * record is first brought to the header size — padded with null or
 * truncated — and then combined with the header.
 *
 * @param Iterator $iterator
 * @param string[] $header
 *
 * @return Iterator
 */
protected function combineHeader(Iterator $iterator, array $header): Iterator
{
    if (empty($header)) {
        return $iterator;
    }

    $expected = count($header);
    $combine = function (array $record) use ($header, $expected): array {
        $fields = count($record) === $expected
            ? $record
            : array_slice(array_pad($record, $expected, null), 0, $expected);

        return array_combine($header, $fields);
    };

    return new MapIterator($iterator, $combine);
}
|
287
|
|
|
|
|
288
|
|
|
/**
 * Removes the BOM sequence from the record found at index 0.
 *
 * With an empty BOM the iterator is returned untouched. Otherwise the
 * iterator is wrapped so that the record whose key is 0 goes through
 * removeBOM() while every other record is passed along unchanged.
 *
 * @param Iterator $iterator
 * @param string   $bom
 *
 * @return Iterator
 */
protected function stripBOM(Iterator $iterator, string $bom): Iterator
{
    if ('' === $bom) {
        return $iterator;
    }

    $bom_length = mb_strlen($bom);

    return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
        return 0 === $index
            ? $this->removeBOM($record, $bom_length, $this->enclosure)
            : $record;
    });
}
|
313
|
|
|
|
|
314
|
|
|
/**
 * Selects the record to be used as the CSV header.
 *
 * Because the header is used as an array of keys, a valid header MUST
 * contain only unique string values.
 *
 * The offset is first passed to filterNullableInteger() with 0 as the
 * minimum (presumably rejecting anything but null or an integer >= 0 —
 * that helper is defined outside this view). When the offset differs from
 * the current one, it is stored and the cached header and record count
 * are reset.
 *
 * @param int|null $offset the header record offset
 *
 * @return static
 */
public function setHeaderOffset($offset): self
{
    $this->filterNullableInteger($offset, 0, __METHOD__.'() expects the header offset index to be a positive integer or 0');
    if ($offset === $this->header_offset) {
        return $this;
    }

    $this->header_offset = $offset;
    $this->resetProperties();

    return $this;
}
|
334
|
|
|
|
|
335
|
|
|
/**
 * Clears the values cached on the instance.
 *
 * The stored header is emptied and the record count is set back to -1,
 * the value count() treats as "not computed yet".
 */
protected function resetProperties()
{
    $this->header = [];
    $this->nb_records = -1;
}
|
343
|
|
|
} |
|
344
|
|
|
|
This check looks at variables that have been passed in as parameters and are passed out again to other methods.
If the outgoing method call has stricter type requirements than the method itself, an issue is raised.
An additional type check may prevent trouble.