Completed
Pull Request — master (#379)
by ignace nyamagana
01:23
created

Reader::count()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 0
dl 0
loc 8
ccs 1
cts 1
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv;
15
16
use CallbackFilterIterator;
17
use Iterator;
18
use JsonSerializable;
19
use League\Csv\Polyfill\EmptyEscapeParser;
20
use SplFileObject;
21
use function array_combine;
22
use function array_filter;
23
use function array_pad;
24
use function array_slice;
25
use function array_unique;
26
use function count;
27
use function is_array;
28
use function iterator_count;
29
use function iterator_to_array;
30
use function mb_strlen;
31
use function mb_substr;
32
use function sprintf;
33
use function strlen;
34
use function substr;
35
use const PHP_VERSION_ID;
36
use const STREAM_FILTER_READ;
37
38
/**
39
 * A class to parse and read records from a CSV document.
40
 */
41
class Reader extends AbstractCsv implements TabularDataReader, JsonSerializable
42
{
43
    /**
44
     * Offset of the record used as the CSV header.
45
     *
     * Null when no header offset is set (see getHeaderOffset()).
     *
46
     * @var int|null
47
     */
48
    protected $header_offset;
49
50
    /**
51
     * Cached header record.
52
     *
     * Empty until getHeader() resolves it from the header offset;
     * cleared again by resetProperties().
     *
53
     * @var string[]
54
     */
55
    protected $header = [];
56
57
    /**
58
     * Cached records count.
59
     *
     * The value -1 means the count has not been computed yet (see count()).
     *
60
     * @var int
61
     */
62
    protected $nb_records = -1;
63
64
    /**
65
     * {@inheritdoc}
     *
     * Set to STREAM_FILTER_READ for this class.
66
     */
67
    protected $stream_filter_mode = STREAM_FILTER_READ;
68
69
    /**
     * Whether empty records are kept when iterating with getRecords().
     *
     * When false, records equal to [null] are filtered out.
     *
70
     * @var bool
71
     */
72
    protected $is_empty_records_included = false;
73
74
    /**
     * {@inheritdoc}
     *
     * Forwards every argument unchanged to the parent named constructor;
     * `$open_mode` defaults to `'r'` in this class.
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
    {
        $reader = parent::createFromPath($path, $open_mode, $context);

        return $reader;
    }
81
82
    /**
     * {@inheritdoc}
     *
     * After the parent reset, the cached header record is emptied and the
     * cached records count is marked as not computed (-1).
     */
    protected function resetProperties(): void
    {
        parent::resetProperties();

        $this->header = [];
        $this->nb_records = -1;
    }
91
92
    /**
     * Returns the offset of the record used as the CSV header.
     *
     * Null is returned when no header offset has been set.
     */
    public function getHeaderOffset(): ?int
    {
        $offset = $this->header_offset;

        return $offset;
    }
102
103
    /**
     * {@inheritDoc}
     *
     * The header is resolved lazily: it is only read from the document when a
     * header offset is set and no header has been cached yet. Otherwise the
     * currently stored header (possibly the empty array) is returned as is.
     */
    public function getHeader(): array
    {
        $must_load = null !== $this->header_offset && [] === $this->header;
        if ($must_load) {
            $this->header = $this->setHeader($this->header_offset);
        }

        return $this->header;
    }
120 24
121 18
    /**
     * Reads the header record found at the given offset.
     *
     * When the header is the very first record (offset 0), the input BOM
     * sequence is stripped from it before it is returned.
     *
     * @throws SyntaxError If no record is found at the offset or if the record is empty ([] or [null])
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $header = $this->seekRow($offset);
        if ([] === $header || [null] === $header) {
            throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $header;
        }

        $bom_length = mb_strlen($this->getInputBOM());

        return $this->removeBOM($header, $bom_length, $this->enclosure);
    }
141
142 12
    /**
     * Returns the record located at the given offset in the document.
     *
     * The document is walked from its start; the empty array is returned
     * when no record carries the requested offset.
     */
    protected function seekRow(int $offset): array
    {
        foreach ($this->getDocument() as $position => $row) {
            if ($offset !== $position) {
                continue;
            }

            return $row;
        }

        return [];
    }
155
156
    /**
     * Returns the document as an Iterator.
     *
     * On PHP versions below 7.4 with an empty escape character, only the
     * delimiter and enclosure are applied and parsing is delegated to
     * EmptyEscapeParser. In every other case the document itself is configured
     * for CSV reading (READ_CSV | READ_AHEAD), rewound and returned.
     */
    protected function getDocument(): Iterator
    {
        $needs_polyfill = 70400 > PHP_VERSION_ID && '' === $this->escape;
        if (!$needs_polyfill) {
            $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD);
            $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
            $this->document->rewind();

            return $this->document;
        }

        $this->document->setCsvControl($this->delimiter, $this->enclosure);

        return EmptyEscapeParser::parse($this->document);
    }
173
174 21
    /**
     * Strip the BOM sequence from a record.
     *
     * The first `$bom_length` characters (as counted by mb_substr) are removed
     * from the first field. If the remaining text is wrapped in the enclosure
     * character, that surrounding pair is removed as well.
     *
     * A remaining text shorter than two bytes cannot hold both an opening and a
     * closing enclosure and is therefore returned untouched.
     *
     * @param string[] $record      the record whose first field may start with a BOM
     * @param int      $bom_length  number of leading characters to remove from the first field
     * @param string   $enclosure   the enclosure character
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);
        $record[0] = $field;

        // A lone enclosure character is not an enclosed field. Without this guard
        // it would be emptied, and substr($field, 1, -1) on a one-byte string
        // yields false instead of a string on PHP < 8.0.
        if (strlen($field) < 2) {
            return $record;
        }

        // Strict comparison on purpose: a loose one would compare
        // numeric-looking strings by value rather than byte for byte.
        if ($enclosure.$enclosure !== $field[0].substr($field, -1)) {
            return $record;
        }

        $record[0] = substr($field, 1, -1);

        return $record;
    }
196 12
197 3
    /**
     * {@inheritdoc}
     *
     * Wraps the current records and header in a ResultSet and delegates to it.
     */
    public function fetchColumn($index = 0): Iterator
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchColumn($index);
    }
206
207 3
    /**
     * {@inheritdoc}
     *
     * Wraps the current records and header in a ResultSet and delegates to it.
     */
    public function fetchOne(int $nth_record = 0): array
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchOne($nth_record);
    }
216 9
217 3
    /**
     * {@inheritdoc}
     *
     * Wraps the current records and header in a ResultSet and delegates to it.
     */
    public function fetchPairs($offset_index = 0, $value_index = 1): Iterator
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchPairs($offset_index, $value_index);
    }
226 3
227
    /**
     * {@inheritdoc}
     *
     * The count is obtained by iterating over getRecords() once and is then
     * cached; the cache is considered empty while it holds -1.
     */
    public function count(): int
    {
        if (-1 !== $this->nb_records) {
            return $this->nb_records;
        }

        $this->nb_records = iterator_count($this->getRecords());

        return $this->nb_records;
    }
238 6
239
    /**
     * {@inheritdoc}
     *
     * Iterating the reader is the same as iterating getRecords() without arguments.
     */
    public function getIterator(): Iterator
    {
        $records = $this->getRecords();

        return $records;
    }
246 3
247
    /**
     * {@inheritdoc}
     *
     * All records are collected into a list; the iterator keys are discarded.
     */
    public function jsonSerialize(): array
    {
        $preserve_keys = false;

        return iterator_to_array($this->getRecords(), $preserve_keys);
    }
254
255
    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     *
     * If the CSV document has a header record then each record is combined
     * with the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned object.
     *
     * Unless the input BOM is flagged as included, the BOM sequence is stripped
     * from the first record. Empty records are dropped unless they are
     * explicitly included, in which case they are returned as empty arrays.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);

        // The BOM is read before the document is (re)configured and rewound below.
        $bom = $this->is_input_bom_included ? '' : $this->getInputBOM();

        $keep_record = function ($record): bool {
            if (!is_array($record)) {
                return false;
            }

            return $this->is_empty_records_included || $record != [null];
        };

        $filtered = new CallbackFilterIterator($this->getDocument(), $keep_record);
        $records = $this->stripBOM($filtered, $bom);

        if (null !== $this->header_offset) {
            // Drop the header record itself from the returned records.
            $skip_header = function (array $record, int $offset): bool {
                return $offset !== $this->header_offset;
            };
            $records = new CallbackFilterIterator($records, $skip_header);
        }

        if (!$this->is_empty_records_included) {
            return $this->combineHeader($records, $header);
        }

        $normalize_empty = static function (array $record): array {
            return [null] === $record ? [] : $record;
        };

        return $this->combineHeader(new MapIterator($records, $normalize_empty), $header);
    }
303
304
    /**
     * Returns the header to be used for iteration.
     *
     * An empty argument falls back to the document header. The resulting
     * header is accepted only if it is identical to itself once non-string
     * values and duplicates have been removed.
     *
     * @param string[] $header
     *
     * @throws SyntaxError If the header contains non-string or non-unique values
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        if ([] === $header) {
            $header = $this->getHeader();
        }

        $unique_strings = array_unique(array_filter($header, 'is_string'));
        if ($header !== $unique_strings) {
            throw new SyntaxError('The header record must be an empty or a flat array with unique string values.');
        }

        return $header;
    }
325
326
    /**
     * Combines the CSV header with each record if a header is present.
     *
     * With an empty header the iterator is returned unchanged. Otherwise each
     * record is padded with null or truncated to the header length and then
     * keyed by the header values.
     *
     * @param string[] $header
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if ([] === $header) {
            return $iterator;
        }

        $field_count = count($header);
        $combine = static function (array $record) use ($header, $field_count): array {
            if ($field_count !== count($record)) {
                $padded = array_pad($record, $field_count, null);
                $record = array_slice($padded, 0, $field_count);
            }

            /** @var array<string|null> $assocRecord */
            $assocRecord = array_combine($header, $record);

            return $assocRecord;
        };

        return new MapIterator($iterator, $combine);
    }
351 30
352
    /**
     * Strips the BOM sequence from the returned records if necessary.
     *
     * Nothing is done for an empty BOM. Otherwise only the record at
     * offset 0 is altered; every other record is passed through untouched.
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $strip_first = function (array $record, int $index) use ($bom_length): array {
            return 0 === $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        };

        return new MapIterator($iterator, $strip_first);
    }
372
373
    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * Setting the offset already in use is a no-op. Any other valid value
     * replaces the offset and resets the cached header and records count.
     *
     * @param int|null $offset the header record offset
     *
     * @throws InvalidArgument if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset(?int $offset): self
    {
        if ($this->header_offset !== $offset) {
            if (null !== $offset && $offset < 0) {
                throw new InvalidArgument(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
            }

            $this->header_offset = $offset;
            $this->resetProperties();
        }

        return $this;
    }
400 12
401
    /**
     * Enables skipping empty records.
     *
     * When the setting actually changes, the cached records count is invalidated.
     *
     * @return self
     */
    public function skipEmptyRecords(): self
    {
        if (!$this->is_empty_records_included) {
            return $this;
        }

        $this->is_empty_records_included = false;
        $this->nb_records = -1;

        return $this;
    }
413 12
414
    /**
     * Disables skipping empty records.
     *
     * When the setting actually changes, the cached records count is invalidated.
     *
     * @return self
     */
    public function includeEmptyRecords(): self
    {
        if ($this->is_empty_records_included) {
            return $this;
        }

        $this->is_empty_records_included = true;
        $this->nb_records = -1;

        return $this;
    }
426 12
427
    /**
     * Tells whether empty records are included (true) or skipped (false) by the instance.
     */
    public function isEmptyRecordsIncluded(): bool
    {
        $included = $this->is_empty_records_included;

        return $included;
    }
434
}
435