Completed
Pull Request — master (#354)
by ignace nyamagana
01:32
created

Reader::computeHeader()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 4
nop 1
dl 0
loc 12
ccs 6
cts 6
cp 1
crap 3
rs 9.8666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv;
15
16
use BadMethodCallException;
17
use CallbackFilterIterator;
18
use Countable;
19
use Generator;
20
use Iterator;
21
use IteratorAggregate;
22
use JsonSerializable;
23
use League\Csv\Polyfill\EmptyEscapeParser;
24
use SplFileObject;
25
use TypeError;
26
use function array_combine;
27
use function array_filter;
28
use function array_pad;
29
use function array_slice;
30
use function array_unique;
31
use function count;
32
use function gettype;
33
use function is_array;
34
use function iterator_count;
35
use function iterator_to_array;
36
use function mb_strlen;
37
use function mb_substr;
38
use function sprintf;
39
use function strlen;
40
use function substr;
41
use const PHP_VERSION_ID;
42
use const STREAM_FILTER_READ;
43
44
/**
45
 * A class to parse and read records from a CSV document.
46
 *
47
 * @method array fetchOne(int $nth_record = 0) Returns a single record from the CSV
48
 * @method Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
49
 * @method Generator fetchPairs(string|int $offset_index = 0, string|int $value_index = 1) Fetches the next key-value pairs from the CSV document
50
 */
51
class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSerializable
{
    /**
     * Offset of the record used as the CSV header.
     *
     * Null means that no record of the document is used as header.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached header record.
     *
     * Filled lazily by getHeader() and cleared by resetProperties().
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Cached number of records; -1 means the count has not been computed yet.
     *
     * @var int
     */
    protected $nb_records = -1;

    /**
     * {@inheritdoc}
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Whether empty records are removed from the iterator returned by getRecords().
     *
     * @var bool
     */
    protected $is_empty_records_skipped = true;

    /**
     * {@inheritdoc}
     *
     * Redeclared only to change the default open mode to read-only ('r').
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
    {
        return parent::createFromPath($path, $open_mode, $context);
    }

    /**
     * {@inheritdoc}
     *
     * Additionally invalidates the cached record count and the cached header.
     */
    protected function resetProperties()
    {
        parent::resetProperties();
        $this->nb_records = -1;
        $this->header = [];
    }

    /**
     * Returns the header offset.
     *
     * If no CSV header offset is set this method MUST return null
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }

    /**
     * Returns the CSV record used as header.
     *
     * The returned header is represented as an array of string values.
     * The header is resolved from the document on first access and cached
     * until resetProperties() is called.
     *
     * @throws Exception If the header offset is set and no usable record is found at that offset
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        // Nothing to resolve: either no header offset is configured or the
        // header has already been read from the document.
        if (null === $this->header_offset || [] !== $this->header) {
            return $this->header;
        }

        $this->header = $this->setHeader($this->header_offset);

        return $this->header;
    }

    /**
     * Determine the CSV record header.
     *
     * @throws Exception If the header offset is set and no record is found or is the empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $header = $this->seekRow($offset);
        if (false === $header || [] === $header || [null] === $header) {
            throw new Exception(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        // Only the very first record of the document can carry the BOM.
        if (0 === $offset) {
            return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        return $header;
    }

    /**
     * Returns the row at a given offset.
     *
     * The document is iterated from its start until the requested offset is met.
     *
     * @return array|false the record, or false when the offset does not exist in the document
     */
    protected function seekRow(int $offset)
    {
        foreach ($this->getDocument() as $index => $record) {
            if ($offset === $index) {
                return $record;
            }
        }

        return false;
    }

    /**
     * Returns the document as an Iterator.
     *
     * On PHP versions lower than 7.4 combined with an empty escape character
     * the parsing is delegated to the EmptyEscapeParser polyfill; otherwise
     * the underlying document is configured for CSV reading, rewound and
     * returned as is.
     */
    protected function getDocument(): Iterator
    {
        if (70400 > PHP_VERSION_ID && '' === $this->escape) {
            $this->document->setCsvControl($this->delimiter, $this->enclosure);

            return EmptyEscapeParser::parse($this->document);
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->rewind();

        return $this->document;
    }

    /**
     * Strip the BOM sequence from a record.
     *
     * Once the BOM is removed from the first field, a field that is still
     * wrapped in the enclosure character gets its surrounding enclosures
     * removed as well.
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to remove from the start of the first field
     * @param string   $enclosure  the CSV enclosure character
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $record;
        }

        $record[0] = mb_substr($record[0], $bom_length);

        $field = $record[0];
        $length = strlen($field);

        // A field needs at least two bytes to be wrapped by an opening AND a
        // closing enclosure. Without this guard a field made of a single
        // enclosure character would be seen as "enclosed" and the stripping
        // below would turn it into false (PHP < 8) or '' (PHP >= 8).
        // Strict comparisons are used so that numeric looking enclosures are
        // never matched through PHP's loose string comparison rules.
        if ($length < 2 || $field[0] !== $enclosure || $field[$length - 1] !== $enclosure) {
            return $record;
        }

        $record[0] = substr($field, 1, -1);

        return $record;
    }

    /**
     * {@inheritdoc}
     *
     * Proxies fetchOne(), fetchColumn() and fetchPairs() to a ResultSet
     * built from the current records and header.
     *
     * @throws BadMethodCallException If the called method is not one of the proxied methods
     */
    public function __call($method, array $arguments)
    {
        // Map used as a set: isset() gives a case-sensitive exact-name lookup.
        static $whitelisted = ['fetchColumn' => 1, 'fetchOne' => 1, 'fetchPairs' => 1];
        if (isset($whitelisted[$method])) {
            return (new ResultSet($this->getRecords(), $this->getHeader()))->$method(...$arguments);
        }

        throw new BadMethodCallException(sprintf('%s::%s() method does not exist', static::class, $method));
    }

    /**
     * {@inheritdoc}
     *
     * The count is computed by iterating over getRecords() once and is then
     * cached until it is invalidated.
     */
    public function count(): int
    {
        if (-1 === $this->nb_records) {
            $this->nb_records = iterator_count($this->getRecords());
        }

        return $this->nb_records;
    }

    /**
     * {@inheritdoc}
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }

    /**
     * {@inheritdoc}
     *
     * Records are returned as a list: the iterator keys are discarded.
     */
    public function jsonSerialize(): array
    {
        return iterator_to_array($this->getRecords(), false);
    }

    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     *
     * If the CSV document has a header record then each record is combined
     * to the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent. Missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned object.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     *
     * @throws Exception If the header to use contains non unique or non string values
     */
    public function getRecords(array $header = []): Iterator
    {
        // Validated first so that an invalid header fails before the document is touched.
        $header = $this->computeHeader($header);

        // Keeps array records only; empty records are dropped when skipping is enabled.
        // NOTE(review): the comparison is intentionally left loose to preserve
        // the current behaviour; with `!=` a record equal to [''] also matches [null].
        $normalized = function ($record): bool {
            return is_array($record) && (!$this->is_empty_records_skipped || $record != [null]);
        };
        $bom = $this->getInputBOM();
        $document = $this->getDocument();

        $records = $this->stripBOM(new CallbackFilterIterator($document, $normalized), $bom);
        if (null !== $this->header_offset) {
            // The record used as header must not be returned as data.
            $records = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
                return $offset !== $this->header_offset;
            });
        }

        if (!$this->is_empty_records_skipped) {
            // Kept empty records are exposed as [] instead of the parser's [null].
            $normalized_empty_records = static function (array $record): array {
                if ([null] === $record) {
                    return [];
                }

                return $record;
            };

            return $this->combineHeader(new MapIterator($records, $normalized_empty_records), $header);
        }

        return $this->combineHeader($records, $header);
    }

    /**
     * Returns the header to be used for iteration.
     *
     * When the submitted header is empty the document header is used instead.
     *
     * No native return type is declared on purpose: adding one would break
     * child classes overriding this method without a return type.
     *
     * @param string[] $header
     *
     * @throws Exception If the header contains non unique column name
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        if ([] === $header) {
            $header = $this->getHeader();
        }

        // Filtering non-string values and removing duplicates must leave the
        // header untouched; any difference means the header is invalid.
        if ($header === array_unique(array_filter($header, 'is_string'))) {
            return $header;
        }

        throw new Exception('The header record must be empty or a flat array with unique string values');
    }

    /**
     * Combine the CSV header to each record if present.
     *
     * Records shorter than the header are padded with null values and longer
     * records are truncated so that every record matches the header size.
     *
     * @param string[] $header
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if ([] === $header) {
            return $iterator;
        }

        $field_count = count($header);
        $mapper = static function (array $record) use ($header, $field_count): array {
            if (count($record) !== $field_count) {
                $record = array_slice(array_pad($record, $field_count, null), 0, $field_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Strip the BOM sequence from the returned records if necessary.
     *
     * Only the record located at offset 0 is altered; the iterator is
     * returned untouched when the BOM is the empty string.
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $mapper = function (array $record, int $index) use ($bom_length): array {
            if (0 !== $index) {
                return $record;
            }

            return $this->removeBOM($record, $bom_length, $this->enclosure);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string value.
     *
     * Setting the offset already in use is a no-op; any other valid value
     * resets the cached header and record count.
     *
     * @param int|null $offset the header record offset
     *
     * @throws TypeError if the offset is neither null nor an integer
     * @throws Exception if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if ($offset === $this->header_offset) {
            return $this;
        }

        if (!is_nullable_int($offset)) {
            throw new TypeError(sprintf(__METHOD__.'() expects 1 Argument to be null or an integer %s given', gettype($offset)));
        }

        if (null !== $offset && 0 > $offset) {
            throw new Exception(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }

    /**
     * Enable skipping empty records.
     *
     * The cached record count is invalidated when the setting changes.
     */
    public function enableEmptyRecordsSkipping(): self
    {
        if (!$this->is_empty_records_skipped) {
            $this->is_empty_records_skipped = true;
            $this->nb_records = -1;
        }

        return $this;
    }

    /**
     * Disable skipping empty records.
     *
     * The cached record count is invalidated when the setting changes.
     */
    public function disableEmptyRecordsSkipping(): self
    {
        if ($this->is_empty_records_skipped) {
            $this->is_empty_records_skipped = false;
            $this->nb_records = -1;
        }

        return $this;
    }

    /**
     * Tells whether empty records are skipped by the instance.
     */
    public function isEmptyRecordsSkipped(): bool
    {
        return $this->is_empty_records_skipped;
    }
}
432