Completed
Pull Request — master (#379)
by ignace nyamagana
01:41
created

Reader::setHeader()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 3
nop 1
dl 0
loc 13
ccs 7
cts 7
cp 1
crap 3
rs 9.8333
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com)
5
 *
6
 * (c) Ignace Nyamagana Butera <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\Csv;
15
16
use CallbackFilterIterator;
17
use Iterator;
18
use JsonSerializable;
19
use League\Csv\Polyfill\EmptyEscapeParser;
20
use SplFileObject;
21
use function array_combine;
22
use function array_filter;
23
use function array_pad;
24
use function array_slice;
25
use function array_unique;
26
use function count;
27
use function is_array;
28
use function iterator_count;
29
use function iterator_to_array;
30
use function mb_strlen;
31
use function mb_substr;
32
use function sprintf;
33
use function strlen;
34
use function substr;
35
use const PHP_VERSION_ID;
36
use const STREAM_FILTER_READ;
37
38
/**
39
 * A class to parse and read records from a CSV document.
40
 */
41
class Reader extends AbstractCsv implements TabularDataReader, JsonSerializable
42
{
43
    /**
     * Offset of the record used as the header; null when no header is selected.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached header record; emptied by resetProperties().
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Cached number of records; -1 means the count has not been computed yet.
     *
     * @var int
     */
    protected $nb_records = -1;

    /**
     * {@inheritdoc}
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Whether empty records are kept (true) or skipped (false) by getRecords().
     *
     * @var bool
     */
    protected $is_empty_records_included = false;
73
74
    /**
     * {@inheritdoc}
     *
     * The three arguments are forwarded untouched to the parent named
     * constructor; only the default open mode ('r') is declared here.
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
    {
        $instance = parent::createFromPath($path, $open_mode, $context);

        return $instance;
    }
81
82
    /**
     * {@inheritdoc}
     *
     * After the parent reset, the cached header and the cached record count
     * are discarded so that both are recomputed on their next access.
     */
    protected function resetProperties(): void
    {
        parent::resetProperties();

        $this->header = [];
        $this->nb_records = -1;
    }
91
92
    /**
     * Returns the offset of the record used as the CSV header.
     *
     * Null is returned when no header offset has been set.
     */
    public function getHeaderOffset(): ?int
    {
        return $this->header_offset;
    }
102
103
    /**
     * {@inheritDoc}
     *
     * The header is resolved lazily: it is read from the document only when a
     * header offset is set and no header has been cached yet. Without a header
     * offset the currently stored header (empty by default) is returned as is.
     */
    public function getHeader(): array
    {
        $must_resolve = null !== $this->header_offset && [] === $this->header;
        if ($must_resolve) {
            $this->header = $this->setHeader($this->header_offset);
        }

        return $this->header;
    }
120
121
    /**
     * Determines the CSV header record located at the given offset.
     *
     * When the header is the first record of the document (offset 0), the
     * input BOM is stripped from it before it is returned.
     *
     * @throws SyntaxError If no record is found at the offset or the record is empty
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $header = $this->seekRow($offset);

        // `[]` means "no such row", `[null]` is how an empty line is reported.
        $is_missing = [] === $header || [null] === $header;
        if ($is_missing) {
            throw new SyntaxError(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $header;
        }

        return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
    }
141
142
    /**
     * Returns the row found at the given offset.
     *
     * The document is iterated from its start until the offset is reached.
     * An empty array is returned when the offset does not exist or when the
     * value found at that offset is not an array.
     *
     * @return array the matching record or an empty array
     */
    protected function seekRow(int $offset): array
    {
        foreach ($this->getDocument() as $index => $record) {
            if ($offset !== $index) {
                continue;
            }

            // The document iterator may yield a non-array value (getRecords()
            // filters on is_array() for the same reason). Report it as a
            // missing row instead of violating the declared return type.
            return is_array($record) ? $record : [];
        }

        return [];
    }
155
156
    /**
     * Returns the document as an Iterator of records.
     *
     * On PHP versions lower than 7.4 combined with an empty escape character,
     * parsing is delegated to EmptyEscapeParser. Otherwise the document itself
     * is configured for CSV reading, rewound and returned.
     */
    protected function getDocument(): Iterator
    {
        $use_polyfill = PHP_VERSION_ID < 70400 && '' === $this->escape;
        if (!$use_polyfill) {
            $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD);
            $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
            $this->document->rewind();

            return $this->document;
        }

        $this->document->setCsvControl($this->delimiter, $this->enclosure);

        return EmptyEscapeParser::parse($this->document);
    }
173
174
    /**
     * Strips the BOM sequence from the first field of a record.
     *
     * Once the BOM is removed, if the first field both starts and ends with
     * the enclosure character, these two surrounding characters are removed
     * as well.
     *
     * The record is returned untouched when the BOM length is 0 or when the
     * record has no usable first field.
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to remove from the first field
     * @param string   $enclosure  the CSV enclosure character
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        // Nothing to strip, or no first field to strip it from.
        if (0 === $bom_length || !isset($record[0])) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);
        $record[0] = $field;

        // Last character followed by first character of the stripped field.
        // Compared strictly so that numeric-looking strings are never juggled.
        $edges = substr($field, -1).substr($field, 0, 1);
        if ($enclosure.$enclosure !== $edges) {
            return $record;
        }

        // The field is wrapped in enclosures: drop the surrounding pair.
        $record[0] = substr($field, 1, -1);

        return $record;
    }
196
197
    /**
     * {@inheritdoc}
     *
     * Delegates to a ResultSet built from the current records and header.
     */
    public function fetchColumn($index = 0): Iterator
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchColumn($index);
    }
206
207
    /**
     * {@inheritdoc}
     *
     * Delegates to a ResultSet built from the current records and header.
     */
    public function fetchOne(int $nth_record = 0): array
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchOne($nth_record);
    }
216
217
    /**
     * {@inheritdoc}
     *
     * Delegates to a ResultSet built from the current records and header.
     */
    public function fetchPairs($offset_index = 0, $value_index = 1): Iterator
    {
        return (new ResultSet($this->getRecords(), $this->getHeader()))->fetchPairs($offset_index, $value_index);
    }
226
227
    /**
     * {@inheritdoc}
     *
     * The count is computed once by iterating over getRecords() and then
     * cached; -1 is the marker for "not computed yet".
     */
    public function count(): int
    {
        if (-1 !== $this->nb_records) {
            return $this->nb_records;
        }

        $this->nb_records = iterator_count($this->getRecords());

        return $this->nb_records;
    }
238
239
    /**
     * {@inheritdoc}
     *
     * Iterating over the instance is the same as iterating over getRecords()
     * called without an explicit header.
     */
    public function getIterator(): Iterator
    {
        $records = $this->getRecords();

        return $records;
    }
246
247
    /**
     * {@inheritdoc}
     *
     * Returns every record as a list; the iterator keys are not preserved.
     */
    public function jsonSerialize(): array
    {
        $records = $this->getRecords();

        return iterator_to_array($records, false);
    }
254
255
    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     *
     * If the CSV document has a header record then each record is combined
     * with the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned object.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);

        // The BOM only has to be stripped when it must not be part of the output.
        $bom = $this->is_input_bom_included ? '' : $this->getInputBOM();

        // Keep array records only and, unless empty records are included,
        // drop the ones reported as empty.
        $keep_record = function ($record): bool {
            if (!is_array($record)) {
                return false;
            }

            return $this->is_empty_records_included || $record != [null];
        };

        $records = $this->stripBOM(new CallbackFilterIterator($this->getDocument(), $keep_record), $bom);

        if (null !== $this->header_offset) {
            // The header record itself is never part of the returned records.
            $skip_header = function (array $record, int $offset): bool {
                return $this->header_offset !== $offset;
            };
            $records = new CallbackFilterIterator($records, $skip_header);
        }

        if ($this->is_empty_records_included) {
            // Empty records are exposed as empty arrays instead of `[null]`.
            $records = new MapIterator($records, static function (array $record): array {
                return [null] === $record ? [] : $record;
            });
        }

        return $this->combineHeader($records, $header);
    }
303
304
    /**
     * Returns the header to be used for iteration.
     *
     * When the submitted header is empty, the document header is used instead.
     * The selected header is valid only if it is identical to itself once
     * non-string values and duplicates have been removed.
     *
     * @param string[] $header
     *
     * @throws SyntaxError If the header contains non-string or non-unique column names
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        $candidate = [] === $header ? $this->getHeader() : $header;

        $unique_strings = array_unique(array_filter($candidate, 'is_string'));
        if ($candidate !== $unique_strings) {
            throw new SyntaxError('The header record must be an empty or a flat array with unique string values.');
        }

        return $candidate;
    }
325
326
    /**
     * Combines the CSV header with each record if a header is present.
     *
     * Records shorter than the header are padded with null values, longer
     * records are truncated, then the header values become the record keys.
     * With an empty header the iterator is returned unchanged.
     *
     * @param string[] $header
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if ([] === $header) {
            return $iterator;
        }

        $field_count = count($header);

        return new MapIterator($iterator, static function (array $record) use ($header, $field_count): array {
            if ($field_count !== count($record)) {
                $record = array_slice(array_pad($record, $field_count, null), 0, $field_count);
            }

            /** @var array<string|null> $combined */
            $combined = array_combine($header, $record);

            return $combined;
        });
    }
351
352
    /**
     * Strips the BOM sequence from the returned records if necessary.
     *
     * Only the record found at index 0 is altered; with an empty BOM the
     * iterator is returned unchanged.
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            return 0 === $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
372
373
    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * Submitting the offset already in use is a no-op. Any other valid value
     * is stored and the cached header and record count are reset.
     *
     * @param int|null $offset the header record offset, null to unset it
     *
     * @throws InvalidArgument if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset(?int $offset): self
    {
        if ($this->header_offset !== $offset) {
            if (null !== $offset && $offset < 0) {
                throw new InvalidArgument(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
            }

            $this->header_offset = $offset;
            $this->resetProperties();
        }

        return $this;
    }
400
401
    /**
     * Enables skipping empty records.
     *
     * The cached record count is invalidated only when the setting changes.
     */
    public function skipEmptyRecords(): self
    {
        if (!$this->is_empty_records_included) {
            return $this;
        }

        $this->is_empty_records_included = false;
        $this->nb_records = -1;

        return $this;
    }
413
414
    /**
     * Disables skipping empty records.
     *
     * The cached record count is invalidated only when the setting changes.
     */
    public function includeEmptyRecords(): self
    {
        if ($this->is_empty_records_included) {
            return $this;
        }

        $this->is_empty_records_included = true;
        $this->nb_records = -1;

        return $this;
    }
426
427
    /**
     * Tells whether empty records are included by the instance.
     *
     * Returns true when empty records are kept, false when they are skipped.
     */
    public function isEmptyRecordsIncluded(): bool
    {
        return $this->is_empty_records_included;
    }
434
}
435