Completed
Pull Request — master (#234)
by ignace nyamagana
02:39
created

Reader::setRecords()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 9
nc 2
nop 0
dl 0
loc 15
ccs 9
cts 9
cp 1
crap 3
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use BadMethodCallException;
18
use CallbackFilterIterator;
19
use Countable;
20
use Iterator;
21
use IteratorAggregate;
22
use League\Csv\Exception\RuntimeException;
23
use LimitIterator;
24
use SplFileObject;
25
26
/**
27
 * A class to manage records selection from a CSV document
28
 *
29
 * @package League.csv
30
 * @since  3.0.0
31
 *
32
 * @method array fetchAll() Returns a sequential array of all CSV records
33
 * @method array fetchOne(int $offset = 0) Returns a single record from the CSV
34
 * @method \Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
35
 * @method \Generator fetchPairs(string|int $offset_index, string|int $value_index) Fetches the next key-value pairs from the CSV document
36
 */
37
class Reader extends AbstractCsv implements Countable, IteratorAggregate
38
{
39
    /**
     * Stream filters attached through this class operate in read mode.
     *
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Filler value appended to a record that has fewer fields than the header.
     *
     * @var mixed
     */
    protected $record_padding_value;

    /**
     * Offset of the record used as header, or null when no record is used.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached header record; stays empty until it has been resolved.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Cached records iterator; null until the records are first requested.
     *
     * @var Iterator
     */
    protected $records;

    /**
     * Cached number of records; -1 means the count has not been computed yet.
     *
     * @var int
     */
    protected $nb_records = -1;
78
79
    /**
     * Counts, for each candidate delimiter, the cells found at the start of the CSV
     *
     * The result is an associative array keyed by delimiter whose values are
     * the cell counts reported by getCellCount(), sorted from the highest
     * count to the lowest. Candidates that are not exactly one byte long are
     * discarded and duplicates are only evaluated once.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records number of CSV records inspected for the detection
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        // NOTE(review): filterMinRange() is defined outside this file; presumably it
        // rejects values lower than 1 using the given message template - confirm.
        $nb_records = $this->filterMinRange($nb_records, 1, __METHOD__.'() expects the number of records to consider to be a valid positive integer, %s given');

        $candidates = array_unique(array_filter($delimiters, function ($value): bool {
            return 1 == strlen($value);
        }));

        $occurrences = [];
        foreach ($candidates as $candidate) {
            $occurrences[$candidate] = $this->getCellCount($candidate, $nb_records);
        }

        // Highest count first, keys (the delimiters) preserved.
        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
109
110
    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records
     *
     * The document is parsed with the candidate delimiter and only records
     * that split into more than one field are retained. Because the count is
     * recursive, the returned value is the number of retained records plus
     * the total number of their fields.
     *
     * The document CSV control is temporarily switched to the candidate
     * delimiter and is always restored to the configured delimiter before
     * returning, so that iterators already built on the same document keep
     * parsing with the expected delimiter.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $filter = function ($record): bool {
            return is_array($record) && count($record) > 1;
        };

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        try {
            $iterator = new CallbackFilterIterator(new LimitIterator($this->document, 0, $nb_records), $filter);

            return count(iterator_to_array($iterator, false), COUNT_RECURSIVE);
        } finally {
            // Undo the probing delimiter: the document is shared with any
            // records iterator that was created before the detection ran.
            $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        }
    }
131
132
    /**
     * Gives back the value currently used to pad records
     * that are shorter than the header
     *
     * @return mixed
     */
    public function getRecordPaddingValue()
    {
        return $this->record_padding_value;
    }
141
142
    /**
     * Gives back the offset of the record used as header
     *
     * The returned value is null when no CSV record is used as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
153
154
    /**
     * Tells whether the selected header can be combined with each record
     *
     * The header qualifies when it is empty, or when every field is a string
     * and no field name appears more than once.
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if (empty($header)) {
            return true;
        }

        // Dropping non-string and duplicated fields must leave the header untouched.
        $string_fields = array_filter($header, 'is_string');

        return $header === array_unique($string_fields);
    }
167
168
    /**
     * Gives back the CSV header record
     *
     * When a header offset is set and the header has not been resolved yet,
     * it is read from the document and cached. Without a header offset the
     * stored header is returned as is.
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if (null !== $this->header_offset && empty($this->header)) {
            $this->header = $this->setHeader($this->header_offset);
        }

        return $this->header;
    }
187
188
    /**
     * Reads the header record located at the given offset
     *
     * The BOM sequence is stripped from the record when it sits at offset 0.
     *
     * @param int $offset
     *
     * @throws RuntimeException If the record found at the offset is missing
     *                          or is empty
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $document = $this->document;
        $document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $document->seek($offset);

        $header = $document->current();
        if (empty($header)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        // Only the very first record of the document can carry the BOM.
        return 0 === $offset
            ? $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure)
            : $header;
    }
215
216
    /**
     * Removes the BOM sequence from the first field of a record
     *
     * Once the BOM is removed, if the first field both starts and ends with
     * the enclosure character, that surrounding pair is removed as well.
     *
     * @param string[] $record
     * @param int      $bom_length number of leading characters to drop
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 == $bom_length) {
            return $record;
        }

        $first_field = mb_substr($record[0], $bom_length);
        $is_enclosed = $enclosure == mb_substr($first_field, 0, 1)
            && $enclosure == mb_substr($first_field, -1, 1);

        $record[0] = $is_enclosed ? mb_substr($first_field, 1, -1) : $first_field;

        return $record;
    }
238
239
    /**
     * Forwards the supported fetch* calls to a ResultSet built from
     * the current records and header
     *
     * @inheritdoc
     *
     * @throws BadMethodCallException If the method is not one of the forwarded methods
     */
    public function __call($method, array $arguments)
    {
        $forwarded = ['fetchColumn' => 1, 'fetchPairs' => 1, 'fetchOne' => 1, 'fetchAll' => 1];
        if (!isset($forwarded[$method])) {
            throw new BadMethodCallException(sprintf('%s::%s() method does not exist', __CLASS__, $method));
        }

        $result_set = new ResultSet($this->getRecords(), $this->getHeader());

        return $result_set->$method(...$arguments);
    }
251
252
    /**
     * Returns the number of records, computing it on first use only
     *
     * @inheritdoc
     */
    public function count(): int
    {
        // -1 marks a count that has not been computed (or has been reset).
        if (-1 !== $this->nb_records) {
            return $this->nb_records;
        }

        $this->nb_records = iterator_count($this->getRecords());

        return $this->nb_records;
    }
263
264
    /**
     * Gives back the iterator over the CSV records.
     *
     * The iterator is built on the first call and the same instance is
     * returned afterwards until the cached properties are reset.
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        if (null === $this->records) {
            $this->records = $this->setRecords();
        }

        return $this->records;
    }
275
276
    /**
     * Builds the iterator over the CSV records.
     *
     * Values that are not arrays, as well as records equal to [null], are
     * skipped. The BOM sequence is then stripped and, when a header offset
     * is set, each record is combined with the header while the header
     * record itself is left out of the iteration.
     *
     * When records are combined with the header, missing fields are filled
     * with the record padding value and extra fields are dropped.
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    protected function setRecords(): Iterator
    {
        if (!$this->supportsHeaderAsRecordKeys()) {
            throw new RuntimeException('The header record must be empty or a flat array with unique string values');
        }

        $bom = $this->getInputBOM();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $records = new CallbackFilterIterator($this->document, function ($record): bool {
            return is_array($record) && $record != [null];
        });

        return $this->combineHeader($this->stripBOM($records, $bom));
    }
307
308
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without a header offset the iterator is returned untouched. Otherwise
     * the record located at the header offset is filtered out and every
     * remaining record is resized to the header length (padded with the
     * record padding value, or truncated) before being keyed by the header.
     *
     * @param Iterator $iterator
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $without_header = new CallbackFilterIterator($iterator, function (array $record, int $offset): bool {
            return $offset != $this->header_offset;
        });

        $header = $this->getHeader();
        $field_count = count($header);

        $combine = function (array $record) use ($header, $field_count): array {
            if (count($record) == $field_count) {
                return array_combine($header, $record);
            }

            $padded = array_pad($record, $field_count, $this->record_padding_value);

            return array_combine($header, array_slice($padded, 0, $field_count));
        };

        return new MapIterator($without_header, $combine);
    }
337
338
    /**
     * Removes the BOM sequence from the record found at index 0
     *
     * When the BOM is an empty string the iterator is returned untouched.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            // Only the record at index 0 is altered.
            return 0 == $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
363
364
    /**
     * Exposes the records iterator so the object can be iterated directly
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }
371
372
    /**
     * Defines the value used to pad records that are shorter than the header
     *
     * @param mixed $record_padding_value
     *
     * @return static
     */
    public function setRecordPaddingValue($record_padding_value): self
    {
        $this->record_padding_value = $record_padding_value;

        return $this;
    }
385
386
    /**
     * Chooses which record is used as the CSV header
     *
     * Passing null means no record is used as header. Changing the offset
     * clears the cached header, records iterator and records count.
     *
     * Because the header is used as record keys, a valid header
     * MUST contain only unique string values.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            // NOTE(review): filterMinRange() is defined outside this file; presumably it
            // rejects values lower than 0 using the given message template - confirm.
            $offset = $this->filterMinRange($offset, 0, __METHOD__.'() expects the header offset index to be a positive integer or 0, %s given');
        }

        if ($offset === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }
409
410
    /**
     * Clears every cached value: header, records iterator and records count
     *
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->header = [];
        $this->records = null;
        $this->nb_records = -1;
    }
419
}
420