Completed
Push — master ( 6f5dc8...ace255 )
by ignace nyamagana
27:40 queued 12:47
created

Reader::computeHeader()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 6
nc 3
nop 1
dl 0
loc 12
ccs 5
cts 5
cp 1
crap 4
rs 9.2
c 0
b 0
f 0
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use BadMethodCallException;
18
use CallbackFilterIterator;
19
use Countable;
20
use Iterator;
21
use IteratorAggregate;
22
use League\Csv\Exception\RuntimeException;
23
use LimitIterator;
24
use SplFileObject;
25
26
/**
27
 * A class to manage records selection from a CSV document
28
 *
29
 * @package League.csv
30
 * @since  3.0.0
31
 *
32
 * @method array fetchAll() Returns a sequential array of all CSV records
33
 * @method array fetchOne(int $offset = 0) Returns a single record from the CSV
34
 * @method Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
35
 * @method Generator fetchPairs(string|int $offset_index, string|int $value_index) Fetches the next key-value pairs from the CSV document
36
 */
37
class Reader extends AbstractCsv implements Countable, IteratorAggregate
38
{
39
    /**
     * Stream filter mode used by this class (read mode).
     *
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Value used to fill a record that has fewer fields than the header.
     *
     * @var mixed
     */
    protected $record_padding_value;

    /**
     * Offset of the record used as the CSV header.
     *
     * Null when no record of the document is used as header.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached CSV header record; emptied by resetProperties().
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Cached number of records; -1 means the count has not been computed yet.
     *
     * @var int
     */
    protected $nb_records = -1;
71
72
    /**
     * Detect delimiters occurrences in the CSV
     *
     * Returns an associative array where each key represents a valid
     * delimiter and each value the cell count found with that delimiter,
     * sorted from the highest count to the lowest.
     *
     * Only single-byte strings are considered valid delimiters: any other
     * entry of $delimiters (including non-string values) is ignored.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records Detection is made using $nb_records of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        // is_string() guards strlen(): with strict_types enabled a non-string
        // entry would raise a TypeError instead of simply being discarded.
        $filter = function ($value): bool {
            return is_string($value) && 1 === strlen($value);
        };

        $nb_records = $this->filterMinRange($nb_records, 1, __METHOD__.'() expects the number of records to consider to be a valid positive integer, %s given');
        $delimiters = array_unique(array_filter($delimiters, $filter));
        $reducer = function (array $res, string $delimiter) use ($nb_records): array {
            $res[$delimiter] = $this->getCellCount($delimiter, $nb_records);

            return $res;
        };

        $res = array_reduce($delimiters, $reducer, []);
        // Highest count first, delimiter keys preserved.
        arsort($res, SORT_NUMERIC);

        return $res;
    }
102
103
    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records
     *
     * Only records split into more than one field are kept. The count is
     * recursive, so it includes each kept record as well as its fields.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        // Restrict the scan to the first $nb_records entries of the document.
        $sample = new LimitIterator($this->document, 0, $nb_records);
        $multi_fields = new CallbackFilterIterator($sample, function ($row): bool {
            return is_array($row) && count($row) > 1;
        });
        $rows = iterator_to_array($multi_fields, false);

        return count($rows, COUNT_RECURSIVE);
    }
124
125
    /**
     * Returns the value currently used to pad records
     * that have fewer fields than the header
     *
     * @return mixed
     */
    public function getRecordPaddingValue()
    {
        return $this->record_padding_value;
    }
134
135
    /**
     * Returns the offset of the record used as header
     *
     * If no CSV record is used as header this method MUST return null
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
146
147
    /**
     * Returns whether the selected header can be combined with each record
     *
     * A valid header must be empty or contain only unique string field names
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if (empty($header)) {
            return true;
        }

        // Dropping non-string and duplicated names must leave the header untouched.
        $unique_names = array_unique(array_filter($header, 'is_string'));

        return $header === $unique_names;
    }
160
161
    /**
     * Returns the CSV record header
     *
     * The returned header is represented as an array of string values.
     * When a header offset is set, the header is read from the document
     * the first time it is needed and then kept in the header property.
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        $is_loadable = null !== $this->header_offset;
        if ($is_loadable && empty($this->header)) {
            $this->header = $this->setHeader($this->header_offset);
        }

        return $this->header;
    }
180
181
    /**
     * Determine the CSV record header
     *
     * Reads the record found at the given offset and returns it; the
     * header property itself is not modified by this method. When the
     * offset is 0 the input BOM is removed from the record.
     *
     * @param int $offset
     *
     * @throws RuntimeException If the record at the given offset
     *                          is missing or is an empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $document = $this->document;
        $document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $document->seek($offset);

        $record = $document->current();
        if (empty($record)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $record;
        }

        // Only the first record of the document can carry the BOM sequence.
        return $this->removeBOM($record, mb_strlen($this->getInputBOM()), $this->enclosure);
    }
208
209
    /**
     * Strip the BOM sequence from a record
     *
     * The BOM is removed from the first field only. If what remains of
     * that field starts and ends with the enclosure character, these two
     * characters are removed as well.
     *
     * @param string[] $record
     * @param int      $bom_length BOM length, a length of 0 leaves the record untouched
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 != $bom_length) {
            $first_field = mb_substr($record[0], $bom_length);
            $is_enclosed = $enclosure == mb_substr($first_field, 0, 1)
                && $enclosure == mb_substr($first_field, -1, 1);
            $record[0] = $is_enclosed ? mb_substr($first_field, 1, -1) : $first_field;
        }

        return $record;
    }
231
232
    /**
     * Forwards the whitelisted fetch* methods to a ResultSet built
     * from the current records and header
     *
     * @inheritdoc
     *
     * @throws BadMethodCallException If the method is not whitelisted
     */
    public function __call($method, array $arguments)
    {
        $whitelisted = ['fetchColumn' => 1, 'fetchPairs' => 1, 'fetchOne' => 1, 'fetchAll' => 1];
        if (!isset($whitelisted[$method])) {
            throw new BadMethodCallException(sprintf('%s::%s() method does not exist', __CLASS__, $method));
        }

        $result_set = new ResultSet($this->getRecords(), $this->getHeader());

        return $result_set->$method(...$arguments);
    }
244
245
    /**
     * Returns the number of records, computed once and then cached
     *
     * @inheritdoc
     */
    public function count(): int
    {
        // -1 marks a count that has not been computed yet.
        if (-1 !== $this->nb_records) {
            return $this->nb_records;
        }

        $this->nb_records = iterator_count($this->getRecords());

        return $this->nb_records;
    }
256
257
    /**
     * Returns the same iterator as getRecords() called without argument
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }
264
265
    /**
     * Returns the CSV records in an iterator object.
     *
     * Each CSV record is represented as a simple array of field values.
     *
     * If a header is available then each record is combined with it.
     * If a header offset is set, the record found at that offset is
     * removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with the record padding value while extra record fields
     * are stripped from the returned object.
     *
     * @param array $header an optional header to combine with each record
     *
     * @throws RuntimeException If no valid header can be computed
     *
     * @return Iterator
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);
        $bom = $this->getInputBOM();

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        // Discard entries that are not arrays as well as empty lines.
        $is_record = function ($row): bool {
            return is_array($row) && $row != [null];
        };
        $records = $this->stripBOM(new CallbackFilterIterator($this->document, $is_record), $bom);
        if (null === $this->header_offset) {
            return $this->combineHeader($records, $header);
        }

        $skip_header = function (array $record, int $offset): bool {
            return $offset !== $this->header_offset;
        };

        return $this->combineHeader(new CallbackFilterIterator($records, $skip_header), $header);
    }
301
302
    /**
     * Returns the header to combine with each record
     *
     * When the submitted header is empty the header of the CSV document is
     * used instead. The selected header must be empty or a flat array with
     * unique string values. A non-empty submitted header that is invalid is
     * rejected instead of being silently replaced by the document header.
     *
     * @param array $header the header submitted by the caller
     *
     * @throws RuntimeException If the selected header is not empty and is
     *                          not a flat array with unique string values
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        if (empty($header)) {
            $header = $this->getHeader();
        }

        // Dropping non-string and duplicated names must leave the header untouched.
        if ($header === array_unique(array_filter($header, 'is_string'))) {
            return $header;
        }

        throw new RuntimeException('The header record must be empty or a flat array with unique string values');
    }
314 8
315 8
    /**
     * Add the CSV header if present and valid
     *
     * With an empty header the iterator is returned unchanged. Otherwise
     * each record is resized to the header size (padded with the record
     * padding value or truncated) and then indexed by the header values.
     *
     * @param Iterator $iterator
     * @param array    $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (empty($header)) {
            return $iterator;
        }

        $expected = count($header);
        $combine = function (array $record) use ($header, $expected): array {
            if ($expected !== count($record)) {
                $padded = array_pad($record, $expected, $this->record_padding_value);
                $record = array_slice($padded, 0, $expected);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $combine);
    }
340
341 6
    /**
     * Strip the BOM sequence if present
     *
     * With an empty BOM the iterator is returned unchanged. Otherwise only
     * the record whose index is 0 is processed, every other record is
     * passed through as is.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $strip_first = function (array $record, int $index) use ($bom_length): array {
            return 0 === $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        };

        return new MapIterator($iterator, $strip_first);
    }
366
367
    /**
     * Set the value used to pad records that have
     * fewer fields than the header
     *
     * @param mixed $record_padding_value
     *
     * @return static
     */
    public function setRecordPaddingValue($record_padding_value): self
    {
        $this->record_padding_value = $record_padding_value;

        return $this;
    }
380 2
381
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * A non-null offset is validated through filterMinRange() with a
     * minimum of 0. The cached properties are reset only when the
     * offset actually changes.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        $validated = null === $offset
            ? null
            : $this->filterMinRange($offset, 0, __METHOD__.'() expects the header offset index to be a positive integer or 0, %s given');

        if ($validated === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $validated;
        $this->resetProperties();

        return $this;
    }
404
405
    /**
     * Clears the cached header and the cached records count
     *
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->header = [];
        // -1 marks a count that has not been computed yet.
        $this->nb_records = -1;
    }
413
}
414