Completed
Push — master ( a25e8e...8b5e7d )
by ignace nyamagana
04:42 queued 02:53
created

Reader::removeBOM()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 7
cts 7
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 7
nc 3
nop 3
crap 4
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use BadMethodCallException;
18
use CallbackFilterIterator;
19
use Iterator;
20
use IteratorAggregate;
21
use League\Csv\Exception\RuntimeException;
22
use LimitIterator;
23
use SplFileObject;
24
25
/**
26
 * A class to manage records selection from a CSV document
27
 *
28
 * @package League.csv
29
 * @since  3.0.0
30
 *
31
 * @method array fetchAll() Returns a sequential array of all CSV records
32
 * @method array fetchOne(int $offset = 0) Returns a single record from the CSV
33
 * @method \Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
34
 * @method \Generator fetchPairs(string|int $offset_index, string|int $value_index) Fetches the next key-value pairs from the CSV document
35
 */
36
class Reader extends AbstractCsv implements IteratorAggregate
37
{
38
    /**
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Offset of the record used as the CSV header
     *
     * Null means that no record is used as header.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached CSV header record
     *
     * Filled by getHeader(); stays an empty array while no header offset is set.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Tells whether the cached header is up to date
     *
     * getHeader() returns the cached header while this flag is true and
     * resolves the header again once resetProperties() has cleared it.
     *
     * @var bool
     */
    protected $is_header_loaded = false;

    /**
     * Value used to pad a record that has fewer fields than the header
     *
     * @var mixed
     */
    protected $record_padding_value;
70
71
    /**
     * Returns the offset of the record used as the CSV header
     *
     * Null is returned when no record has been selected as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
82
83
    /**
     * Tells whether the selected header can be combined with each record
     *
     * The header qualifies when it is empty or when every one of its
     * fields is a string and no field name appears twice.
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if (empty($header)) {
            return true;
        }

        // Dropping non-string and duplicated fields must leave the header untouched.
        $unique_string_fields = array_unique(array_filter($header, 'is_string'));

        return $header === $unique_string_fields;
    }
96
97
    /**
     * Returns the CSV record header
     *
     * The header is resolved on first access and then served from cache
     * until resetProperties() clears the cache flag. When no header offset
     * is set the header is an empty array.
     *
     * The cache flag is raised only once setHeader() has returned. If
     * setHeader() throws, the header stays marked as not loaded, so the
     * next call tries to resolve it again instead of silently returning
     * an empty header.
     *
     * @throws RuntimeException If the header record is missing or empty
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        $this->header = [];
        if (null !== $this->header_offset) {
            $this->header = $this->setHeader($this->header_offset);
        }

        // Only mark the cache as valid after a successful resolution.
        $this->is_header_loaded = true;

        return $this->header;
    }
118
119
    /**
     * Reads the record located at the given offset to use it as header
     *
     * The document is configured with the reader CSV controls before the
     * record is fetched. When the header is the very first record, the
     * input BOM length is passed to removeBOM() so the sequence is stripped.
     *
     * @param int $offset
     *
     * @throws RuntimeException If the header offset is an integer
     *                          and the corresponding record is missing
     *                          or is an empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($offset);

        $header = $this->document->current();
        if (empty($header)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $header;
        }

        // Only the first record of the document can carry the BOM sequence.
        $bom_length = mb_strlen($this->getInputBOM());

        return $this->removeBOM($header, $bom_length, $this->enclosure);
    }
146
147
    /**
     * Strips the BOM sequence from the first field of a record
     *
     * Once the BOM characters are removed, a first field that both starts
     * and ends with the enclosure character has that pair removed as well.
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to drop from the first field
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $record;
        }

        $first_field = mb_substr($record[0], $bom_length);
        $is_enclosed = $enclosure == mb_substr($first_field, 0, 1)
            && $enclosure == mb_substr($first_field, -1, 1);
        if ($is_enclosed) {
            $first_field = mb_substr($first_field, 1, -1);
        }

        $record[0] = $first_field;

        return $record;
    }
169
170
    /**
     * Returns the value used to pad records shorter than the header
     *
     * @return mixed
     */
    public function getRecordPaddingValue()
    {
        return $this->record_padding_value;
    }
179
180
    /**
     * Applies a statement to this reader and returns the resulting collection
     *
     * The work is delegated to Statement::process(), which receives this reader.
     *
     * @param Statement $stmt
     *
     * @return ResultSet
     */
    public function select(Statement $stmt): ResultSet
    {
        return $stmt->process($this);
    }
191
192
    /**
     * Forwards the whitelisted fetch* methods to a ResultSet
     *
     * The ResultSet is built from the reader records and header. Any
     * method name outside the whitelist is rejected.
     *
     * @param string $method
     * @param array  $arguments
     *
     * @throws BadMethodCallException If the method is not whitelisted
     *
     * @return mixed
     */
    public function __call($method, array $arguments)
    {
        $forwardable = ['fetchColumn' => 1, 'fetchPairs' => 1, 'fetchOne' => 1, 'fetchAll' => 1];
        if (!isset($forwardable[$method])) {
            throw new BadMethodCallException(sprintf('Reader::%s does not exists', $method));
        }

        $result_set = new ResultSet($this->getRecords(), $this->getHeader());

        return $result_set->$method(...$arguments);
    }
206
207
    /**
     * Detects delimiters occurrences in the CSV
     *
     * Returns an associative array where each key is a candidate
     * delimiter and each value is the count reported by getCellCount()
     * for that delimiter, sorted from the highest count to the lowest.
     *
     * Candidates that are not exactly one byte long are ignored and
     * duplicated candidates are considered once.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records Detection is made using $nb_records of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        $nb_records = $this->filterMinRange($nb_records, 1, 'The number of records to consider must be a valid positive integer');

        $candidates = array_unique(array_filter($delimiters, function ($value): bool {
            return 1 == strlen($value);
        }));

        $occurrences = [];
        foreach ($candidates as $delimiter) {
            $occurrences[$delimiter] = $this->getCellCount($delimiter, $nb_records);
        }

        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
237
238
    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records
     *
     * Only records that split into more than one field with the given
     * delimiter are kept. The result is a recursive count of the kept
     * records, so each kept record adds one to the total on top of its fields.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $first_records = new LimitIterator($this->document, 0, $nb_records);
        $split_records = new CallbackFilterIterator($first_records, function ($record): bool {
            return is_array($record) && count($record) > 1;
        });

        return count(iterator_to_array($split_records, false), COUNT_RECURSIVE);
    }
259
260
    /**
     * @inheritdoc
     *
     * Iterating over the reader is the same as iterating over getRecords().
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }
267
268
    /**
     * Returns the CSV records in an iterator object.
     *
     * Records that are not arrays, or that only hold a single null
     * field, are skipped.
     *
     * If the CSV document has a header record then each record is combined
     * with the header and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with the record padding value while extra record fields
     * are stripped from the returned object.
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        if (!$this->supportsHeaderAsRecordKeys()) {
            throw new RuntimeException('The header record must be empty or a flat array with unique string values');
        }

        $bom = $this->getInputBOM();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $records = new CallbackFilterIterator($this->document, function ($record): bool {
            return is_array($record) && $record != [null];
        });
        $records = $this->stripBOM($records, $bom);

        return $this->combineHeader($records);
    }
299
300
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without a header offset the iterator is returned untouched.
     * Otherwise the header record is filtered out and every remaining
     * record is padded or truncated to the header size before being
     * keyed by the header field names.
     *
     * @param Iterator $iterator
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        // The header record itself must not be returned as a data record.
        $without_header = new CallbackFilterIterator($iterator, function (array $record, int $offset): bool {
            return $offset != $this->header_offset;
        });

        $header = $this->getHeader();
        $field_count = count($header);

        return new MapIterator($without_header, function (array $record) use ($header, $field_count): array {
            if (count($record) == $field_count) {
                return array_combine($header, $record);
            }

            $padded = array_pad($record, $field_count, $this->record_padding_value);

            return array_combine($header, array_slice($padded, 0, $field_count));
        });
    }
329
330
    /**
     * Strips the BOM sequence from the record at index 0
     *
     * When the BOM is an empty string the iterator is returned untouched.
     * Otherwise only the record whose index is 0 goes through removeBOM();
     * all other records are returned as they are.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            return 0 == $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
355
356
357
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is used as record keys, a valid header
     * MUST contain only unique string values.
     *
     * A non-null offset is validated with filterMinRange() using 0 as
     * the lower bound. The cached header is invalidated only when the
     * offset actually changes.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        $offset = null === $offset
            ? null
            : $this->filterMinRange($offset, 0, 'The header offset index must be a positive integer or 0');

        if ($offset === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }
380
381
    /**
     * Sets the value used to pad records shorter than the header
     *
     * @param mixed $record_padding_value
     *
     * @return static
     */
    public function setRecordPaddingValue($record_padding_value): self
    {
        $this->record_padding_value = $record_padding_value;

        return $this;
    }
394
395
    /**
     * @inheritdoc
     *
     * Marks the cached header as stale so that getHeader() resolves it again.
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;

        // The original statement returned the assigned value; keep doing so.
        return $this->is_header_loaded;
    }
402
}
403