Completed
Pull Request — master (#230)
by ignace nyamagana
02:02
created

Reader::removeBOM()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 7
cts 7
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 7
nc 3
nop 3
crap 4
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 * A class to manage records selection from a CSV document
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 * @method int count()
31
 * @method array fetchAll()
32
 * @method array fetchOne(int $offset = 0)
33
 * @method Generator fetchPairs(string|int $offset_index, string|int $value_index)
34
 * @method Generator fetchColumn(string|int $column_index)
35
 * @method ResultSet preserveRecordOffset(bool $status)
36
 * @method bool isRecordOffsetPreserved()
37
 * @method array getColumnNames()
38
 */
39
class Reader extends AbstractCsv implements IteratorAggregate
40
{
41
    /**
     * Stream filter mode used by this class (the read mode constant).
     *
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Offset of the record used as the CSV document header.
     *
     * Null means that no record is used as header.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached CSV document header record.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Tells whether the cached header is up to date.
     *
     * When false, the header is computed again on the next access.
     *
     * @var bool
     */
    protected $is_header_loaded = false;

    /**
     * Value used to pad a record that has fewer fields than the header.
     *
     * @var mixed
     */
    protected $record_padding_value;
73
74
    /**
     * Gives the offset of the record currently selected as the header.
     *
     * When no CSV record is used as header this method MUST return null.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        $offset = $this->header_offset;

        return $offset;
    }
85
86
    /**
     * Tells whether the selected header can be combined with each record.
     *
     * A valid header is either empty or made only of unique string field names.
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if ([] === $header) {
            // No header at all is always acceptable.
            return true;
        }

        // Dropping non-string and duplicated values must leave the header untouched.
        $unique_strings = array_unique(array_filter($header, 'is_string'));

        return $header === $unique_strings;
    }
99
100
    /**
     * Returns the CSV record header
     *
     * The returned header is represented as an array of string values.
     * The header is computed once and cached until the cache is invalidated
     * through the `is_header_loaded` flag.
     *
     * @throws RuntimeException If the header record cannot be determined
     *                          from the current header offset
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        $this->header = [];
        if (null !== $this->header_offset) {
            $this->header = $this->setHeader($this->header_offset);
        }

        // The cache is flagged as valid only once the header has been resolved:
        // if setHeader() throws, the next call retries instead of silently
        // returning an empty header.
        $this->is_header_loaded = true;

        return $this->header;
    }
121
122
    /**
     * Reads the header record located at the given offset.
     *
     * The document is configured with the reader CSV controls before the
     * record is fetched. When the header is the first record of the document
     * the input BOM sequence is stripped from it.
     *
     * @param int $offset
     *
     * @throws RuntimeException If the header offset is an integer
     *                          and the corresponding record is missing
     *                          or is an empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $flags = SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY;
        $this->document->setFlags($flags);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($offset);

        $header = $this->document->current();
        if (empty($header)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $header;
        }

        // Only the first record of the document can start with the BOM sequence.
        $bom_length = mb_strlen($this->getInputBOM());

        return $this->removeBOM($header, $bom_length, $this->enclosure);
    }
149
150
    /**
     * Strip the BOM sequence from a record
     *
     * Only the first field of the record is altered. Once the BOM is removed,
     * a first field still wrapped in the enclosure character has its
     * surrounding enclosures removed as well.
     *
     * The record is returned untouched when the BOM length is 0 or when the
     * record has no string value as first field (empty record, null field).
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to remove, as computed
     *                             with mb_strlen() by the callers in this class
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        // isset() is false for a missing offset as well as for a null field,
        // both of which would make mb_substr() fail under strict types.
        if (0 === $bom_length || !isset($record[0]) || !is_string($record[0])) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);

        // An enclosed field needs an opening AND a closing enclosure: a field
        // made of a single enclosure character must not be emptied.
        $is_enclosed = mb_strlen($field) >= 2
            && $enclosure === mb_substr($field, 0, 1)
            && $enclosure === mb_substr($field, -1, 1);
        if ($is_enclosed) {
            $field = mb_substr($field, 1, -1);
        }

        $record[0] = $field;

        return $record;
    }
172
173
    /**
     * Gives the value used to pad records shorter than the header.
     *
     * @return mixed
     */
    public function getRecordPaddingValue()
    {
        $padding_value = $this->record_padding_value;

        return $padding_value;
    }
182
183
    /**
     * Applies a statement to this reader and returns the resulting records collection.
     *
     * @param Statement $stmt
     *
     * @return ResultSet
     */
    public function select(Statement $stmt): ResultSet
    {
        $result_set = $stmt->process($this);

        return $result_set;
    }
194
195
    /**
     * Forwards the call to the object obtained by processing this reader
     * with a new Statement instance.
     *
     * @inheritdoc
     */
    public function __call($method, array $arguments)
    {
        $statement = new Statement();
        $result_set = $statement->process($this);

        return $result_set->$method(...$arguments);
    }
202
203
    /**
204
     * Detect Delimiters occurences in the CSV
205
     *
206
     * Returns a associative array where each key represents
207
     * a valid delimiter and each value the number of occurences
208
     *
209
     * @param string[] $delimiters the delimiters to consider
210
     * @param int      $nb_records Detection is made using $nb_records of the CSV
211
     *
212
     * @return array
213
     */
214 6
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
215
    {
216
        $filter = function ($value): bool {
217 4
            return 1 == strlen($value);
218 3
        };
219
220 6
        $nb_records = $this->filterMinRange($nb_records, 1, 'The number of records to consider must be a valid positive integer');
221 4
        $delimiters = array_unique(array_filter($delimiters, $filter));
222
        $reducer = function (array $res, string $delimiter) use ($nb_records): array {
223 4
            $res[$delimiter] = $this->getCellCount($delimiter, $nb_records);
224
225 4
            return $res;
226 4
        };
227
228 4
        $res = array_reduce($delimiters, $reducer, []);
229 4
        arsort($res, SORT_NUMERIC);
230
231 4
        return $res;
232
    }
233
234
    /**
     * Computes the cell count obtained with a given delimiter
     * over a given number of records.
     *
     * Only the records split into more than one cell are taken into account.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $flags = SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY;
        $this->document->setFlags($flags);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $has_many_cells = function ($record): bool {
            return is_array($record) && count($record) > 1;
        };
        $first_records = new LimitIterator($this->document, 0, $nb_records);
        $split_records = new CallbackFilterIterator($first_records, $has_many_cells);

        // COUNT_RECURSIVE adds up every cell plus each record array itself.
        return count(iterator_to_array($split_records, false), COUNT_RECURSIVE);
    }
255
256
    /**
     * Iterating over the reader iterates over its records.
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $records = $this->getRecords();

        return $records;
    }
263
264
    /**
     * Returns the CSV records in an iterator object.
     *
     * Each CSV record is represented as a simple array of string or null values.
     *
     * If the CSV document has a header record then each record is combined
     * with the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with the record padding value while extra record fields are
     * stripped from the returned object.
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        if (!$this->supportsHeaderAsRecordKeys()) {
            throw new RuntimeException('The header record must be empty or a flat array with unique string values');
        }

        $bom = $this->getInputBOM();
        $flags = SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY;
        $this->document->setFlags($flags);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        // Keep only real records: non-array values and records loosely
        // equal to [null] are skipped.
        $records = new CallbackFilterIterator($this->document, function ($record): bool {
            return !(!is_array($record) || $record == [null]);
        });
        $records = $this->stripBOM($records, $bom);

        return $this->combineHeader($records);
    }
295
296
    /**
     * Add the CSV header if present and valid
     *
     * When a header offset is set, the header record is removed from the
     * iterator and every remaining record is padded or truncated to the
     * header size, then indexed by the header field names.
     *
     * @param Iterator $iterator
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator): Iterator
    {
        // The offset is captured by value so that the record filtered out
        // always matches the header captured below, even if the header offset
        // is changed before the returned iterator is consumed.
        $header_offset = $this->header_offset;
        if (null === $header_offset) {
            return $iterator;
        }

        // Loose comparison kept as in the original implementation.
        $iterator = new CallbackFilterIterator($iterator, static function (array $record, int $offset) use ($header_offset): bool {
            return $offset != $header_offset;
        });

        $header = $this->getHeader();
        $field_count = count($header);
        $mapper = function (array $record) use ($header, $field_count): array {
            if (count($record) != $field_count) {
                // Pad short records, then drop the fields exceeding the header size.
                $record = array_slice(array_pad($record, $field_count, $this->record_padding_value), 0, $field_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $mapper);
    }
325
326
    /**
     * Removes the BOM sequence, if any, from the record found at index 0.
     *
     * The iterator is returned as is when the BOM sequence is empty.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            // Only the record with index 0 is altered.
            return 0 === $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
351
352
353
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * Passing null means that no record is used as header.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        $offset = null === $offset
            ? null
            : $this->filterMinRange($offset, 0, 'The header offset index must be a positive integer or 0');

        if ($offset === $this->header_offset) {
            // Nothing changed: the cached header stays valid.
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }
376
377
    /**
     * Defines the value used to pad records shorter than the header.
     *
     * Any value is accepted and stored as is.
     *
     * @param mixed $record_padding_value
     *
     * @return static
     */
    public function setRecordPaddingValue($record_padding_value): self
    {
        $this->record_padding_value = $record_padding_value;

        return $this;
    }
390
391
    /**
     * Invalidates the cached header so that it is computed again on next access.
     *
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;

        // NOTE(review): the original returned the result of the assignment,
        // i.e. false; kept as is - confirm whether any caller uses it.
        return false;
    }
398
}
399