Completed
Push — master ( b4a812...401201 )
by ignace nyamagana
03:31 queued 02:24
created

Reader::removeBOM()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
nc 3
nop 3
dl 0
loc 15
ccs 8
cts 8
cp 1
crap 3
rs 9.7666
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * League.Csv (https://csv.thephpleague.com).
5
 *
6
 * @author  Ignace Nyamagana Butera <[email protected]>
7
 * @license https://github.com/thephpleague/csv/blob/master/LICENSE (MIT License)
8
 * @version 9.2.0
9
 * @link    https://github.com/thephpleague/csv
10
 *
11
 * For the full copyright and license information, please view the LICENSE
12
 * file that was distributed with this source code.
13
 */
14
15
declare(strict_types=1);
16
17
namespace League\Csv;
18
19
use BadMethodCallException;
20
use CallbackFilterIterator;
21
use Countable;
22
use Iterator;
23
use IteratorAggregate;
24
use JsonSerializable;
25
use SplFileObject;
26
use TypeError;
27
use const STREAM_FILTER_READ;
28
use function array_combine;
29
use function array_filter;
30
use function array_pad;
31
use function array_slice;
32
use function array_unique;
33
use function gettype;
34
use function is_array;
35
use function iterator_count;
36
use function iterator_to_array;
37
use function mb_strlen;
38
use function mb_substr;
39
use function sprintf;
40
use function strlen;
41
use function substr;
42
43
/**
44
 * A class to parse and read records from a CSV document.
45
 *
46
 * @method array fetchOne(int $nth_record = 0) Returns a single record from the CSV
47
 * @method \Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
48
 * @method \Generator fetchPairs(string|int $offset_index = 0, string|int $value_index = 1) Fetches the next key-value pairs from the CSV document
49
 */
50
class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSerializable
51
{
52
    /**
     * Offset of the record used as the CSV header.
     *
     * Stays null for as long as no header offset has been set.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached header record.
     *
     * Starts as the empty array and is emptied again each time the
     * reader properties are reset.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Cached number of records.
     *
     * The value -1 is the sentinel meaning "not counted yet".
     *
     * @var int
     */
    protected $nb_records = -1;

    /**
     * Stream filter mode, set to STREAM_FILTER_READ for the reader.
     *
     * {@inheritdoc}
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;
77
78
    /**
     * Delegates to the parent implementation with the very same arguments.
     *
     * The open mode defaults to 'r' in this class.
     *
     * {@inheritdoc}
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null)
    {
        $instance = parent::createFromPath($path, $open_mode, $context);

        return $instance;
    }
85
86
    /**
     * Resets the cached reader state.
     *
     * The parent reset runs first; afterwards the cached header is emptied
     * and the record count is put back to its "not counted yet" value (-1).
     *
     * {@inheritdoc}
     */
    protected function resetProperties()
    {
        parent::resetProperties();

        $this->header = [];
        $this->nb_records = -1;
    }
95
96
    /**
     * Tells which record offset is used as the CSV header.
     *
     * Null is returned when no header offset has been set.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        $offset = $this->header_offset;

        return $offset;
    }
107
108
    /**
     * Gives back the record acting as the CSV header.
     *
     * The cached header is returned directly when no header offset is set
     * or when a non-empty header is already cached. Otherwise the header is
     * read from the document at the configured offset and cached.
     *
     * @return string[] the header as an array of string values
     */
    public function getHeader(): array
    {
        $is_resolved = null === $this->header_offset || [] !== $this->header;
        if (!$is_resolved) {
            $this->header = $this->setHeader($this->header_offset);
        }

        return $this->header;
    }
129
130
    /**
     * Reads the header record located at the given offset.
     *
     * When the offset is 0 the record goes through removeBOM() so that the
     * input BOM does not end up inside the first column name.
     *
     * @throws Exception If no record is found at the offset or if it is the empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $record = $this->seekRow($offset);
        if (false !== $record && [] !== $record) {
            return 0 !== $offset
                ? $record
                : $this->removeBOM($record, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        throw new Exception(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
    }
150
151
    /**
     * Walks the document until the record with the requested index is met.
     *
     * False is returned when the index is never reached, or when the record
     * found there is exactly [null].
     *
     * @return array|false
     */
    protected function seekRow(int $offset)
    {
        foreach ($this->getDocument() as $index => $record) {
            if ($offset !== $index) {
                continue;
            }

            if ([null] === $record) {
                return false;
            }

            return $record;
        }

        return false;
    }
166
167
    /**
     * Prepares the underlying document for iteration and returns it.
     *
     * With an empty escape character on PHP older than 7.4, parsing is
     * handed over to EmptyEscapeParser. In every other case the document is
     * switched to CSV reading mode (read-ahead, empty lines skipped), given
     * the current CSV controls and rewound.
     */
    protected function getDocument(): Iterator
    {
        $use_native_parser = '' !== $this->escape || PHP_VERSION_ID >= 70400;
        if (!$use_native_parser) {
            return EmptyEscapeParser::parse($this->document, $this->delimiter, $this->enclosure);
        }

        $document = $this->document;
        $document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $document->rewind();

        return $document;
    }
182
183
    /**
     * Strips the BOM sequence, and any enclosure wrapped around the first
     * field, from a record.
     *
     * The first $bom_length characters are dropped from the first field with
     * mb_substr(). If what remains both starts and ends with the enclosure
     * character, that pair of enclosures is removed as well.
     *
     * The record is returned unchanged when $bom_length is 0 or when it has
     * no first field. A remainder shorter than two bytes is never treated as
     * enclosed, so a field holding a lone enclosure character is kept as is
     * instead of being collapsed.
     *
     * @param string[] $record
     * @param int      $bom_length length of the BOM sequence (callers in this class compute it with mb_strlen())
     * @param string   $enclosure  enclosure character
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        // Nothing to strip: no BOM, or no first field to strip it from.
        if (0 === $bom_length || !isset($record[0])) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);
        $record[0] = $field;

        // The enclosure test is byte-wise: first and last byte must both be
        // the enclosure, which needs at least two bytes to be meaningful.
        // NOTE(review): this presumes a single-byte enclosure - confirm.
        if (strlen($field) < 2 || $enclosure !== $field[0] || $enclosure !== substr($field, -1)) {
            return $record;
        }

        $record[0] = substr($field, 1, -1);

        return $record;
    }
205
206
    /**
     * Forwards fetchColumn(), fetchOne() and fetchPairs() to a ResultSet
     * built from the current records and header.
     *
     * Any other method name is rejected.
     *
     * @throws BadMethodCallException If the method is not one of the forwarded ones
     *
     * {@inheritdoc}
     */
    public function __call($method, array $arguments)
    {
        static $forwardable = ['fetchColumn' => 1, 'fetchOne' => 1, 'fetchPairs' => 1];
        if (!isset($forwardable[$method])) {
            throw new BadMethodCallException(sprintf('%s::%s() method does not exist', static::class, $method));
        }

        $result_set = new ResultSet($this->getRecords(), $this->getHeader());

        return $result_set->$method(...$arguments);
    }
218
219
    /**
     * Returns the number of records yielded by getRecords().
     *
     * The records are only counted while the cached value is still -1;
     * afterwards the cached count is returned as is.
     *
     * {@inheritdoc}
     */
    public function count(): int
    {
        if (-1 !== $this->nb_records) {
            return $this->nb_records;
        }

        $this->nb_records = iterator_count($this->getRecords());

        return $this->nb_records;
    }
230
231
    /**
     * Iterating over the reader iterates over getRecords() called without
     * a custom header.
     *
     * {@inheritdoc}
     */
    public function getIterator(): Iterator
    {
        $records = $this->getRecords();

        return $records;
    }
238
239
    /**
     * Exposes every record returned by getRecords() as a plain list.
     *
     * The iterator keys are discarded, so the resulting array is indexed
     * sequentially.
     *
     * {@inheritdoc}
     */
    public function jsonSerialize(): array
    {
        $preserve_keys = false;

        return iterator_to_array($this->getRecords(), $preserve_keys);
    }
246
247
    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     * Entries that are not arrays, or that equal [null], are left out.
     *
     * If the CSV document has a header record then each record is combined
     * with the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent, missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned object.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     *
     * @throws Exception If the header to use is not made of unique string values
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);
        $is_usable_record = function ($record): bool {
            return is_array($record) && [null] != $record;
        };

        $bom = $this->getInputBOM();
        $usable_records = new CallbackFilterIterator($this->getDocument(), $is_usable_record);
        $records = $this->stripBOM($usable_records, $bom);
        if (null === $this->header_offset) {
            return $this->combineHeader($records, $header);
        }

        // The record sitting at the header offset is the header itself, not data.
        $without_header = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
            return $offset !== $this->header_offset;
        });

        return $this->combineHeader($without_header, $header);
    }
279
280
    /**
     * Resolves and validates the header used while iterating.
     *
     * An empty $header falls back to the document header. The resolved
     * header is accepted only if removing its non-string values and its
     * duplicates leaves it strictly identical.
     *
     * @param string[] $header
     *
     * @throws Exception If the header contains non-string or non-unique column names
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        $candidate = [] === $header ? $this->getHeader() : $header;
        $unique_strings = array_unique(array_filter($candidate, 'is_string'));
        if ($candidate !== $unique_strings) {
            throw new Exception('The header record must be empty or a flat array with unique string values');
        }

        return $candidate;
    }
301
302
    /**
     * Keys every record by the header, when a header is given.
     *
     * With an empty header the iterator is returned untouched. Otherwise
     * each record is first brought to the header length (padded with null
     * or truncated) and then combined with the header values as keys.
     *
     * @param string[] $header
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if ([] !== $header) {
            $field_count = count($header);

            return new MapIterator($iterator, function (array $record) use ($header, $field_count): array {
                if ($field_count != count($record)) {
                    $record = array_slice(array_pad($record, $field_count, null), 0, $field_count);
                }

                return array_combine($header, $record);
            });
        }

        return $iterator;
    }
324
325
    /**
     * Removes the BOM sequence from the record at index 0, if there is a BOM.
     *
     * With an empty BOM the iterator is returned untouched. Otherwise the
     * records are wrapped so that the one whose index equals 0 goes through
     * removeBOM(); all other records pass through unchanged.
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' !== $bom) {
            $bom_length = mb_strlen($bom);

            return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
                return 0 == $index
                    ? $this->removeBOM($record, $bom_length, $this->enclosure)
                    : $record;
            });
        }

        return $iterator;
    }
345
346
    /**
     * Chooses which record offset holds the CSV header.
     *
     * Passing the offset that is already set is a no-op. Any other value is
     * validated, stored, and the cached header and record count are reset.
     *
     * @param int|null $offset the header record offset, null for no header
     *
     * @throws TypeError if the offset is neither null nor an integer
     * @throws Exception if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if ($offset !== $this->header_offset) {
            if (!is_nullable_int($offset)) {
                throw new TypeError(sprintf(__METHOD__.'() expects 1 Argument to be null or an integer %s given', gettype($offset)));
            }

            if (null !== $offset && $offset < 0) {
                throw new Exception(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
            }

            $this->header_offset = $offset;
            $this->resetProperties();
        }

        return $this;
    }
377
}
378