Completed
Pull Request — master (#266)
by
unknown
14:00
created

Reader::removeBOM()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
cc 3
eloc 8
nc 3
nop 3
dl 0
loc 15
ccs 6
cts 6
cp 1
crap 3
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.1.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use BadMethodCallException;
18
use CallbackFilterIterator;
19
use Countable;
20
use Iterator;
21
use IteratorAggregate;
22
use JsonSerializable;
23
use SplFileObject;
24
use TypeError;
25
26
/**
 * A class to select records from a CSV document
 *
 * @package League.csv
 * @since  3.0.0
 *
 * @method array fetchOne(int $nth_record = 0) Returns a single record from the CSV
 * @method \Generator fetchColumn(string|int $column_index) Returns the next value from a single CSV record field
 * @method \Generator fetchPairs(string|int $offset_index = 0, string|int $value_index = 1) Fetches the next key-value pairs from the CSV document
 */
class Reader extends AbstractCsv implements Countable, IteratorAggregate, JsonSerializable
{
    /**
     * header offset
     *
     * Offset of the record used as header, or null when no header is selected.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * header record
     *
     * Cache of the header record; reset to the empty array by resetProperties().
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * records count
     *
     * Cache of the value returned by count(); -1 means "not computed yet".
     *
     * @var int
     */
    protected $nb_records = -1;

    /**
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * @inheritdoc
     *
     * Builds the instance on top of Stream::createFromPath(); the default
     * open mode is 'r'.
     */
    public static function createFromPath(string $path, string $open_mode = 'r', $context = null): AbstractCsv
    {
        return new static(Stream::createFromPath($path, $open_mode, $context));
    }

    /**
     * Returns the header offset
     *
     * If no CSV header offset is set this method MUST return null
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }

    /**
     * Returns the CSV record used as header
     *
     * The returned header is represented as an array of string values.
     * The header is read from the document at most once per header offset:
     * the result is cached in $header until resetProperties() clears it.
     *
     * @throws Exception If the header offset is set and no record is found or is the empty array
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        // No header selected, or header already resolved: return the cached value.
        if (null === $this->header_offset || !empty($this->header)) {
            return $this->header;
        }

        $this->header = $this->setHeader($this->header_offset);

        return $this->header;
    }

    /**
     * Determine the CSV record header
     *
     * Configures the document flags and CSV controls, seeks to the given
     * offset and reads the record found there. When the offset is 0 the
     * input BOM sequence is stripped from the first field.
     *
     * @param int $offset
     *
     * @throws Exception If the header offset is set and no record is found or is the empty array
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($offset);

        $header = $this->document->current();
        if (empty($header)) {
            throw new Exception(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        // Only the very first record of the document can carry the BOM.
        if (0 === $offset) {
            return $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        return $header;
    }

    /**
     * Strip the BOM sequence from a record
     *
     * The first $bom_length characters are removed from the first field.
     * If what remains is wrapped in the enclosure character (first and last
     * byte both equal to $enclosure), the enclosure pair is removed as well.
     *
     * A field must be at least two bytes long to be considered enclosed: a
     * lone enclosure character is returned untouched instead of being passed
     * to substr($field, 1, -1), which yields false on PHP 7.
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to drop, as computed with mb_strlen()
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);
        $field_length = strlen($field);

        $is_enclosed = $field_length > 1
            && $enclosure === $field[0]
            && $enclosure === $field[$field_length - 1];
        if ($is_enclosed) {
            $field = substr($field, 1, -1);
        }

        $record[0] = $field;

        return $record;
    }

    /**
     * @inheritdoc
     *
     * Forwards fetchColumn(), fetchOne() and fetchPairs() to a ResultSet
     * built from getRecords() and getHeader().
     *
     * @throws BadMethodCallException If the called method is not one of the forwarded methods
     */
    public function __call($method, array $arguments)
    {
        static $whitelisted = ['fetchColumn' => 1, 'fetchOne' => 1, 'fetchPairs' => 1];
        if (isset($whitelisted[$method])) {
            return (new ResultSet($this->getRecords(), $this->getHeader()))->$method(...$arguments);
        }

        throw new BadMethodCallException(sprintf('%s::%s() method does not exist', __CLASS__, $method));
    }

    /**
     * @inheritdoc
     *
     * The number of records yielded by getRecords(); computed on first call
     * and cached in $nb_records until resetProperties() is called.
     */
    public function count(): int
    {
        if (-1 === $this->nb_records) {
            $this->nb_records = iterator_count($this->getRecords());
        }

        return $this->nb_records;
    }

    /**
     * @inheritdoc
     *
     * Iterating the Reader is the same as iterating getRecords().
     */
    public function getIterator(): Iterator
    {
        return $this->getRecords();
    }

    /**
     * @inheritdoc
     *
     * Returns the records from getRecords() as a list; iterator keys are
     * discarded.
     */
    public function jsonSerialize(): array
    {
        return iterator_to_array($this->getRecords(), false);
    }

    /**
     * Returns the CSV records as an iterator object.
     *
     * Each CSV record is represented as a simple array containing strings or null values.
     *
     * If the CSV document has a header record then each record is combined
     * to the header record and the header record is removed from the iterator.
     *
     * If the CSV document is inconsistent. Missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned object.
     *
     * @param string[] $header an optional header to use instead of the CSV document header
     *
     * @throws Exception If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(array $header = []): Iterator
    {
        $header = $this->computeHeader($header);

        // Keep only array records that are not loosely equal to [null];
        // the comparison is kept loose so the set of filtered records is unchanged.
        $normalized = static function ($record): bool {
            return is_array($record) && $record != [null];
        };

        $bom = $this->getInputBOM();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $records = $this->stripBOM(new CallbackFilterIterator($this->document, $normalized), $bom);
        if (null !== $this->header_offset) {
            // Drop the header record itself from the iteration.
            $records = new CallbackFilterIterator($records, function (array $record, int $offset): bool {
                return $offset !== $this->header_offset;
            });
        }

        return $this->combineHeader($records, $header);
    }

    /**
     * Returns the header to be used for iteration
     *
     * When the submitted header is empty the CSV document header is used.
     * The header is accepted only if it is identical to itself once non-string
     * values and duplicates are removed.
     *
     * @param string[] $header
     *
     * @throws Exception If the header contains non unique column name
     *
     * @return string[]
     */
    protected function computeHeader(array $header)
    {
        if (empty($header)) {
            $header = $this->getHeader();
        }

        if ($header === array_unique(array_filter($header, 'is_string'))) {
            return $header;
        }

        throw new Exception('The header record must be empty or a flat array with unique string values');
    }

    /**
     * Combine the CSV header to each record if present
     *
     * With an empty header the iterator is returned as is. Otherwise each
     * record is padded with null or truncated to the header length and then
     * combined with the header values as keys.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (empty($header)) {
            return $iterator;
        }

        $field_count = count($header);
        $mapper = static function (array $record) use ($header, $field_count): array {
            if (count($record) !== $field_count) {
                // Pad short records with null, cut long records to the header size.
                $record = array_slice(array_pad($record, $field_count, null), 0, $field_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Strip the BOM sequence from the returned records if necessary
     *
     * With an empty BOM the iterator is returned as is. Otherwise the record
     * found at index 0 goes through removeBOM(); every other record is left
     * untouched.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $mapper = function (array $record, int $index) use ($bom_length): array {
            if (0 !== $index) {
                return $record;
            }

            return $this->removeBOM($record, $bom_length, $this->enclosure);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string value.
     *
     * Setting the offset already in use is a no-op; any other valid value
     * clears the cached header and records count.
     *
     * @param int|null $offset the header record offset
     *
     * @throws TypeError if the offset is neither null nor an integer
     * @throws Exception if the offset is a negative integer
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if ($offset === $this->header_offset) {
            return $this;
        }

        if (!is_nullable_int($offset)) {
            throw new TypeError(sprintf(__METHOD__.'() expects 1 Argument to be null or an integer %s given', gettype($offset)));
        }

        if (null !== $offset && 0 > $offset) {
            throw new Exception(__METHOD__.'() expects 1 Argument to be greater or equal to 0');
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }

    /**
     * @inheritdoc
     *
     * Clears the cached records count and the cached header.
     */
    protected function resetProperties()
    {
        $this->nb_records = -1;
        $this->header = [];
    }
}
348