Completed
Push — master ( 86dff6...10771b )
by ignace nyamagana
04:44 queued 03:04
created

Reader   A

Complexity

Total Complexity 31

Size/Duplication

Total Lines 323
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Test Coverage

Coverage 98.92%

Importance

Changes 0
Metric Value
dl 0
loc 323
ccs 92
cts 93
cp 0.9892
rs 9.8
c 0
b 0
f 0
wmc 31
lcom 1
cbo 4

14 Methods

Rating   Name   Duplication   Size   Complexity  
A getHeaderOffset() 0 4 1
A select() 0 4 1
A fetchDelimitersOccurrence() 0 19 1
A getCellCount() 0 12 2
A getRecords() 0 4 1
A getIterator() 0 16 3
A supportsHeaderAsRecordKeys() 0 6 2
A getHeader() 0 14 3
A setHeader() 0 16 3
A combineHeader() 0 22 3
A removeBOM() 0 13 4
A setHeaderOffset() 0 13 3
A resetProperties() 0 4 1
A stripBOM() 0 17 3
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 * A class to manage records selection from a CSV document
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
32
{
33
    /**
34
     * @inheritdoc
35
     */
36
    protected $stream_filter_mode = STREAM_FILTER_READ;
37
38
    /**
39
     * CSV Document header offset
40
     *
41
     * @var int|null
42
     */
43
    protected $header_offset;
44
45
    /**
46
     * CSV Document Header record
47
     *
48
     * @var string[]
49
     */
50
    protected $header = [];
51
52
    /**
53
     * Tell whether the header has already been loaded (false means it must be re-generated)
54
     *
55
     * @var bool
56
     */
57
    protected $is_header_loaded = false;
58
59
    /**
60
     * Returns the record offset used as header
61
     *
62
     * If no CSV record is used this method MUST return null
63
     *
64
     * @return int|null
65
     */
66 2
    public function getHeaderOffset()
67
    {
68 2
        return $this->header_offset;
69
    }
70
71
    /**
     * Runs the given statement against this reader
     *
     * The reader hands itself over to Statement::process() and returns
     * whatever that call produces.
     *
     * @param Statement $stmt the statement to process
     *
     * @return ResultSet
     */
    public function select(Statement $stmt): ResultSet
    {
        $result = $stmt->process($this);

        return $result;
    }
82
83
    /**
     * Detects delimiter occurrences in the CSV
     *
     * Returns an associative array where each key is one of the retained
     * delimiters and each value is the count returned by getCellCount()
     * for that delimiter. Entries are sorted by descending count and keep
     * their keys.
     *
     * Candidates whose length is not exactly one byte are ignored and
     * duplicated candidates are only evaluated once.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records Detection is made using $nb_records of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        $nb_records = $this->filterMinRange($nb_records, 1, 'The number of records to consider must be a valid positive integer');

        // First pass: validate every candidate before the document is touched.
        $candidates = [];
        foreach ($delimiters as $delimiter) {
            if (1 == strlen($delimiter)) {
                $candidates[] = $delimiter;
            }
        }

        // Second pass: count the cells produced by each distinct candidate.
        $occurrences = [];
        foreach (array_unique($candidates) as $candidate) {
            $occurrences[$candidate] = $this->getCellCount($candidate, $nb_records);
        }

        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
113
114
    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records
     *
     * The document is switched to CSV reading with the given delimiter, then
     * at most $nb_records records are read from offset 0. Only records that
     * are arrays holding more than one field are taken into account; each of
     * them contributes one (the record itself) plus its recursive element
     * count to the total.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $total = 0;
        foreach (new LimitIterator($this->document, 0, $nb_records) as $record) {
            if (!is_array($record) || count($record) <= 1) {
                continue;
            }

            // Same figure as a recursive count over the list of kept records.
            $total += 1 + count($record, COUNT_RECURSIVE);
        }

        return $total;
    }
135
136
    /**
     * Returns the CSV records in an iterator object
     *
     * This method is an alias of Reader::getIterator().
     *
     * Each CSV record is represented as a simple array of string or null values.
     *
     * When a header record is selected, every record is combined with the
     * header and the header record itself is left out of the iterator.
     *
     * When the CSV document is inconsistent, missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned records.
     *
     * @see Reader::getIterator()
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        $records = $this->getIterator();

        return $records;
    }
158
159
    /**
     * Returns the CSV records in an iterator object
     *
     * Values that are not arrays and records loosely equal to [null] are
     * skipped. The input BOM, if any, is removed from the first field of the
     * record found at offset 0. When a header offset is set, the header
     * record is dropped and every remaining record is combined with the
     * header.
     *
     * @throws RuntimeException If the selected header can not be used as record keys
     *
     * @return Iterator
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        if (!$this->supportsHeaderAsRecordKeys()) {
            throw new RuntimeException('The header record must be empty or a flat array with unique string values');
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $normalized = function ($record): bool {
            return is_array($record) && $record != [null];
        };

        // The BOM has to be stripped while records are still plain lists:
        // removeBOM() works on $record[0], and combineHeader() re-keys each
        // record with the header names. Stripping after combining broke the
        // first record whenever the header offset was greater than 0.
        $records = $this->stripBOM(new CallbackFilterIterator($this->document, $normalized), $bom);

        return $this->combineHeader($records);
    }
178
179
    /**
     * Returns whether the selected header can be combined with each record
     *
     * A valid header is either empty or made exclusively of unique
     * string field names.
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if (empty($header)) {
            return true;
        }

        // Dropping non-string and duplicated fields must leave the header untouched.
        $string_fields = array_filter($header, 'is_string');
        $unique_fields = array_unique($string_fields);

        return $unique_fields === $header;
    }
192
193
    /**
     * Returns the CSV record header
     *
     * The header is resolved on first access and then served from the
     * cached property until resetProperties() clears the loaded flag.
     * When no header offset is set the header is an empty array.
     *
     * @throws RuntimeException If the record at the header offset is missing or empty
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        $this->header = [];
        if (null !== $this->header_offset) {
            $this->header = $this->setHeader($this->header_offset);
        }

        // Flag the header as loaded only once it has been resolved: if
        // setHeader() throws, the next call must try again instead of
        // silently returning an empty header.
        $this->is_header_loaded = true;

        return $this->header;
    }
214
215
    /**
     * Determines the CSV record header
     *
     * Reads the record located at the given offset using the reader CSV
     * controls. When the offset is 0 the input BOM is removed from the
     * first field of the record.
     *
     * @param int $offset the header record offset
     *
     * @throws RuntimeException If the record at the given offset
     *                          is missing or is empty
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $document = $this->document;
        $document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $document->seek($offset);

        $record = $document->current();
        if (empty($record)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        if (0 !== $offset) {
            return $record;
        }

        return $this->removeBOM($record, mb_strlen($this->getInputBOM()), $this->enclosure);
    }
242
243
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without a header offset the iterator is returned untouched. Otherwise
     * the record located at the header offset is filtered out and every
     * other record is re-keyed with the header fields: shorter records are
     * padded with null values, longer records are truncated.
     *
     * @param Iterator $iterator
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $without_header = new CallbackFilterIterator($iterator, function (array $row, int $index): bool {
            return !($index == $this->header_offset);
        });

        $header = $this->getHeader();
        $field_count = count($header);
        $mapper = function (array $row) use ($header, $field_count): array {
            if (count($row) == $field_count) {
                return array_combine($header, $row);
            }

            // Align the record on the header size before combining.
            $padded = array_pad($row, $field_count, null);

            return array_combine($header, array_slice($padded, 0, $field_count));
        };

        return new MapIterator($without_header, $mapper);
    }
272
273
    /**
     * Strips the BOM sequence if present
     *
     * With an empty BOM the iterator is returned untouched. Otherwise the
     * iterator is wrapped so that the record found at index 0 goes through
     * removeBOM(); every other record is passed along unchanged.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $length = mb_strlen($bom);
        $mapper = function (array $row, int $position) use ($length): array {
            return 0 == $position
                ? $this->removeBOM($row, $length, $this->enclosure)
                : $row;
        };

        return new MapIterator($iterator, $mapper);
    }
298
299
    /**
     * Strips the BOM sequence from a record
     *
     * The first $bom_length characters are removed from the field stored at
     * index 0. If the remaining value is wrapped in the enclosure character
     * (it starts and ends with it) the surrounding enclosures are removed
     * as well.
     *
     * The record is returned untouched when $bom_length is 0 or when the
     * record has no field at index 0 (missing or null).
     *
     * @param string[] $record
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        // Nothing to strip, or no first field to strip it from.
        if (0 == $bom_length || !isset($record[0])) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);

        // A value needs at least two characters to be enclosed; otherwise a
        // lone enclosure character would match itself as both the opening
        // and the closing one and be wrongly removed.
        $is_enclosed = mb_strlen($field) > 1
            && $enclosure == mb_substr($field, 0, 1)
            && $enclosure == mb_substr($field, -1, 1);
        if ($is_enclosed) {
            $field = mb_substr($field, 1, -1);
        }

        $record[0] = $field;

        return $record;
    }
321
322
    /**
     * Selects the record to be used as the CSV header
     *
     * A null offset means that no record is used as the header. Any other
     * value is checked through filterMinRange() with a minimum of 0. When
     * the resulting offset differs from the current one it is stored and
     * the cached header is invalidated.
     *
     * Because the header is used as record keys, to be valid
     * a header MUST contain only unique string values.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        $normalized = null === $offset
            ? null
            : $this->filterMinRange($offset, 0, 'The header offset index must be a positive integer or 0');

        if ($normalized === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $normalized;
        $this->resetProperties();

        return $this;
    }
345
346
    /**
     * Marks the cached header as stale so that the next call
     * to getHeader() resolves it again
     *
     * The method returns the value assigned to the flag, i.e. false.
     *
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;

        return $this->is_header_loaded;
    }
353
}
354