Completed
Push — master ( 147747...e61ddd )
by ignace nyamagana
23:03 queued 02:55
created

Reader::getRecords()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
/**
* This file is part of the League.csv library
*
* @license http://opensource.org/licenses/MIT
* @link https://github.com/thephpleague/csv/
* @version 9.0.0
* @package League.csv
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
 * A class to manage records selection from a CSV document
 *
 * @package League.csv
 * @since  3.0.0
 *
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
32
{
33
    /**
34
     * @inheritdoc
35
     */
36
    protected $stream_filter_mode = STREAM_FILTER_READ;
37
38
    /**
39
     * CSV Document header offset
40
     *
41
     * @var int|null
42
     */
43
    protected $header_offset;
44
45
    /**
46
     * CSV Document Header record
47
     *
48
     * @var string[]
49
     */
50
    protected $header = [];
51
52
    /**
53
     * Tell whether the header needs to be re-generated
54
     *
55
     * @var bool
56
     */
57
    protected $is_header_loaded = false;
58
59
    /**
     * Gives the offset of the record currently designated as the CSV header.
     *
     * A null value means that no record of the document is used as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
70
71
    /**
     * Applies a statement to this document and returns the selected records.
     *
     * The selection itself is delegated to the statement, which receives
     * the current reader instance.
     *
     * @param Statement $stmt the statement to process against this reader
     *
     * @return ResultSet
     */
    public function select(Statement $stmt): ResultSet
    {
        $result_set = $stmt->process($this);

        return $result_set;
    }
82
83
    /**
     * Counts, for each candidate delimiter, the cells found in the CSV
     *
     * Only candidates made of exactly one byte are kept, and duplicates are
     * dropped. The returned associative array uses the delimiter as key and
     * its occurrence count as value, sorted from the highest count to the
     * lowest.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records the number of CSV records used for the detection
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        $nb_records = $this->filterMinRange($nb_records, 1, 'The number of records to consider must be a valid positive integer');

        $is_single_byte = function ($value): bool {
            return strlen($value) === 1;
        };

        $occurrences = [];
        foreach (array_unique(array_filter($delimiters, $is_single_byte)) as $delimiter) {
            $occurrences[$delimiter] = $this->getCellCount($delimiter, $nb_records);
        }
        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
113
114
    /**
     * Computes the cell count obtained when parsing the first records
     * of the document with the given delimiter
     *
     * Records that do not split into more than one cell are ignored.
     * Because the count is recursive, every retained record contributes
     * one unit in addition to its cells.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $this->document->setFlags(SplFileObject::READ_CSV);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $has_many_cells = function ($record): bool {
            return is_array($record) && count($record) > 1;
        };
        $sample = new LimitIterator($this->document, 0, $nb_records);
        $records = iterator_to_array(new CallbackFilterIterator($sample, $has_many_cells), false);

        return count($records, COUNT_RECURSIVE);
    }
135
136
    /**
     * Returns an iterator over the CSV records
     *
     * The document is read with the reader CSV controls, records which are
     * not arrays or which are equal to `[null]` are skipped, the BOM sequence
     * is removed from the first record and, when a header offset is set,
     * each record is combined with the header.
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $normalized = function ($record): bool {
            return is_array($record) && $record != [null];
        };

        // The BOM MUST be stripped while records are still 0-indexed lists:
        // once combined with the header a record is keyed by column names,
        // so removeBOM() would no longer find the field at index 0 whenever
        // the header offset is greater than 0.
        // NOTE(review): this relies on MapIterator exposing the original
        // record offsets as keys, which stripBOM()'s own index check
        // already requires - confirm against MapIterator.
        $records = $this->stripBOM(new CallbackFilterIterator($this->document, $normalized), $bom);

        return $this->combineHeader($records, $header);
    }
153
154
    /**
     * Returns the CSV records in an iterator object.
     *
     * This method is a named alias of Reader::getIterator().
     *
     * Each CSV record is represented as a simple array of string or null values.
     *
     * When the CSV document has a header record, every record is combined
     * with it and the header record itself is removed from the iterator.
     *
     * When the CSV document is inconsistent, missing record fields are
     * filled with null values while extra record fields are stripped from
     * the returned record.
     *
     * @see Reader::getIterator()
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        $records = $this->getIterator();

        return $records;
    }
176
177
    /**
     * Returns the CSV record header
     *
     * The returned header is represented as an array of string values.
     * If no header offset is set an empty array is returned.
     *
     * The header is read once and cached until the properties are reset.
     * The cache is only filled once the header record has been validated,
     * so a failed attempt leaves the reader untouched and the next call
     * raises the same exception again instead of returning an invalid
     * cached value.
     *
     * @throws RuntimeException If the header offset is an integer
     *                          and the corresponding record is missing
     *                          or is an empty array
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);

        // Work on a local value: the cached header and its flag are only
        // committed after the record has been validated.
        $header = $this->document->current();
        if (!is_array($header) || empty($header)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $this->header_offset));
        }

        // Only the very first record of the document can carry the BOM sequence.
        if (0 === $this->header_offset) {
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        $this->is_header_loaded = true;

        return $this->header;
    }
215
216
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without header offset the iterator is returned untouched. Otherwise
     * the header record is removed from the iterator and every remaining
     * record is resized to the header length (padded with null values or
     * truncated) before using the header values as its keys.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $header = $this->filterColumnNames($header);
        $header_count = count($header);

        $is_not_header = function (array $record, int $offset): bool {
            return $offset != $this->header_offset;
        };

        $combine = function (array $record) use ($header, $header_count): array {
            $record_count = count($record);
            if ($record_count < $header_count) {
                $record = array_pad($record, $header_count, null);
            } elseif ($record_count > $header_count) {
                $record = array_slice($record, 0, $header_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator(new CallbackFilterIterator($iterator, $is_not_header), $combine);
    }
246
247
    /**
     * Validates the column names used to index the records
     *
     * An empty array is always accepted. A non-empty array is accepted
     * only if it contains nothing but unique string values.
     *
     * @param array $keys
     *
     * @throws RuntimeException If the submitted array fails the assertion
     *
     * @return array
     */
    protected function filterColumnNames(array $keys): array
    {
        if (empty($keys)) {
            return $keys;
        }

        $unique_strings = array_unique(array_filter($keys, 'is_string'));
        if ($keys !== $unique_strings) {
            throw new RuntimeException('Use a flat array with unique string values');
        }

        return $keys;
    }
264
265
    /**
     * Removes the BOM sequence from the record found at index 0
     *
     * When the BOM sequence is empty the iterator is returned as is;
     * otherwise it is wrapped so that only the record whose index is 0
     * gets processed, every other record being returned unchanged.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            return 0 == $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
290
291
    /**
     * Removes the BOM sequence from the first field of a record
     *
     * Once the BOM characters are dropped, if the remaining field starts
     * and ends with the enclosure character, both are removed as well.
     * A BOM length of 0 leaves the record unchanged.
     *
     * @param string[] $record
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        if (0 == $bom_length) {
            return $record;
        }

        $field = mb_substr($record[0], $bom_length);
        $is_enclosed = $enclosure == mb_substr($field, 0, 1) && $enclosure == mb_substr($field, -1, 1);
        $record[0] = $is_enclosed ? mb_substr($field, 1, -1) : $field;

        return $record;
    }
313
314
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * A non-null offset is validated as an integer greater than or equal
     * to 0. The reader properties are reset only when the offset changes.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            $offset = $this->filterMinRange($offset, 0, 'The header offset index must be a positive integer or 0');
        }

        if ($offset === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }
337
338
    /**
     * Marks the cached header as stale so that the next call to
     * getHeader() reads it again from the document.
     *
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;

        return false;
    }
345
}
346