Completed
Push — master ( 0a06af...8913e7 )
by ignace nyamagana
15:05
created

Reader::resetProperties()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
ccs 0
cts 0
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 * A class to manage records selection from a CSV document
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
32
{
33
    /**
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Offset of the record used as the CSV header.
     *
     * Null means that no record of the document is used as header.
     *
     * @var int|null
     */
    protected $header_offset = null;

    /**
     * Cached header record, as last computed by getHeader().
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Whether the cached header is up to date.
     *
     * While false, getHeader() regenerates the header on its next call.
     *
     * @var bool
     */
    protected $is_header_loaded = false;

    /**
     * Value used to pad a record that has fewer fields than the header.
     *
     * @var mixed
     */
    protected $record_padding_value = null;
65
66 2
    /**
     * Returns the offset of the record used as header.
     *
     * Null is returned when no CSV record is used as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
77
78 2
    /**
     * Tells whether the selected header can be combined with each record.
     *
     * A usable header is either empty or made only of unique string field names.
     *
     * @return bool
     */
    public function supportsHeaderAsRecordKeys(): bool
    {
        $header = $this->getHeader();
        if (empty($header)) {
            return true;
        }

        // Dropping non-string or duplicated names must leave the header untouched.
        $string_fields = array_filter($header, 'is_string');

        return $header === array_unique($string_fields);
    }
91
92
    /**
     * Returns the CSV header record.
     *
     * The header is computed lazily and cached: it is an empty array when no
     * header offset is set, otherwise the record found at the header offset.
     *
     * @throws RuntimeException If the record at the header offset is missing or empty
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        $this->header = [];
        if (null !== $this->header_offset) {
            $this->header = $this->setHeader($this->header_offset);
        }

        // Flag the cache as valid only once the header was resolved: if
        // setHeader() throws, the next call must fail again instead of
        // silently returning an empty header.
        $this->is_header_loaded = true;

        return $this->header;
    }
113
114
    /**
     * Reads the header record located at the given offset.
     *
     * When the header is the first record of the document, the input BOM
     * sequence is stripped from it.
     *
     * @param int $offset
     *
     * @throws RuntimeException If the record at the given offset is missing or empty
     *
     * @return string[]
     */
    protected function setHeader(int $offset): array
    {
        $flags = SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY;
        $this->document->setFlags($flags);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($offset);

        $record = $this->document->current();
        if (empty($record)) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $offset));
        }

        // Only the very first record of the document can carry the BOM.
        if (0 !== $offset) {
            return $record;
        }

        return $this->removeBOM($record, mb_strlen($this->getInputBOM()), $this->enclosure);
    }
141 18
142
    /**
     * Strips the BOM sequence from the first field of a record.
     *
     * The record is returned untouched when the BOM length is 0 or when the
     * record has no usable first field.
     *
     * @param string[] $record
     * @param int      $bom_length number of characters to drop from the first field
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $record, int $bom_length, string $enclosure): array
    {
        // A missing or null first field has nothing to strip; under strict
        // types it would otherwise make mb_substr() raise a TypeError.
        if (0 === $bom_length || !isset($record[0])) {
            return $record;
        }

        $record[0] = mb_substr($record[0], $bom_length);

        // When what remains is wrapped in the enclosure character, drop that
        // surrounding pair as well (presumably the leading BOM kept the CSV
        // parser from recognising the field as enclosed - to be confirmed).
        if ($enclosure === mb_substr($record[0], 0, 1) && $enclosure === mb_substr($record[0], -1, 1)) {
            $record[0] = mb_substr($record[0], 1, -1);
        }

        return $record;
    }
164
165
    /**
     * Returns the value used to pad records shorter than the header.
     *
     * @return mixed
     */
    public function getRecordPaddingValue()
    {
        return $this->record_padding_value;
    }
174 2
175 2
    /**
     * Runs a statement against this document and returns the resulting
     * records collection.
     *
     * @param Statement $stmt
     *
     * @return ResultSet
     */
    public function select(Statement $stmt): ResultSet
    {
        $result_set = $stmt->process($this);

        return $result_set;
    }
186 2
187
    /**
     * Detects delimiter occurrences in the CSV.
     *
     * Returns an associative array where each key is a valid delimiter and
     * each value is the count computed by getCellCount() for that delimiter,
     * sorted from the highest to the lowest count.
     *
     * Entries of $delimiters that are not single-byte strings are ignored,
     * as are duplicates.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records Detection is made using $nb_records of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        $nb_records = $this->filterMinRange($nb_records, 1, 'The number of records to consider must be a valid positive integer');

        // Guard with is_string(): under strict types strlen() raises a
        // TypeError on a non-string entry instead of merely rejecting it.
        $is_valid_delimiter = static function ($value): bool {
            return is_string($value) && 1 === strlen($value);
        };

        $res = [];
        foreach (array_unique(array_filter($delimiters, $is_valid_delimiter)) as $delimiter) {
            $res[$delimiter] = $this->getCellCount($delimiter, $nb_records);
        }

        arsort($res, SORT_NUMERIC);

        return $res;
    }
217
218
    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records.
     *
     * Only records split into more than one cell are kept. The result is a
     * recursive count, so each kept record adds one to the total on top of
     * its cells.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records): int
    {
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $has_several_cells = function ($record): bool {
            return is_array($record) && count($record) > 1;
        };
        $sample = new LimitIterator($this->document, 0, $nb_records);
        $records = iterator_to_array(new CallbackFilterIterator($sample, $has_several_cells), false);

        return count($records, COUNT_RECURSIVE);
    }
239
240 4
    /**
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $records = $this->getRecords();

        return $records;
    }
247
248
    /**
     * Returns the CSV records in an iterator object.
     *
     * Entries that are not arrays, or that loosely equal [null], are skipped.
     * The input BOM sequence, if any, is stripped from the first record.
     *
     * If the CSV document has a header record, each record is combined with
     * the header and the header record itself is removed from the iterator.
     * In that case records shorter than the header are padded with the record
     * padding value, while extra record fields are stripped.
     *
     * @throws RuntimeException If the header contains non unique column name
     *
     * @return Iterator
     */
    public function getRecords(): Iterator
    {
        if (!$this->supportsHeaderAsRecordKeys()) {
            throw new RuntimeException('The header record must be empty or a flat array with unique string values');
        }

        $bom = $this->getInputBOM();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        // The comparison with [null] is deliberately loose, as in the
        // historical behaviour of this method.
        $is_record = function ($record): bool {
            return is_array($record) && !($record == [null]);
        };
        $records = new CallbackFilterIterator($this->document, $is_record);
        $records = $this->stripBOM($records, $bom);

        return $this->combineHeader($records);
    }
279
280
    /**
     * Combines each record with the CSV header, if a header offset is set.
     *
     * Without a header offset the iterator is returned untouched. Otherwise
     * the header record is removed from the iteration and every remaining
     * record is keyed by the header field names; records shorter than the
     * header are padded with the record padding value and longer ones are
     * truncated.
     *
     * @param Iterator $iterator
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator): Iterator
    {
        // Snapshot the offset together with the header: both describe the same
        // record, and the returned iterator is evaluated lazily, possibly after
        // the header offset was changed on this object.
        $header_offset = $this->header_offset;
        if (null === $header_offset) {
            return $iterator;
        }

        $iterator = new CallbackFilterIterator($iterator, function (array $record, int $offset) use ($header_offset): bool {
            return $offset !== $header_offset;
        });

        $header = $this->getHeader();
        $field_count = count($header);
        $mapper = function (array $record) use ($header, $field_count): array {
            if (count($record) !== $field_count) {
                // Pad first, then slice, so the record ends up with exactly
                // as many fields as the header.
                $record = array_slice(array_pad($record, $field_count, $this->record_padding_value), 0, $field_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($iterator, $mapper);
    }
309
310 8
    /**
     * Strips the BOM sequence from the record at index 0, if a BOM is given.
     *
     * With an empty BOM the iterator is returned untouched.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);

        return new MapIterator($iterator, function (array $record, int $index) use ($bom_length): array {
            return 0 === $index
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        });
    }
335 2
336
337
    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is represented as an array, a valid header
     * MUST contain only unique string values.
     *
     * A null offset means that no record is used as header. Changing the
     * offset resets the properties derived from it.
     *
     * @param int|null $offset the header record offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            $offset = $this->filterMinRange($offset, 0, 'The header offset index must be a positive integer or 0');
        }

        // Nothing to invalidate when the offset is unchanged.
        if ($offset === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }
360
361
    /**
     * Sets the value used to pad records shorter than the header.
     *
     * @param mixed $record_padding_value
     *
     * @return static
     */
    public function setRecordPaddingValue($record_padding_value): self
    {
        $this->record_padding_value = $record_padding_value;

        return $this;
    }
374
375
    /**
     * @inheritdoc
     *
     * Flags the cached header as stale so that getHeader() regenerates it
     * on its next call. Nothing is returned.
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;
    }
382
}
383