Completed
Push — master ( 1cb256...b5f6c9 )
by ignace nyamagana
06:43 queued 03:38
created

Reader::setHeaderOffset()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 13
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 13
ccs 7
cts 7
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 7
nc 4
nop 1
crap 3
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 *  A class to manage extracting and filtering a CSV
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
{
    /**
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * Offset of the record used as the CSV header.
     *
     * Null means that no record is used as header.
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * Cached CSV header record.
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Tells whether the cached header is up to date.
     *
     * When false, getHeader() re-computes the header on its next call.
     *
     * @var bool
     */
    protected $is_header_loaded = false;

    /**
     * Returns the record offset used as header.
     *
     * If no CSV record is used as header this method returns null.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }

    /**
     * Returns the records selected by the given statement.
     *
     * The work is delegated to Statement::process(), which receives this reader.
     *
     * @param Statement $stmt
     *
     * @return RecordSet
     */
    public function select(Statement $stmt): RecordSet
    {
        return $stmt->process($this);
    }

    /**
     * Detects delimiter occurrences in the CSV.
     *
     * Returns an associative array where each key is a candidate delimiter
     * and each value is the count computed by getCellCount() for it, sorted
     * from the highest count to the lowest.
     *
     * Candidates whose byte length is not exactly 1 are ignored and
     * duplicates are removed.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_records Detection is made using $nb_records of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_records = 1): array
    {
        $nb_records = $this->filterInteger($nb_records, 1, __METHOD__.': the number of rows to consider must be a valid positive integer');

        $is_single_byte = static function ($value): bool {
            return 1 === strlen($value);
        };

        $res = [];
        foreach (array_unique(array_filter($delimiters, $is_single_byte)) as $delimiter) {
            $res[$delimiter] = $this->getCellCount($delimiter, $nb_records);
        }

        arsort($res, SORT_NUMERIC);

        return $res;
    }

    /**
     * Returns the cell count for a specified delimiter
     * and a specified number of records.
     *
     * Only records split into more than one cell are retained. The value is
     * a recursive count, so each retained record adds 1 plus its number of
     * cells to the total.
     *
     * Note: the document flags and CSV control are changed by this method and
     * are not restored afterwards.
     *
     * @param string $delimiter  CSV delimiter
     * @param int    $nb_records CSV records to consider
     *
     * @return int
     */
    protected function getCellCount(string $delimiter, int $nb_records)
    {
        $has_several_cells = static function ($row): bool {
            return is_array($row) && count($row) > 1;
        };

        $this->document->setFlags(SplFileObject::READ_CSV);
        $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);

        $sample = new LimitIterator($this->document, 0, $nb_records);
        $records = iterator_to_array(new CallbackFilterIterator($sample, $has_several_cells), false);

        return count($records, COUNT_RECURSIVE);
    }

    /**
     * Returns an iterator over the CSV records.
     *
     * Non-array values and records loosely equal to [null] are skipped.
     * When a header offset is set, the header record is removed and each
     * remaining record is keyed by the header column names.
     *
     * @throws RuntimeException If the header record can not be loaded
     *
     * @return Iterator
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        // The comparison is intentionally loose: it also rejects records
        // such as [''] exactly like a blank line parsed as [null].
        $is_valid_record = static function ($row): bool {
            return is_array($row) && $row != [null];
        };

        $iterator = new CallbackFilterIterator($this->document, $is_valid_record);

        // The BOM must be stripped while records are still positional lists:
        // removeBOM() works on index 0, which no longer exists once
        // combineHeader() has re-keyed the record with the column names.
        $iterator = $this->stripBOM($iterator, $bom);

        return $this->combineHeader($iterator, $header);
    }

    /**
     * Returns the header record.
     *
     * An empty array is returned when no header offset is set. The result is
     * cached until resetProperties() is called.
     *
     * The cache is only updated once the header has been successfully read,
     * so a failed call can be retried and keeps throwing the same exception
     * instead of returning an invalid cached value.
     *
     * @throws RuntimeException If no header is found
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);
        $header = $this->document->current();
        if (!is_array($header) || [] === $header) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $this->header_offset));
        }

        // Only the first record of the document can start with the BOM sequence.
        if (0 === $this->header_offset) {
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        $this->is_header_loaded = true;

        return $this->header;
    }

    /**
     * Adds the CSV header if present and valid.
     *
     * When no header offset is set the iterator is returned untouched.
     * Otherwise the header record is filtered out and every other record is
     * padded with null or truncated to the header length before being
     * combined with the column names.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $header = $this->filterColumnNames($header);
        $header_count = count($header);

        $without_header = new CallbackFilterIterator($iterator, function (array $record, int $offset): bool {
            return $offset !== $this->header_offset;
        });

        $combine = static function (array $record) use ($header_count, $header): array {
            if ($header_count !== count($record)) {
                $record = array_slice(array_pad($record, $header_count, null), 0, $header_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator($without_header, $combine);
    }

    /**
     * Strips the BOM sequence if present.
     *
     * Only the record found at index 0 is altered; when $bom is an empty
     * string the iterator is returned untouched.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $strip = function (array $record, int $index) use ($bom_length): array {
            if (0 === $index) {
                return $this->removeBOM($record, $bom_length, $this->enclosure);
            }

            return $record;
        };

        return new MapIterator($iterator, $strip);
    }

    /**
     * Strips the BOM sequence from a record.
     *
     * The first $bom_length characters of the first cell are removed. If the
     * remaining cell content starts and ends with the enclosure character,
     * both enclosure characters are removed as well.
     *
     * @param string[] $row
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $row, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $row;
        }

        $first_cell = mb_substr($row[0], $bom_length);
        if ($enclosure === mb_substr($first_cell, 0, 1) && $enclosure === mb_substr($first_cell, -1, 1)) {
            $first_cell = mb_substr($first_cell, 1, -1);
        }

        $row[0] = $first_cell;

        return $row;
    }

    /**
     * Selects the record to be used as the CSV header.
     *
     * Because the header is used as the keys of each record, a valid header
     * MUST contain only unique string values.
     *
     * Changing the offset invalidates the cached header.
     *
     * @param int|null $offset the header record offset, or null for no header
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            $offset = $this->filterInteger($offset, 0, __METHOD__.': the header offset index must be a positive integer or 0');
        }

        if ($offset === $this->header_offset) {
            return $this;
        }

        $this->header_offset = $offset;
        $this->resetProperties();

        return $this;
    }

    /**
     * @inheritdoc
     */
    protected function resetProperties()
    {
        $this->is_header_loaded = false;

        // false is returned to keep the method's historical return value.
        return false;
    }
}
303