Completed
Push — master ( babf40...27ada1 )
by ignace nyamagana
02:52
created

Reader::getHeaderOffset()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\RuntimeException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 *  A class to manage extracting and filtering a CSV
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
{
    /**
     * @inheritdoc
     */
    protected $stream_filter_mode = STREAM_FILTER_READ;

    /**
     * CSV Document header offset
     *
     * @var int|null
     */
    protected $header_offset;

    /**
     * CSV Document Header record
     *
     * @var string[]
     */
    protected $header = [];

    /**
     * Tell whether the header record has already been loaded
     *
     * When false the header is (re-)generated on the next call
     * to Reader::getHeader
     *
     * @var bool
     */
    protected $is_header_loaded = false;

    /**
     * Returns the record offset used as header
     *
     * If no CSV record is used this method MUST return null
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }

    /**
     * Detect Delimiters occurrences in the CSV
     *
     * Returns an associative array where each key represents
     * a candidate delimiter and each value its score, sorted
     * from the highest score to the lowest.
     *
     * Only single-byte delimiters are considered. The score is the
     * recursive count of the records made of more than one field
     * found in the first $nb_rows rows (records plus their fields).
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_rows    Detection is made using $nb_rows of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_rows = 1): array
    {
        $nb_rows = $this->filterInteger($nb_rows, 1, __METHOD__.': the number of rows to consider must be a valid positive integer');
        // a row split into a single field means the delimiter was not found in it
        $filter_row = function ($row): bool {
            return is_array($row) && count($row) > 1;
        };
        $delimiters = array_unique(array_filter($delimiters, function ($value): bool {
            return 1 === strlen($value);
        }));
        $this->document->setFlags(SplFileObject::READ_CSV);
        $res = [];
        foreach ($delimiters as $delim) {
            $this->document->setCsvControl($delim, $this->enclosure, $this->escape);
            $iterator = new CallbackFilterIterator(new LimitIterator($this->document, 0, $nb_rows), $filter_row);
            $res[$delim] = count(iterator_to_array($iterator, false), COUNT_RECURSIVE);
        }
        arsort($res, SORT_NUMERIC);

        return $res;
    }

    /**
     * Returns a collection of selected records
     *
     * If no Statement is given a new, unconfigured Statement is used.
     *
     * @param Statement|null $stmt
     *
     * @return RecordSet
     */
    public function select(Statement $stmt = null): RecordSet
    {
        $stmt = $stmt ?? new Statement();

        return $stmt->process($this);
    }

    /**
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $normalized = function ($row): bool {
            return is_array($row) && $row != [null];
        };
        $iterator = new CallbackFilterIterator($this->document, $normalized);

        // The BOM MUST be stripped while the records are still indexed by
        // position: once combined with the header the first field is no longer
        // reachable through the index 0.
        $iterator = $this->stripBOM($iterator, $bom);

        return $this->combineHeader($iterator, $header);
    }

    /**
     * Add the CSV header if present and valid
     *
     * When a header offset is set, the header record is removed from
     * the iterator and every remaining record is padded or truncated
     * to the header length before being indexed by the column names.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $header = $this->filterColumnNames($header);
        $header_count = count($header);
        // skip the record used as header
        $iterator = new CallbackFilterIterator($iterator, function (array $row, int $offset): bool {
            return $offset != $this->header_offset;
        });

        $mapper = function (array $row) use ($header_count, $header): array {
            if ($header_count !== count($row)) {
                // missing fields are filled with null, extra fields are dropped
                $row = array_slice(array_pad($row, $header_count, null), 0, $header_count);
            }

            return array_combine($header, $row);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Strip the BOM sequence if present
     *
     * Only the record found at offset 0 is altered.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $mapper = function (array $row, int $index) use ($bom_length): array {
            if (0 !== $index) {
                return $row;
            }

            return $this->removeBOM($row, $bom_length, $this->enclosure);
        };

        return new MapIterator($iterator, $mapper);
    }

    /**
     * Strip the BOM sequence from a record
     *
     * The BOM is removed from the first field; if the remaining value
     * is still wrapped in the enclosure character, the enclosure is
     * removed too. A record without a first field is returned as is.
     *
     * @param string[] $row
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $row, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length || !isset($row[0])) {
            return $row;
        }

        $row[0] = mb_substr($row[0], $bom_length);
        if ($enclosure === mb_substr($row[0], 0, 1) && $enclosure === mb_substr($row[0], -1, 1)) {
            $row[0] = mb_substr($row[0], 1, -1);
        }

        return $row;
    }

    /**
     * Returns the header record
     *
     * The header is loaded once and cached until the properties are reset.
     * If no header offset is set an empty array is returned.
     *
     * @throws RuntimeException If no header is found
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);
        $header = $this->document->current();
        // The header is validated before being cached so that a failed
        // lookup is never remembered as a loaded header.
        if (!is_array($header) || [] === $header) {
            throw new RuntimeException(sprintf('The header record does not exist or is empty at offset: `%s`', $this->header_offset));
        }

        if (0 === $this->header_offset) {
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        $this->is_header_loaded = true;

        return $this->header;
    }

    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is used to index each record, to be valid
     * a header MUST contain only unique string values.
     *
     * Changing the offset resets the cached header.
     *
     * @param int|null $offset the header row offset
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            $offset = $this->filterInteger($offset, 0, __METHOD__.': the header offset index must be a positive integer or 0');
        }

        if ($offset !== $this->header_offset) {
            $this->header_offset = $offset;
            $this->resetProperties();
        }

        return $this;
    }

    /**
     * @inheritdoc
     */
    protected function resetProperties()
    {
        // NOTE(review): this returns the assigned value (false); confirm whether
        // the parent contract expects a return value before removing it.
        return $this->is_header_loaded = false;
    }
}
282