Completed
Pull Request — master (#210)
by ignace nyamagana
02:39
created

Reader::getHeader()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 31
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 31
ccs 18
cts 18
cp 1
rs 8.439
c 0
b 0
f 0
cc 5
eloc 18
nc 5
nop 0
crap 5
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use LimitIterator;
21
use SplFileObject;
22
23
/**
24
 *  A class to manage extracting and filtering a CSV
25
 *
26
 * @package League.csv
27
 * @since  3.0.0
28
 *
29
 */
30
class Reader extends AbstractCsv implements IteratorAggregate
31
{
32
    /**
33
     * @inheritdoc
34
     */
35
    protected $stream_filter_mode = STREAM_FILTER_READ;
36
37
    /**
38
     * CSV Document header offset
39
     *
40
     * @var int|null
41
     */
42
    protected $header_offset;
43
44
    /**
45
     * CSV Document Header record
46
     *
47
     * @var string[]
48
     */
49
    protected $header = [];
50
51
    /**
52
     * Tells whether the header record is already loaded (when false it must be re-generated)
53
     *
54
     * @var bool
55
     */
56
    protected $is_header_loaded = false;
57
58
    /**
     * Returns the offset of the record used as the CSV header
     *
     * The value is read straight from the header_offset property;
     * null means that no CSV record is used as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
69
70
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * A non-null offset is checked with filterInteger() before any property
     * is modified: if that check fails by throwing, the current header offset
     * and the cached header stay exactly as they were.
     *
     * @param int|null $offset the header record offset, null to use no header
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        // Validate first: assigning before validating would leave the object
        // with a cleared offset but a still-cached header on failure.
        if (null !== $offset) {
            $offset = $this->filterInteger(
                $offset,
                0,
                'the header offset index must be a positive integer or 0'
            );
        }

        $this->header_offset = $offset;
        // The cached header no longer matches the offset: force a reload.
        $this->resetDynamicProperties();

        return $this;
    }
94
95
    /**
     * Flags the cached header as not loaded so the next getHeader() call rebuilds it
     *
     * @inheritdoc
     *
     * @return bool always false
     */
    protected function resetDynamicProperties()
    {
        $this->is_header_loaded = false;

        return false;
    }
102
103
    /**
     * Detects delimiter occurrences in the CSV
     *
     * Returns an associative array where each key is a candidate delimiter
     * and each value is the recursive count (records plus their fields) of the
     * records, among the first $nb_rows ones, that hold more than one field
     * when parsed with that delimiter. The result is sorted from the highest
     * to the lowest count.
     *
     * Only one-byte long delimiters are considered; duplicates are ignored.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_rows    Detection is made using $nb_rows of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_rows = 1): array
    {
        $nb_rows = $this->filterInteger($nb_rows, 1, 'The number of rows to consider must be a valid positive integer');

        $candidates = array_unique(array_filter($delimiters, function ($candidate) {
            return 1 === strlen($candidate);
        }));

        // A record split into a single field means the delimiter was not found in it.
        $has_several_fields = function ($record) {
            return is_array($record) && count($record) > 1;
        };

        $this->document->setFlags(SplFileObject::READ_CSV);

        $occurrences = [];
        foreach ($candidates as $delimiter) {
            $this->document->setCsvControl($delimiter, $this->enclosure, $this->escape);
            $sample = new LimitIterator($this->document, 0, $nb_rows);
            $records = iterator_to_array(new CallbackFilterIterator($sample, $has_several_fields), false);
            $occurrences[$delimiter] = count($records, COUNT_RECURSIVE);
        }

        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
134
135
    /**
     * Returns a collection of selected records
     *
     * The reader is handed to the given statement's process() method;
     * when no statement is supplied a new Statement instance is used.
     *
     * @param Statement|null $stmt
     *
     * @return RecordSet
     */
    public function select(Statement $stmt = null): RecordSet
    {
        return ($stmt ?? new Statement())->process($this);
    }
148
149
    /**
     * Returns an iterator over the CSV records
     *
     * Values that are not arrays and records loosely equal to [null] are
     * skipped. The BOM sequence is stripped from the record at offset 0
     * and, when a header offset is set, the header record is removed and
     * every remaining record is combined with the header.
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        // getHeader() moves the document cursor and sets its own flags,
        // so it must run before the document is configured for iteration.
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);

        $records = new CallbackFilterIterator($this->document, function ($row) {
            return is_array($row) && $row != [null];
        });

        // The BOM must be stripped while records are still numerically indexed:
        // removeBOM() reads index 0, which no longer exists once a record has
        // been combined with the header (e.g. BOM present and header offset > 0).
        // NOTE(review): relies on MapIterator exposing the keys of the wrapped
        // iterator, as the previous ordering already did - confirm.
        $records = $this->stripBOM($records, $bom);

        return $this->combineHeader($records, $header);
    }
166
167
    /**
     * Adds the CSV header to each record if a header offset is set
     *
     * Without header offset the iterator is returned untouched. Otherwise
     * the record located at the header offset is removed and each remaining
     * record is padded with null or truncated to the header length before
     * being combined with the column names returned by filterColumnNames().
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $column_names = $this->filterColumnNames($header);
        $column_count = count($column_names);

        $is_not_header_record = function (array $record, int $offset) {
            return $offset != $this->header_offset;
        };

        $to_associative = function (array $record) use ($column_names, $column_count) {
            // array_combine requires both arrays to have the same size.
            if (count($record) !== $column_count) {
                $record = array_slice(array_pad($record, $column_count, null), 0, $column_count);
            }

            return array_combine($column_names, $record);
        };

        return new MapIterator(new CallbackFilterIterator($iterator, $is_not_header_record), $to_associative);
    }
195
196
    /**
     * Strips the BOM sequence if present
     *
     * With an empty BOM the iterator is returned as is. Otherwise the
     * iterator is wrapped so that the record whose key is loosely equal
     * to 0 goes through removeBOM(); other records are left unchanged.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $strip_first_record = function (array $record, $offset) use ($bom_length) {
            return 0 == $offset
                ? $this->removeBOM($record, $bom_length, $this->enclosure)
                : $record;
        };

        return new MapIterator($iterator, $strip_first_record);
    }
219
220
    /**
     * Strips the BOM sequence from a record
     *
     * The first $bom_length characters are removed from the field at index 0.
     * If what remains both starts and ends with the enclosure character,
     * these two characters are removed as well. A BOM length of 0 leaves
     * the record unchanged.
     *
     * @param string[] $row
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $row, int $bom_length, string $enclosure): array
    {
        if (0 === $bom_length) {
            return $row;
        }

        $field = mb_substr($row[0], $bom_length);
        $is_enclosed = $enclosure == mb_substr($field, 0, 1)
            && $enclosure == mb_substr($field, -1, 1);

        $row[0] = $is_enclosed ? mb_substr($field, 1, -1) : $field;

        return $row;
    }
242
243
    /**
     * Returns the column header associated with the RecordSet
     *
     * The header is read from the document once and then served from the
     * cache until resetDynamicProperties() is called. Without header offset
     * an empty array is returned. When the header offset is 0 the BOM
     * sequence is stripped from the header record.
     *
     * The cache flag is only raised once the header has been successfully
     * resolved, so a failed lookup is reported again on the next call
     * instead of silently returning a previously cached header.
     *
     * @throws InvalidArgumentException If no header is found
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);
        $header = $this->document->current();
        if (empty($header)) {
            throw new InvalidArgumentException('The header record specified by `Reader::setHeaderOffset` does not exist or is empty');
        }

        // Only the first record of the document can carry the BOM sequence.
        if (0 === $this->header_offset) {
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        $this->is_header_loaded = true;

        return $this->header;
    }
281
}
282