Completed
Pull Request — master (#210)
by ignace nyamagana
12:18
created

Reader::getFieldIndex()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 17
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.2
c 0
b 0
f 0
ccs 8
cts 8
cp 1
cc 4
eloc 9
nc 4
nop 2
crap 4
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use League\Csv\Exception\InvalidArgumentException;
21
use LimitIterator;
22
use SplFileObject;
23
24
/**
25
 *  A class to manage extracting and filtering a CSV
26
 *
27
 * @package League.csv
28
 * @since  3.0.0
29
 *
30
 */
31
class Reader extends AbstractCsv implements IteratorAggregate
32
{
33
    /**
34
     * @inheritdoc
35
     */
36
    protected $stream_filter_mode = STREAM_FILTER_READ;
37
38
    /**
39
     * CSV Document header offset
40
     *
41
     * @var int|null
42
     */
43
    protected $header_offset;
44
45
    /**
46
     * CSV Document Header record
47
     *
48
     * @var string[]
49
     */
50
    protected $header = [];
51
52
    /**
53
     * Tells whether the header record has already been loaded (false means it must be re-generated)
54
     *
55
     * @var bool
56
     */
57
    protected $is_header_loaded = false;
58
59
    /**
     * Returns the offset of the record currently used as the CSV header
     *
     * A null value means that no record has been selected as header.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        return $this->header_offset;
    }
70
71
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * The submitted offset is validated before any property is modified so
     * that a rejected value leaves the current header offset and the cached
     * header untouched.
     *
     * @param int|null $offset the header record offset, null to use no header
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            // NOTE(review): filterInteger is defined outside this file view; it
            // presumably throws when $offset is not an integer >= 0 - confirm.
            $offset = $this->filterInteger(
                $offset,
                0,
                'the header offset index must be a positive integer or 0'
            );
        }

        // Only mutate the object once the new value is known to be acceptable.
        $this->header_offset = $offset;
        $this->resetDynamicProperties();

        return $this;
    }
95
96
    /**
     * @inheritdoc
     *
     * Marks the cached header as stale so that Reader::getHeader
     * re-generates it on its next call. The method returns the new
     * flag value, which is always false.
     */
    protected function resetDynamicProperties()
    {
        $this->is_header_loaded = false;

        return $this->is_header_loaded;
    }
103
104
    /**
     * Detects delimiter occurrences in the CSV
     *
     * Returns an associative array where each key is a submitted delimiter
     * and each value the score obtained when the first $nb_rows rows are
     * parsed with it. The array is sorted by descending score.
     *
     * Only delimiters whose length is exactly one byte are tested. After
     * the call the document flags are set to SplFileObject::READ_CSV and
     * its CSV control uses the last tested delimiter.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_rows    Detection is made using $nb_rows of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_rows = 1): array
    {
        $nb_rows = $this->filterInteger($nb_rows, 1, 'The number of rows to consider must be a valid positive integer');

        // A row only counts for a delimiter when that delimiter splits it into several fields.
        $has_several_fields = function ($record) {
            return is_array($record) && count($record) > 1;
        };
        $is_single_byte = function ($delimiter) {
            return 1 == strlen($delimiter);
        };
        $candidates = array_unique(array_filter($delimiters, $is_single_byte));

        $this->document->setFlags(SplFileObject::READ_CSV);
        $occurrences = [];
        foreach ($candidates as $candidate) {
            $this->document->setCsvControl($candidate, $this->enclosure, $this->escape);
            $sample = new LimitIterator($this->document, 0, $nb_rows);
            $records = iterator_to_array(new CallbackFilterIterator($sample, $has_several_fields), false);
            // COUNT_RECURSIVE adds the number of retained rows to the number of their fields.
            $occurrences[$candidate] = count($records, COUNT_RECURSIVE);
        }
        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
135
136
    /**
     * Returns a collection of selected records
     *
     * When no statement is submitted a new Statement instance
     * is created and used to process the Reader.
     *
     * @param Statement|null $stmt
     *
     * @return RecordSet
     */
    public function select(Statement $stmt = null): RecordSet
    {
        if (null === $stmt) {
            $stmt = new Statement();
        }

        return $stmt->process($this);
    }
149
150
    /**
     * @inheritdoc
     *
     * Iterates over the CSV records: values which are not arrays and
     * records loosely equal to [null] are skipped, the BOM sequence is
     * removed from the first record and, when a header offset is set,
     * each record is combined with the header.
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $normalized = function ($row) {
            return is_array($row) && $row != [null];
        };
        $iterator = new CallbackFilterIterator($this->document, $normalized);

        // The BOM MUST be stripped while records are still plain lists:
        // combineHeader re-keys every record with the column names whereas
        // removeBOM reads and rewrites the field stored at index 0. Doing it
        // the other way around breaks on record 0 when the header offset is
        // greater than 0.
        // NOTE(review): like the previous ordering, this relies on MapIterator
        // exposing the keys of the iterator it wraps - confirm.
        $iterator = $this->stripBOM($iterator, $bom);

        return $this->combineHeader($iterator, $header);
    }
167
168
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without header offset the iterator is returned untouched. Otherwise
     * the header record itself is removed from the iteration and every
     * other record is padded with null values or truncated so that it holds
     * as many fields as the header before being keyed by the column names.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $header = $this->filterColumnNames($header);
        $header_count = count($header);

        // The header record must not be returned as a data record.
        $skip_header = function (array $record, int $offset) {
            return $offset != $this->header_offset;
        };

        $to_assoc = function (array $record) use ($header, $header_count) {
            if (count($record) != $header_count) {
                // array_combine requires both arrays to have the same size.
                $record = array_slice(array_pad($record, $header_count, null), 0, $header_count);
            }

            return array_combine($header, $record);
        };

        return new MapIterator(new CallbackFilterIterator($iterator, $skip_header), $to_assoc);
    }
196
197
    /**
     * Strips the BOM sequence if present
     *
     * When $bom is empty the iterator is returned untouched. Otherwise
     * only the record whose key loosely equals 0 is altered.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' === $bom) {
            return $iterator;
        }

        $bom_length = mb_strlen($bom);
        $strip_first_record = function (array $record, $offset) use ($bom_length) {
            if (0 == $offset) {
                return $this->removeBOM($record, $bom_length, $this->enclosure);
            }

            return $record;
        };

        return new MapIterator($iterator, $strip_first_record);
    }
220
221
    /**
     * Strips the BOM sequence from a record
     *
     * The first $bom_length characters of the first field are dropped. If
     * the remaining value starts and ends with the enclosure character, both
     * characters are removed as well. The other fields are left untouched.
     *
     * @param string[] $row
     * @param int      $bom_length
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $row, int $bom_length, string $enclosure): array
    {
        if (0 != $bom_length) {
            $first_field = mb_substr($row[0], $bom_length);
            $is_enclosed = $enclosure == mb_substr($first_field, 0, 1)
                && $enclosure == mb_substr($first_field, -1, 1);
            $row[0] = $is_enclosed ? mb_substr($first_field, 1, -1) : $first_field;
        }

        return $row;
    }
243
244
    /**
     * Returns the header record of the CSV document
     *
     * The header is read once and cached until the dynamic properties are
     * reset. When no header offset is set an empty array is returned. The
     * BOM sequence is removed from the header only when the header is the
     * first record (offset 0).
     *
     * The header is flagged as loaded only once it has been successfully
     * read, so a failed attempt is reported again on the next call instead
     * of returning a stale cached value.
     *
     * @throws InvalidArgumentException If the header record does not exist or is empty
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);
        $header = $this->document->current();
        if (empty($header)) {
            // The loaded flag is deliberately left untouched here.
            throw new InvalidArgumentException('The header record specified by `Reader::setHeaderOffset` does not exist or is empty');
        }

        if (0 === $this->header_offset) {
            // Only the very first record of the document can carry the BOM sequence.
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        $this->is_header_loaded = true;

        return $this->header;
    }
282
}
283