Completed
Pull Request — master (#210)
by ignace nyamagana
02:52
created

Reader   A

Complexity

Total Complexity 25

Size/Duplication

Total Lines 242
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 25
lcom 1
cbo 4
dl 0
loc 242
ccs 83
cts 83
cp 1
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A getHeaderOffset() 0 4 1
A setHeaderOffset() 0 14 2
A resetDynamicProperties() 0 4 1
A fetchDelimitersOccurrence() 0 20 3
A select() 0 6 1
A getIterator() 0 14 2
A combineHeader() 0 20 3
A stripBOM() 0 15 3
A removeBOM() 0 13 4
B getHeader() 0 31 5
1
<?php
2
/**
3
* This file is part of the League.csv library
4
*
5
* @license http://opensource.org/licenses/MIT
6
* @link https://github.com/thephpleague/csv/
7
* @version 9.0.0
8
* @package League.csv
9
*
10
* For the full copyright and license information, please view the LICENSE
11
* file that was distributed with this source code.
12
*/
13
declare(strict_types=1);
14
15
namespace League\Csv;
16
17
use CallbackFilterIterator;
18
use Iterator;
19
use IteratorAggregate;
20
use LimitIterator;
21
use SplFileObject;
22
23
/**
24
 *  A class to manage extracting and filtering a CSV
25
 *
26
 * @package League.csv
27
 * @since  3.0.0
28
 *
29
 */
30
class Reader extends AbstractCsv implements IteratorAggregate
31
{
32
    /**
33
     * @inheritdoc
34
     */
35
    protected $stream_filter_mode = STREAM_FILTER_READ;
36
37
    /**
38
     * CSV Document header offset
39
     *
40
     * @var int|null
41
     */
42
    protected $header_offset;
43
44
    /**
     * Header record cached by getHeader()
     *
     * Stays an empty array when no header offset is set.
     *
     * @var string[]
     */
    protected $header = [];
45
46
    /**
     * Tells whether $header has already been computed by getHeader()
     *
     * Set back to false by resetDynamicProperties().
     *
     * @var bool
     */
    protected $is_header_loaded = false;
47
48
    /**
     * Gives the offset of the record currently used as the CSV header
     *
     * When no record is used as the header, null is returned.
     *
     * @return int|null
     */
    public function getHeaderOffset()
    {
        $offset = $this->header_offset;

        return $offset;
    }
59
60
    /**
     * Selects the record to be used as the CSV header
     *
     * Because the header is represented as an array, to be valid
     * a header MUST contain only unique string values.
     *
     * The offset is validated before any state is modified, so a rejected
     * offset leaves the previously configured header offset (and the cached
     * header) untouched.
     *
     * @param int|null $offset the header row offset, or null to use no header
     *
     * @return static
     */
    public function setHeaderOffset($offset): self
    {
        if (null !== $offset) {
            // filterInteger() is declared outside this class; it presumably
            // throws when the value is below the given minimum (TODO confirm).
            $offset = $this->filterInteger(
                $offset,
                0,
                'the header offset index must be a positive integer or 0'
            );
        }

        // Only reached with a valid offset: drop the cached header, then store.
        $this->resetDynamicProperties();
        $this->header_offset = $offset;

        return $this;
    }
84
85
    /**
     * Marks the cached header as stale so that the next getHeader() call
     * computes it again
     *
     * @inheritdoc
     */
    protected function resetDynamicProperties()
    {
        $this->is_header_loaded = false;

        // The value assigned to the flag is also the value returned.
        return false;
    }
92
93
    /**
     * Detects delimiter occurrences in the CSV
     *
     * Returns an associative array where each key is a candidate delimiter
     * and each value is the recursive count (records plus their fields) of
     * the inspected records that split into more than one field with that
     * delimiter. The result is sorted by descending count.
     *
     * Candidates that are not exactly one byte long are ignored and
     * duplicated candidates are evaluated once.
     *
     * @param string[] $delimiters the delimiters to consider
     * @param int      $nb_rows    Detection is made using $nb_rows of the CSV
     *
     * @return array
     */
    public function fetchDelimitersOccurrence(array $delimiters, int $nb_rows = 1): array
    {
        $nb_rows = $this->filterInteger($nb_rows, 1, 'The number of rows to consider must be a valid positive integer');

        $is_single_byte = function ($value) {
            return strlen($value) == 1;
        };
        $has_many_fields = function ($row) {
            return is_array($row) && count($row) > 1;
        };

        $candidates = array_unique(array_filter($delimiters, $is_single_byte));
        $this->document->setFlags(SplFileObject::READ_CSV);

        $occurrences = [];
        foreach ($candidates as $candidate) {
            // Re-parse the first $nb_rows records with the candidate delimiter.
            $this->document->setCsvControl($candidate, $this->enclosure, $this->escape);
            $sample = new LimitIterator($this->document, 0, $nb_rows);
            $records = iterator_to_array(new CallbackFilterIterator($sample, $has_many_fields), false);
            $occurrences[$candidate] = count($records, COUNT_RECURSIVE);
        }
        arsort($occurrences, SORT_NUMERIC);

        return $occurrences;
    }
124
125
    /**
     * Returns a collection of selected records
     *
     * When no statement is given, a new default Statement is used.
     *
     * @param Statement|null $stmt the statement used to process this reader
     *
     * @return RecordSet
     */
    public function select(Statement $stmt = null): RecordSet
    {
        return ($stmt ?? new Statement())->process($this);
    }
138
139
    /**
     * Returns an iterator over the CSV records
     *
     * Values that are not arrays and records equal to [null] are skipped.
     * The input BOM, if any, is stripped from the record at offset 0 and,
     * when a header offset is set, each remaining record is combined with
     * the header.
     *
     * @inheritdoc
     */
    public function getIterator(): Iterator
    {
        $bom = $this->getInputBOM();
        // getHeader() moves the document cursor, so it must run before the
        // flags and CSV controls are (re)applied for the iteration below.
        $header = $this->getHeader();
        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $normalized = function ($row) {
            return is_array($row) && $row != [null];
        };
        $iterator = new CallbackFilterIterator($this->document, $normalized);

        // The BOM must be stripped while records are still plain lists:
        // removeBOM() reads index 0, which no longer exists once the record
        // has been combined with the header into an associative array.
        $iterator = $this->stripBOM($iterator, $bom);

        return $this->combineHeader($iterator, $header);
    }
156
157
    /**
     * Combines each record with the CSV header when a header offset is set
     *
     * Without a header offset the iterator is returned untouched. Otherwise
     * the record located at the header offset is skipped and every other
     * record is padded with null or truncated to the header size before its
     * values are keyed by the header column names.
     *
     * @param Iterator $iterator
     * @param string[] $header
     *
     * @return Iterator
     */
    protected function combineHeader(Iterator $iterator, array $header): Iterator
    {
        if (null === $this->header_offset) {
            return $iterator;
        }

        $columns = $this->filterColumnNames($header);
        $nb_columns = count($columns);

        $skip_header_record = function (array $row, int $offset) {
            return $offset != $this->header_offset;
        };

        $key_by_columns = function (array $row) use ($nb_columns, $columns) {
            if (count($row) != $nb_columns) {
                // Align the record size on the header size.
                $row = array_slice(array_pad($row, $nb_columns, null), 0, $nb_columns);
            }

            return array_combine($columns, $row);
        };

        return new MapIterator(new CallbackFilterIterator($iterator, $skip_header_record), $key_by_columns);
    }
185
186
    /**
     * Strips the BOM sequence, if present, from the record at offset 0
     *
     * With an empty BOM the iterator is returned as is.
     *
     * @param Iterator $iterator
     * @param string   $bom
     *
     * @return Iterator
     */
    protected function stripBOM(Iterator $iterator, string $bom): Iterator
    {
        if ('' !== $bom) {
            $bom_length = mb_strlen($bom);
            $strip_first_record = function (array $row, $index) use ($bom_length) {
                // Only the record stored at offset 0 can start with the BOM.
                return 0 != $index
                    ? $row
                    : $this->removeBOM($row, $bom_length, $this->enclosure);
            };

            return new MapIterator($iterator, $strip_first_record);
        }

        return $iterator;
    }
209
210
    /**
     * Strips the BOM sequence from the first field of a record
     *
     * Once the BOM is removed, if the field both starts and ends with the
     * enclosure character, that pair of characters is removed as well.
     *
     * @param string[] $row
     * @param int      $bom_length the BOM length as counted by mb_strlen
     * @param string   $enclosure
     *
     * @return string[]
     */
    protected function removeBOM(array $row, int $bom_length, string $enclosure): array
    {
        if (0 == $bom_length) {
            return $row;
        }

        $first_field = mb_substr($row[0], $bom_length);
        $is_enclosed = $enclosure == mb_substr($first_field, 0, 1)
            && $enclosure == mb_substr($first_field, -1, 1);
        $row[0] = $is_enclosed ? mb_substr($first_field, 1, -1) : $first_field;

        return $row;
    }
232
233
    /**
     * Returns the header record associated with the CSV document
     *
     * The header is computed once and cached until resetDynamicProperties()
     * clears the cache flag. Without a header offset an empty array is
     * returned. A failed lookup is never cached: every call made while the
     * header record is missing or empty throws.
     *
     * @throws Exception If no header is found
     *
     * @return string[]
     */
    public function getHeader(): array
    {
        if ($this->is_header_loaded) {
            return $this->header;
        }

        if (null === $this->header_offset) {
            $this->header = [];
            $this->is_header_loaded = true;

            return $this->header;
        }

        $this->document->setFlags(SplFileObject::READ_CSV | SplFileObject::READ_AHEAD | SplFileObject::SKIP_EMPTY);
        $this->document->setCsvControl($this->delimiter, $this->enclosure, $this->escape);
        $this->document->seek($this->header_offset);
        $header = $this->document->current();
        if (empty($header)) {
            // The cache flag is still false here, so a later call retries
            // the lookup instead of returning a stale header.
            throw new Exception('The header record specified by `Reader::setHeaderOffset` does not exist or is empty');
        }

        if (0 === $this->header_offset) {
            // Only the record at offset 0 can start with the input BOM.
            $header = $this->removeBOM($header, mb_strlen($this->getInputBOM()), $this->enclosure);
        }

        $this->header = $header;
        // Flag the cache as valid only once the header is actually loaded.
        $this->is_header_loaded = true;

        return $this->header;
    }
271
}
272