Completed
Pull Request — master (#3)
by Ben
05:05
created

CsvReader::current()   B

Complexity

Conditions 8
Paths 20

Size

Total Lines 39

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 8.006

Importance

Changes 0
Metric Value
dl 0
loc 39
ccs 21
cts 22
cp 0.9545
rs 8.0515
c 0
b 0
f 0
cc 8
nc 20
nop 0
crap 8.006
1
<?php
2
3
namespace Port\Csv;
4
5
use Port\Exception\DuplicateHeadersException;
6
use Port\Reader\CountableReader;
7
8
/**
9
 * Reads a CSV file, using as little memory as possible
10
 *
11
 * @author David de Boer <[email protected]>
12
 */
13
class CsvReader implements CountableReader, \SeekableIterator
14
{
15
    const DUPLICATE_HEADERS_INCREMENT = 1;
16
    const DUPLICATE_HEADERS_MERGE     = 2;
17
18
    /**
19
     * Number of the row that contains the column names
20
     *
21
     * @var integer
22
     */
23
    protected $headerRowNumber;
24
25
    /**
26
     * CSV file
27
     *
28
     * @var \SplFileObject
29
     */
30
    protected $file;
31
32
    /**
33
     * Column headers as read from the CSV file
34
     *
35
     * @var array
36
     */
37
    protected $columnHeaders = [];
38
39
    /**
40
     * Number of column headers, stored and re-used for performance
41
     *
42
     * In case of duplicate headers, this is always the number of unmerged headers.
43
     *
44
     * @var integer
45
     */
46
    protected $headersCount;
47
48
    /**
49
     * Total number of rows in the CSV file
50
     *
51
     * @var integer
52
     */
53
    protected $count;
54
55
    /**
56
     * Faulty CSV rows
57
     *
58
     * @var array
59
     */
60
    protected $errors = [];
61
62
    /**
63
     * Strict parsing - skip any lines mismatching header length
64
     *
65
     * @var boolean
66
     */
67
    protected $strict = true;
68
69
    /**
70
     * How to handle duplicate headers
71
     *
72
     * @var integer
73
     */
74
    protected $duplicateHeadersFlag;
75
76
    /**
     * Wrap a file object and configure it for CSV reading
     *
     * The file is switched to CSV mode with read-ahead enabled; the
     * SKIP_EMPTY and DROP_NEW_LINE flags are set as well, and the given CSV
     * control characters are applied to the file.
     *
     * @param \SplFileObject $file      File to read rows from
     * @param string         $delimiter Field delimiter
     * @param string         $enclosure Field enclosure character
     * @param string         $escape    Escape character
     */
    public function __construct(\SplFileObject $file, $delimiter = ',', $enclosure = '"', $escape = '\\')
    {
        ini_set('auto_detect_line_endings', true);

        $csvFlags = \SplFileObject::READ_CSV
            | \SplFileObject::SKIP_EMPTY
            | \SplFileObject::READ_AHEAD
            | \SplFileObject::DROP_NEW_LINE;

        $file->setFlags($csvFlags);
        $file->setCsvControl($delimiter, $enclosure, $escape);

        $this->file = $file;
    }
99
100
    /**
     * Return the current row as an array
     *
     * Without column headers the raw value of the underlying file is
     * returned. With column headers an associative array keyed by header
     * name is returned. Rows that cannot be mapped onto the headers are
     * recorded in the error list and skipped, so the row that is returned may
     * lie further down the file than the position the reader was at when this
     * method was called.
     *
     * @return array|null The row, or null when no mappable row is left
     */
    public function current()
    {
        // If the CSV has no column headers just return the line
        if (empty($this->columnHeaders)) {
            return $this->file->current();
        }

        $headerNames = array_keys($this->columnHeaders);
        $mergeDuplicates = self::DUPLICATE_HEADERS_MERGE === $this->duplicateHeadersFlag;

        // Since the CSV has column headers use them to construct an
        // associative array for the columns in this line
        while ($this->valid()) {
            $line = $this->file->current();

            // SplFileObject::current() is not guaranteed to return an array;
            // anything else cannot be mapped onto the headers.
            if (!is_array($line)) {
                $this->skipFaultyRow($line);
                continue;
            }

            // In non-strict mode pad/slice the line to match the column headers
            if (!$this->isStrict()) {
                $line = $this->headersCount > count($line)
                    ? array_pad($line, $this->headersCount, null)   // Line too short
                    : array_slice($line, 0, $this->headersCount);   // Line too long
            }

            if ($mergeDuplicates) {
                // Validate BEFORE merging: the merged result always contains
                // one entry per distinct header, so a count check performed
                // afterwards could never reject a row of the wrong length.
                if (count($line) !== $this->headersCount) {
                    $this->skipFaultyRow($line);
                    continue;
                }

                $line = $this->mergeDuplicates($line);
            }

            // Count the number of elements in both: they must be equal.
            if (count($headerNames) === count($line)) {
                return array_combine($headerNames, $line);
            }

            // They are not equal, so log the row as error and skip it.
            $this->skipFaultyRow($line);
        }

        return null;
    }

    /**
     * Record the row at the current position as faulty and move to the next row
     *
     * @param mixed $line Raw value read for the faulty row
     */
    private function skipFaultyRow($line)
    {
        $this->errors[$this->key()] = $line;
        $this->next();
    }
146
147
    /**
     * Get the names of the column headers
     *
     * Each distinct header name is listed once.
     *
     * @return array
     */
    public function getColumnHeaders()
    {
        $names = array_keys($this->columnHeaders);

        return $names;
    }
156
157
    /**
     * Set column headers
     *
     * Remembers how many headers were passed in and stores the headers as a
     * map of header name to its number of occurrences.
     *
     * @param array $columnHeaders
     */
    public function setColumnHeaders(array $columnHeaders)
    {
        $this->headersCount = count($columnHeaders);
        $this->columnHeaders = array_count_values($columnHeaders);
    }
167
168
    /**
     * Set header row number
     *
     * Stores the duplicate handling mode and the row number, then reads that
     * row and uses it as the column headers.
     *
     * @param integer $rowNumber  Number of the row that contains column header names
     * @param integer $duplicates How to handle duplicates (optional). One of:
     *                        - CsvReader::DUPLICATE_HEADERS_INCREMENT;
     *                        increments duplicates (dup, dup1, dup2 etc.)
     *                        - CsvReader::DUPLICATE_HEADERS_MERGE; merges
     *                        values for duplicate headers into an array
     *                        (dup => [value1, value2, value3])
     *
     * @throws DuplicateHeadersException If duplicate headers are encountered
     *                                   and no duplicate handling has been
     *                                   specified
     */
    public function setHeaderRowNumber($rowNumber, $duplicates = null)
    {
        // The flag must be in place first: reading the header row consults it.
        $this->duplicateHeadersFlag = $duplicates;
        $this->headerRowNumber = $rowNumber;

        $this->setColumnHeaders($this->readHeaderRow($rowNumber));
    }
191
192
    /**
     * Rewind the file pointer
     *
     * When a header row number is known, the pointer is placed on the row
     * directly after it, so that iterating over the reader never yields the
     * header row itself.
     */
    public function rewind()
    {
        $this->file->rewind();

        if (null === $this->headerRowNumber) {
            return;
        }

        $firstDataRow = $this->headerRowNumber + 1;
        $this->file->seek($firstDataRow);
    }
206
207
    /**
     * Count the rows of the reader
     *
     * The result is computed once by iterating over the reader and is cached
     * afterwards; the position held before counting is restored.
     *
     * @return integer
     */
    public function count()
    {
        if (null !== $this->count) {
            return $this->count;
        }

        $bookmark = $this->key();
        $this->count = iterator_count($this);
        $this->seek($bookmark);

        return $this->count;
    }
222
223
    /**
     * Advance the underlying file to its next row
     */
    public function next()
    {
        $this->file->next();
    }
230
231
    /**
     * Tell whether the underlying file is at a valid position
     *
     * @return boolean
     */
    public function valid()
    {
        $hasRow = $this->file->valid();

        return $hasRow;
    }
238
239
    /**
     * Get the position reported by the underlying file
     *
     * @return integer
     */
    public function key()
    {
        $position = $this->file->key();

        return $position;
    }
246
247
    /**
     * Move the underlying file to the given position
     *
     * @param integer $pointer Position to move to
     */
    public function seek($pointer)
    {
        $this->file->seek($pointer);
    }
254
255
    /**
     * Get a row
     *
     * Moves the reader to the requested row and returns whatever current()
     * yields from there.
     *
     * @param integer $number Row number
     *
     * @return array
     */
    public function getRow($number)
    {
        $this->seek($number);
        $row = $this->current();

        return $row;
    }
268
269
    /**
     * Get rows that have an invalid number of columns
     *
     * Faulty rows are collected while rows are being read. When the reader is
     * still at position 0 it is taken to be unprocessed and is walked through
     * once so the error list gets filled.
     *
     * @return array
     */
    public function getErrors()
    {
        if (0 === $this->key()) {
            // Iterator has not yet been processed, so do that now; reading
            // each row is what records the faulty ones.
            for ($this->rewind(); $this->valid(); $this->next()) {
                $this->current();
            }
        }

        return $this->errors;
    }
283
284
    /**
     * Does the reader contain any invalid rows?
     *
     * @return boolean
     */
    public function hasErrors()
    {
        $errors = $this->getErrors();

        return 0 < count($errors);
    }
293
294
    /**
     * Is strict parsing enabled?
     *
     * @return boolean
     */
    public function isStrict()
    {
        $strict = $this->strict;

        return $strict;
    }
303
304
    /**
     * Enable or disable strict parsing
     *
     * @param boolean $strict Whether rows must match the header length
     */
    public function setStrict($strict)
    {
        $this->strict = $strict;
    }
313
314
    /**
     * Read header row from CSV file
     *
     * Moves the file to the given row and returns its values. When the row
     * contains duplicate values, the configured duplicate handling decides
     * the outcome: the headers are made unique by incrementing, returned
     * untouched for merging, or rejected.
     *
     * @param integer $rowNumber Row number
     *
     * @return array
     *
     * @throws DuplicateHeadersException  If the row contains duplicates and no
     *                                    duplicate handling has been specified
     * @throws \UnexpectedValueException If the row could not be read as an
     *                                    array of values
     */
    protected function readHeaderRow($rowNumber)
    {
        $this->file->seek($rowNumber);
        $headers = $this->file->current();

        // SplFileObject::current() may return something other than an array;
        // fail with a clear message instead of passing it to the array
        // functions below.
        if (!is_array($headers)) {
            throw new \UnexpectedValueException(
                sprintf('Row %s could not be read as a CSV header row', $rowNumber)
            );
        }

        // Test for duplicate column headers
        $diff = array_diff_assoc($headers, array_unique($headers));
        if (0 === count($diff)) {
            return $headers;
        }

        switch ($this->duplicateHeadersFlag) {
            case self::DUPLICATE_HEADERS_INCREMENT:
                return $this->incrementHeaders($headers);
            case self::DUPLICATE_HEADERS_MERGE:
                // Duplicates are kept as they are; merging happens per row.
                return $headers;
            default:
                throw new DuplicateHeadersException($diff);
        }
    }
344
345
    /**
     * Add an increment to duplicate headers
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => 'first', $duplicate1 => 'second', $duplicate2 => 'third'
     *
     * The headers keep their original column positions, so duplicates do not
     * have to be adjacent: |a|b|a| becomes |a|b|a1|. A generated name never
     * equals a name that is already present in the header row.
     *
     * @param array $headers
     *
     * @return array
     */
    protected function incrementHeaders(array $headers)
    {
        // Every name that is in use, so generated names cannot collide with
        // a header that already exists in the row.
        $taken = array_fill_keys($headers, true);

        // Header name => next suffix to try for a repeated occurrence.
        $nextSuffix = [];

        $incrementedHeaders = [];
        foreach ($headers as $header) {
            if (!isset($nextSuffix[$header])) {
                // First occurrence keeps its name
                $nextSuffix[$header] = 1;
                $incrementedHeaders[] = $header;
                continue;
            }

            do {
                $candidate = $header . $nextSuffix[$header]++;
            } while (isset($taken[$candidate]));

            $taken[$candidate] = true;
            $incrementedHeaders[] = $candidate;
        }

        return $incrementedHeaders;
    }
375
376
    /**
     * Merges values for duplicate headers into an array
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => ['first', 'second', 'third']
     *
     * The result holds one entry per distinct column header. A column that
     * is missing from a line that is too short yields null for a
     * single-value header.
     *
     * NOTE: values are taken from the line in consecutive runs, one run per
     * distinct header, so all occurrences of a duplicated header are assumed
     * to sit in adjacent columns.
     *
     * @param array $line
     *
     * @return array
     */
    protected function mergeDuplicates(array $line)
    {
        $values = [];

        $offset = 0;
        foreach ($this->columnHeaders as $count) {
            if (1 === $count) {
                // Guard the lookup: the line may be shorter than the headers
                $values[] = array_key_exists($offset, $line) ? $line[$offset] : null;
            } else {
                $values[] = array_slice($line, $offset, $count);
            }

            $offset += $count;
        }

        return $values;
    }
407
}
408