CsvReader::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 17
Code Lines 12

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.4285
c 0
b 0
f 0
ccs 13
cts 13
cp 1
cc 1
eloc 12
nc 1
nop 4
crap 1
1
<?php
2
3
namespace Ddeboer\DataImport\Reader;
4
5
use Ddeboer\DataImport\Exception\DuplicateHeadersException;
6
7
/**
8
 * Reads a CSV file, using as little memory as possible
9
 *
10
 * @author David de Boer <[email protected]>
11
 */
12
class CsvReader implements CountableReader, \SeekableIterator
{
    /**
     * Duplicate header handling: rename duplicates by appending a counter
     * (dup, dup1, dup2, ...)
     */
    const DUPLICATE_HEADERS_INCREMENT = 1;

    /**
     * Duplicate header handling: collect the values of duplicate headers
     * into a single array (dup => [value1, value2, value3])
     */
    const DUPLICATE_HEADERS_MERGE     = 2;

    /**
     * Number of the row that contains the column names
     *
     * @var integer
     */
    protected $headerRowNumber;

    /**
     * CSV file
     *
     * @var \SplFileObject
     */
    protected $file;

    /**
     * Column headers, stored as a map of header name => number of occurrences
     *
     * @var array
     */
    protected $columnHeaders = [];

    /**
     * Column header names in their original column order, duplicates included
     *
     * Used to map every column of a row back to its header when the values
     * of duplicate headers are merged.
     *
     * @var array
     */
    protected $headerNames = [];

    /**
     * Number of column headers, stored and re-used for performance
     *
     * In case of duplicate headers, this is always the number of unmerged headers.
     *
     * @var integer
     */
    protected $headersCount;

    /**
     * Total number of rows in the CSV file
     *
     * @var integer
     */
    protected $count;

    /**
     * Faulty CSV rows
     *
     * @var array
     */
    protected $errors = [];

    /**
     * Strict parsing - skip any lines mismatching header length
     *
     * @var boolean
     */
    protected $strict = true;

    /**
     * How to handle duplicate headers
     *
     * @var integer
     */
    protected $duplicateHeadersFlag;

    /**
     * Configure the given file object for CSV reading
     *
     * Note that this changes the flags and CSV control characters of the
     * passed-in file object and enables the auto_detect_line_endings ini
     * setting for the running process.
     *
     * @param \SplFileObject $file
     * @param string         $delimiter
     * @param string         $enclosure
     * @param string         $escape
     */
    public function __construct(\SplFileObject $file, $delimiter = ',', $enclosure = '"', $escape = '\\')
    {
        ini_set('auto_detect_line_endings', true);

        $this->file = $file;
        $this->file->setFlags(
            \SplFileObject::READ_CSV |
            \SplFileObject::SKIP_EMPTY |
            \SplFileObject::READ_AHEAD |
            \SplFileObject::DROP_NEW_LINE
        );
        $this->file->setCsvControl(
            $delimiter,
            $enclosure,
            $escape
        );
    }

    /**
     * Return the current row as an array
     *
     * If column headers have been set, an associative array keyed by header
     * name is returned. Rows that do not match the headers are recorded as
     * errors (see getErrors()) and skipped, so the row returned may lie
     * further down the file than the position at which the call started.
     *
     * @return array|null The row, or null when no further valid row exists
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        // If the CSV has no column headers just return the line
        if (empty($this->columnHeaders)) {
            return $this->file->current();
        }

        $mergeDuplicates = self::DUPLICATE_HEADERS_MERGE === $this->duplicateHeadersFlag;

        // Since the CSV has column headers use them to construct an associative array for the columns in this line
        do {
            $line = $this->file->current();

            // The file object does not always hand back an array (for
            // instance at the end of the file); such a value can never match
            // the headers, so it goes straight to the error handling below.
            if (is_array($line)) {
                // In non-strict mode pad/slice the line to match the column headers
                if (!$this->isStrict()) {
                    if ($this->headersCount > count($line)) {
                        $line = array_pad($line, $this->headersCount, null); // Line too short
                    } else {
                        $line = array_slice($line, 0, $this->headersCount); // Line too long
                    }
                }

                if ($mergeDuplicates) {
                    // Merging always yields one value per unique header, so
                    // the length check has to happen on the raw line;
                    // otherwise short or long rows would slip through.
                    $matches = $this->headersCount === count($line);
                    $row     = $matches ? $this->mergeDuplicates($line) : $line;
                } else {
                    // Count the number of elements in both: they must be equal.
                    $matches = count($this->columnHeaders) === count($line);
                    $row     = $line;
                }

                if ($matches) {
                    return array_combine(array_keys($this->columnHeaders), $row);
                }
            }

            // They are not equal, so log the row as error and skip it.
            if ($this->valid()) {
                $this->errors[$this->key()] = $line;
                $this->next();
            }
        } while ($this->valid());

        return null;
    }

    /**
     * Get column headers
     *
     * Duplicate headers are reported only once.
     *
     * @return array
     */
    public function getColumnHeaders()
    {
        return array_keys($this->columnHeaders);
    }

    /**
     * Set column headers
     *
     * @param array $columnHeaders Header names in column order
     */
    public function setColumnHeaders(array $columnHeaders)
    {
        $this->headerNames   = array_values($columnHeaders);
        $this->columnHeaders = array_count_values($columnHeaders);
        $this->headersCount  = count($columnHeaders);
    }

    /**
     * Set header row number
     *
     * @param integer $rowNumber  Number of the row that contains column header names
     * @param integer $duplicates How to handle duplicates (optional). One of:
     *                        - CsvReader::DUPLICATE_HEADERS_INCREMENT;
     *                        increments duplicates (dup, dup1, dup2 etc.)
     *                        - CsvReader::DUPLICATE_HEADERS_MERGE; merges
     *                        values for duplicate headers into an array
     *                        (dup => [value1, value2, value3])
     *
     * @throws DuplicateHeadersException If duplicate headers are encountered
     *                                   and no duplicate handling has been
     *                                   specified
     * @throws \UnexpectedValueException If the header row cannot be read
     */
    public function setHeaderRowNumber($rowNumber, $duplicates = null)
    {
        $this->duplicateHeadersFlag = $duplicates;
        $this->headerRowNumber = $rowNumber;
        $headers = $this->readHeaderRow($rowNumber);

        $this->setColumnHeaders($headers);

        // rewind() now starts below the header row, so a row count cached
        // earlier no longer applies.
        $this->count = null;
    }

    /**
     * Rewind the file pointer
     *
     * If a header row has been set, the pointer is set just below the header
     * row. That way, when you iterate over the rows, that header row is
     * skipped.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->file->rewind();
        if (null !== $this->headerRowNumber) {
            $this->file->seek($this->headerRowNumber + 1);
        }
    }

    /**
     * {@inheritdoc}
     *
     * The result is computed once by iterating over the reader and is cached
     * afterwards; the position held before the call is restored.
     */
    #[\ReturnTypeWillChange]
    public function count()
    {
        if (null === $this->count) {
            $position = $this->key();

            $this->count = iterator_count($this);

            $this->seek($position);
        }

        return $this->count;
    }

    /**
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->file->next();
    }

    /**
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->file->valid();
    }

    /**
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->file->key();
    }

    /**
     * {@inheritdoc}
     */
    #[\ReturnTypeWillChange]
    public function seek($pointer)
    {
        $this->file->seek($pointer);
    }

    /**
     * {@inheritdoc}
     */
    public function getFields()
    {
        return $this->getColumnHeaders();
    }

    /**
     * Get a row
     *
     * Moves the file pointer to the requested row before reading it.
     *
     * @param integer $number Row number
     *
     * @return array
     */
    public function getRow($number)
    {
        $this->seek($number);

        return $this->current();
    }

    /**
     * Get rows that have an invalid number of columns
     *
     * @return array Faulty rows, keyed by their position in the file
     */
    public function getErrors()
    {
        if (0 === $this->key()) {
            // Iterator has not yet been processed, so do that now. The rows
            // themselves are not needed: reading each one through current()
            // is what records the faulty rows.
            for ($this->rewind(); $this->valid(); $this->next()) {
                $this->current();
            }
        }

        return $this->errors;
    }

    /**
     * Does the reader contain any invalid rows?
     *
     * @return boolean
     */
    public function hasErrors()
    {
        return count($this->getErrors()) > 0;
    }

    /**
     * Should the reader use strict parsing?
     *
     * @return boolean
     */
    public function isStrict()
    {
        return $this->strict;
    }

    /**
     * Set strict parsing
     *
     * @param boolean $strict
     */
    public function setStrict($strict)
    {
        $this->strict = $strict;
    }

    /**
     * Read header row from CSV file
     *
     * @param integer $rowNumber Row number
     *
     * @return array
     *
     * @throws DuplicateHeadersException If the row contains duplicate headers
     *                                   and no duplicate handling is active
     * @throws \UnexpectedValueException If the row cannot be read as an array
     */
    protected function readHeaderRow($rowNumber)
    {
        $this->file->seek($rowNumber);
        $headers = $this->file->current();

        // Fail with a clear message instead of passing a non-array on to the
        // array functions below and to setColumnHeaders().
        if (!is_array($headers)) {
            throw new \UnexpectedValueException(
                sprintf('Row %s could not be read as a CSV header row.', $rowNumber)
            );
        }

        // Test for duplicate column headers
        $diff = array_diff_assoc($headers, array_unique($headers));
        if (count($diff) > 0) {
            switch ($this->duplicateHeadersFlag) {
                case self::DUPLICATE_HEADERS_INCREMENT:
                    $headers = $this->incrementHeaders($headers);
                    break;
                case self::DUPLICATE_HEADERS_MERGE:
                    // Headers stay untouched; values are merged per row
                    break;
                default:
                    throw new DuplicateHeadersException($diff);
            }
        }

        return $headers;
    }

    /**
     * Add an increment to duplicate headers
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => 'first', $duplicate1 => 'second', $duplicate2 => 'third'
     *
     * Headers keep their column position, so duplicates that are not next to
     * each other (a, b, a) are renamed in place (a, b, a1).
     *
     * @param array $headers
     *
     * @return array
     */
    protected function incrementHeaders(array $headers)
    {
        $incrementedHeaders = [];
        $occurrences = [];

        foreach ($headers as $header) {
            if (isset($occurrences[$header])) {
                // Second and later occurrences get the running counter appended
                $incrementedHeaders[] = $header . $occurrences[$header];
                $occurrences[$header]++;
            } else {
                $incrementedHeaders[] = $header;
                $occurrences[$header] = 1;
            }
        }

        return $incrementedHeaders;
    }

    /**
     * Merges values for duplicate headers into an array
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => ['first', 'second', 'third']
     *
     * The result holds one entry per unique header, in the order of
     * getColumnHeaders(). Columns missing from the line yield null.
     *
     * @param array $line
     *
     * @return array
     */
    protected function mergeDuplicates(array $line)
    {
        $values = [];

        if (empty($this->headerNames)) {
            // No column order is known (headers were not set through
            // setColumnHeaders()), so assume duplicates are adjacent.
            $i = 0;
            foreach ($this->columnHeaders as $count) {
                if (1 === $count) {
                    $values[] = isset($line[$i]) ? $line[$i] : null;
                } else {
                    $values[] = array_slice($line, $i, $count);
                }

                $i += $count;
            }

            return $values;
        }

        // Collect the values of every column under its header name, so that
        // duplicate headers do not have to sit next to each other.
        $grouped = [];
        foreach ($this->headerNames as $position => $name) {
            if (!is_string($name) && !is_int($name)) {
                continue; // Not countable as a header, so it has no column entry
            }

            $grouped[$name][] = isset($line[$position]) ? $line[$position] : null;
        }

        foreach ($this->columnHeaders as $name => $count) {
            $values[] = 1 === $count ? $grouped[$name][0] : $grouped[$name];
        }

        return $values;
    }
}
415