Completed
Push — master ( b70e9f...baecac )
by David de Boer
05:17
created

CsvReader::getFields()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 1
1
<?php
2
3
namespace Port\Csv;
4
5
use Port\Exception\DuplicateHeadersException;
6
use Port\Reader\CountableReader;
7
8
/**
9
 * Reads a CSV file, using as little memory as possible
10
 *
11
 * @author David de Boer <[email protected]>
12
 */
13
class CsvReader implements CountableReader, \SeekableIterator
0 ignored issues
show
Bug introduced by
There is one abstract method getFields in this class; you could implement it, or declare this class as abstract.
Loading history...
14
{
15
    const DUPLICATE_HEADERS_INCREMENT = 1;
16
    const DUPLICATE_HEADERS_MERGE     = 2;
17
18
    /**
19
     * Number of the row that contains the column names
20
     *
21
     * @var integer
22
     */
23
    protected $headerRowNumber;
24
25
    /**
26
     * CSV file
27
     *
28
     * @var \SplFileObject
29
     */
30
    protected $file;
31
32
    /**
33
     * Column headers as read from the CSV file
34
     *
35
     * @var array
36
     */
37
    protected $columnHeaders = [];
38
39
    /**
40
     * Number of column headers, stored and re-used for performance
41
     *
42
     * In case of duplicate headers, this is always the number of unmerged headers.
43
     *
44
     * @var integer
45
     */
46
    protected $headersCount;
47
48
    /**
49
     * Total number of rows in the CSV file
50
     *
51
     * @var integer
52
     */
53
    protected $count;
54
55
    /**
56
     * Faulty CSV rows
57
     *
58
     * @var array
59
     */
60
    protected $errors = [];
61
62
    /**
63
     * Strict parsing - skip any lines mismatching header length
64
     *
65
     * @var boolean
66
     */
67
    protected $strict = true;
68
69
    /**
70
     * How to handle duplicate headers
71
     *
72
     * @var integer
73
     */
74
    protected $duplicateHeadersFlag;
75
76
    /**
     * Wrap a file object so that it is read as CSV
     *
     * The file object is switched to CSV reading (skipping empty lines,
     * reading ahead and dropping trailing newlines) and the given CSV control
     * characters are applied to it.
     *
     * @param \SplFileObject $file      File to read rows from
     * @param string         $delimiter Field delimiter
     * @param string         $enclosure Field enclosure character
     * @param string         $escape    Escape character
     */
    public function __construct(\SplFileObject $file, $delimiter = ',', $enclosure = '"', $escape = '\\')
    {
        // NOTE(review): newer PHP releases (8.1+) reportedly deprecate this
        // ini setting — confirm against the PHP versions this class supports.
        ini_set('auto_detect_line_endings', true);

        $readFlags = \SplFileObject::READ_CSV
            | \SplFileObject::SKIP_EMPTY
            | \SplFileObject::READ_AHEAD
            | \SplFileObject::DROP_NEW_LINE;

        $this->file = $file;
        $this->file->setFlags($readFlags);
        $this->file->setCsvControl($delimiter, $enclosure, $escape);
    }
99
100
    /**
     * Return the current row as an array
     *
     * Without column headers the raw line from the file is returned as-is.
     * With column headers an associative array keyed by header is returned.
     * A row whose column count does not match the headers is stored in the
     * error list (keyed by its position in the file) and skipped, after which
     * the following row is tried.
     *
     * In non-strict mode rows are padded with null or truncated to the header
     * count before they are checked, so they always match.
     *
     * @return array|null The row, or null when no further matching row exists
     */
    public function current()
    {
        // If the CSV has no column headers just return the line
        if (empty($this->columnHeaders)) {
            return $this->file->current();
        }

        $merge = self::DUPLICATE_HEADERS_MERGE === $this->duplicateHeadersFlag;

        // Since the CSV has column headers use them to construct an
        // associative array for the columns in this line
        do {
            $line = $this->file->current();

            // The file object can hand back a non-array value; such a value
            // can never match the headers, so it is handled as a faulty row.
            if (is_array($line)) {
                // In non-strict mode pad/slice the line to match the column headers
                if (!$this->isStrict()) {
                    if ($this->headersCount > count($line)) {
                        $line = array_pad($line, $this->headersCount, null); // Line too short
                    } else {
                        $line = array_slice($line, 0, $this->headersCount); // Line too long
                    }
                }

                if ($merge) {
                    // mergeDuplicates() always yields one value per distinct
                    // header, so the raw column count has to be validated
                    // BEFORE merging; otherwise malformed rows go unnoticed.
                    if (count($line) === $this->headersCount) {
                        return array_combine(
                            array_keys($this->columnHeaders),
                            $this->mergeDuplicates($line)
                        );
                    }
                } elseif (count($this->columnHeaders) === count($line)) {
                    return array_combine(array_keys($this->columnHeaders), $line);
                }
            }

            // The row does not match the headers: log it as error and skip it.
            if ($this->valid()) {
                $this->errors[$this->key()] = $line;
                $this->next();
            }
        } while ($this->valid());

        return null;
    }
146
147
    /**
     * Get the distinct column header names
     *
     * The names are the keys of the internal header map, so a header that
     * occurs several times is listed once.
     *
     * @return array
     */
    public function getColumnHeaders()
    {
        $names = array_keys($this->columnHeaders);

        return $names;
    }
156
157
    /**
     * Set column headers
     *
     * Stores the total number of headers (duplicates included) and a map of
     * header name => number of occurrences.
     *
     * @param array $columnHeaders Header names in column order
     */
    public function setColumnHeaders(array $columnHeaders)
    {
        $this->headersCount = count($columnHeaders);
        $this->columnHeaders = array_count_values($columnHeaders);
    }
167
168
    /**
     * Set header row number
     *
     * Remembers the duplicate-handling mode and the header row position, then
     * reads that row from the file and uses it as the column headers.
     *
     * @param integer $rowNumber  Number of the row that contains column header names
     * @param integer $duplicates How to handle duplicates (optional). One of:
     *                        - CsvReader::DUPLICATE_HEADERS_INCREMENT;
     *                        increments duplicates (dup, dup1, dup2 etc.)
     *                        - CsvReader::DUPLICATE_HEADERS_MERGE; merges
     *                        values for duplicate headers into an array
     *                        (dup => [value1, value2, value3])
     *
     * @throws DuplicateHeadersException If duplicate headers are encountered
     *                                   and no duplicate handling has been
     *                                   specified
     */
    public function setHeaderRowNumber($rowNumber, $duplicates = null)
    {
        // The flag has to be in place first: readHeaderRow() consults it when
        // it runs into duplicate headers.
        $this->duplicateHeadersFlag = $duplicates;
        $this->headerRowNumber = $rowNumber;

        // NOTE(review): readHeaderRow() passes on whatever the file yields for
        // that row, while setColumnHeaders() only accepts an array.
        $this->setColumnHeaders($this->readHeaderRow($rowNumber));
    }
191
192
    /**
     * Rewind the file pointer
     *
     * The underlying file is always rewound. When a header row number is
     * known, the pointer is then moved to the row directly after it, so that
     * iterating over the rows does not yield the header row.
     */
    public function rewind()
    {
        $this->file->rewind();

        if (null === $this->headerRowNumber) {
            return;
        }

        $firstDataRow = $this->headerRowNumber + 1;
        $this->file->seek($firstDataRow);
    }
206
207
    /**
     * Count the rows by iterating over the reader once
     *
     * The result is cached; the position the file had before counting is
     * restored afterwards.
     *
     * {@inheritdoc}
     */
    public function count()
    {
        if (null !== $this->count) {
            return $this->count;
        }

        $resumeAt = $this->key();
        $this->count = iterator_count($this);
        $this->seek($resumeAt);

        return $this->count;
    }
222
223
    /**
     * Advance the underlying file to its next line
     *
     * {@inheritdoc}
     */
    public function next()
    {
        $source = $this->file;
        $source->next();
    }
230
231
    /**
     * Report whether the underlying file is at a valid position
     *
     * {@inheritdoc}
     */
    public function valid()
    {
        $isValid = $this->file->valid();

        return $isValid;
    }
238
239
    /**
     * Return the key reported by the underlying file for its current position
     *
     * {@inheritdoc}
     */
    public function key()
    {
        $position = $this->file->key();

        return $position;
    }
246
247
    /**
     * Move the underlying file to the given position
     *
     * {@inheritdoc}
     */
    public function seek($pointer)
    {
        $source = $this->file;
        $source->seek($pointer);
    }
254
255
    /**
     * Get a row
     *
     * Seeks to the given position and returns whatever current() yields
     * there. With column headers set, current() skips rows that do not match
     * the headers, so a later row (or null) may be returned.
     *
     * @param integer $number Row number
     *
     * @return array|null
     */
    public function getRow($number)
    {
        $this->seek($number);
        $row = $this->current();

        return $row;
    }
268
269
    /**
     * Get rows that have an invalid number of columns
     *
     * Faulty rows are only discovered while rows are being read. When the
     * reader still reports position 0, a full pass over all rows is made
     * first so that the error list is populated.
     *
     * @return array Faulty rows, keyed by their position in the file
     */
    public function getErrors()
    {
        if (0 === $this->key()) {
            // Iterator has not yet been processed, so do that now. The rows
            // themselves are discarded: reading them is what records errors.
            for ($this->rewind(); $this->valid(); $this->next()) {
                $this->current();
            }
        }

        return $this->errors;
    }
283
284
    /**
     * Does the reader contain any invalid rows?
     *
     * Relies on getErrors(), which may read through the rows first.
     *
     * @return boolean
     */
    public function hasErrors()
    {
        $faultyRows = $this->getErrors();

        return count($faultyRows) > 0;
    }
293
294
    /**
     * Should the reader use strict parsing?
     *
     * In strict mode rows whose length differs from the header length are
     * skipped; otherwise current() pads or truncates them to fit.
     *
     * @return boolean
     */
    public function isStrict()
    {
        $strictMode = $this->strict;

        return $strictMode;
    }
303
304
    /**
     * Set strict parsing
     *
     * The value is stored unchanged and returned by isStrict().
     *
     * @param boolean $strict Whether rows must match the header length
     */
    public function setStrict($strict)
    {
        $strictMode = $strict;
        $this->strict = $strictMode;
    }
313
314
    /**
     * Read header row from CSV file
     *
     * Seeks to the given row and returns its cells as the header names. When
     * the row contains duplicate names, the duplicate-handling flag decides
     * what happens: the names are incremented, left untouched (merge mode), or
     * an exception is thrown.
     *
     * @param integer $rowNumber Row number
     *
     * @return array
     *
     * @throws DuplicateHeadersException If duplicates are found and no
     *                                   duplicate handling has been specified
     * @throws \UnexpectedValueException If the file does not yield an array
     *                                   for the requested row
     */
    protected function readHeaderRow($rowNumber)
    {
        $this->file->seek($rowNumber);
        $headers = $this->file->current();

        // The file object may hand back a non-array value; fail with a clear
        // message instead of passing it on to the array functions below.
        if (!is_array($headers)) {
            throw new \UnexpectedValueException(
                sprintf('Row %d could not be read as a CSV header row', $rowNumber)
            );
        }

        // Test for duplicate column headers
        $diff = array_diff_assoc($headers, array_unique($headers));
        if (count($diff) > 0) {
            switch ($this->duplicateHeadersFlag) {
                case self::DUPLICATE_HEADERS_INCREMENT:
                    $headers = $this->incrementHeaders($headers);
                    break;
                case self::DUPLICATE_HEADERS_MERGE:
                    // Keep the duplicates; they are merged per row later on
                    break;
                default:
                    throw new DuplicateHeadersException($diff);
            }
        }

        return $headers;
    }
344
345
    /**
     * Add an increment to duplicate headers
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => 'first', $duplicate1 => 'second', $duplicate2 => 'third'
     *
     * Every header keeps its own column position, also when the duplicates
     * are not next to each other: |a|b|a| becomes |a|b|a1|.
     *
     * @param array $headers Header names in column order
     *
     * @return array Header names in the same order, duplicates suffixed
     */
    protected function incrementHeaders(array $headers)
    {
        $incrementedHeaders = [];

        // Number of repeats seen so far, per header name
        $repeats = [];

        foreach ($headers as $header) {
            $name = (string) $header;

            if (!isset($repeats[$name])) {
                // First occurrence keeps its name
                $repeats[$name] = 0;
                $incrementedHeaders[] = $header;
                continue;
            }

            $repeats[$name]++;
            $incrementedHeaders[] = $header . $repeats[$name];
        }

        return $incrementedHeaders;
    }
375
376
    /**
     * Merges values for duplicate headers into an array
     *
     * So the following line:
     * |duplicate|duplicate|duplicate|
     * |first    |second   |third    |
     *
     * Yields value:
     * $duplicate => ['first', 'second', 'third']
     *
     * NOTE(review): cells are consumed left to right in the order of the
     * header map, so duplicate headers are presumably expected to sit next to
     * each other in the file — confirm.
     *
     * @param array $line Cells of one row
     *
     * @return array One entry per distinct header
     */
    protected function mergeDuplicates(array $line)
    {
        $merged = [];
        $offset = 0;

        foreach ($this->columnHeaders as $occurrences) {
            // A header that occurs once keeps its scalar cell; repeated
            // headers get the matching run of cells as an array.
            $merged[] = (1 === $occurrences)
                ? $line[$offset]
                : array_slice($line, $offset, $occurrences);

            $offset += $occurrences;
        }

        return $merged;
    }
407
}
408