Passed
Push — develop ( 4e8e0d...79d86e )
by Adrien
35:58 queued 26:47
created

Csv::setInputEncoding()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 5
ccs 0
cts 3
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8
9
class Csv extends BaseReader
10
{
11
    /**
12
     * Input encoding.
13
     *
14
     * @var string
15
     */
16
    private $inputEncoding = 'UTF-8';
17
18
    /**
19
     * Delimiter.
20
     *
21
     * @var string
22
     */
23
    private $delimiter;
24
25
    /**
26
     * Enclosure.
27
     *
28
     * @var string
29
     */
30
    private $enclosure = '"';
31
32
    /**
33
     * Sheet index to read.
34
     *
35
     * @var int
36
     */
37
    private $sheetIndex = 0;
38
39
    /**
40
     * Load rows contiguously.
41
     *
42
     * @var bool
43
     */
44
    private $contiguous = false;
45
46
    /**
47
     * Row counter for loading rows contiguously.
48
     *
49
     * @var int
50
     */
51
    private $contiguousRow = -1;
52
53
    /**
54
     * The character that can escape the enclosure.
55
     *
56
     * @var string
57
     */
58
    private $escapeCharacter = '\\';
59
60
    /**
     * Build a CSV reader.
     *
     * The reader starts out with a DefaultReadFilter as its cell read filter.
     */
    public function __construct()
    {
        $this->readFilter = new DefaultReadFilter();
    }
67
68
    /**
     * Declare the character encoding of the CSV file to be read.
     *
     * The stored value selects which byte-order mark skipBOM() looks for and
     * whether loadIntoExisting() converts cell values to UTF-8.
     *
     * @param string $pValue Input encoding, eg: 'UTF-8'
     *
     * @return $this
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }
81
82
    /**
     * Report the character encoding the reader currently assumes for its input.
     *
     * @return string Input encoding, 'UTF-8' unless changed through setInputEncoding()
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
91
92
    /**
     * Move filepointer past any BOM marker.
     *
     * The file is rewound first. For the encodings UTF-8, UTF-16LE, UTF-16BE,
     * UTF-32LE and UTF-32BE the leading bytes are compared with that encoding's
     * byte-order mark: on a match the pointer is placed just behind the mark,
     * otherwise it is put back at offset 0. For any other encoding the pointer
     * simply stays at the start of the file.
     */
    protected function skipBOM()
    {
        rewind($this->fileHandle);

        // Select the byte-order mark that belongs to the configured encoding
        switch ($this->inputEncoding) {
            case 'UTF-8':
                $bom = "\xEF\xBB\xBF";

                break;
            case 'UTF-16LE':
                $bom = "\xFF\xFE";

                break;
            case 'UTF-16BE':
                $bom = "\xFE\xFF";

                break;
            case 'UTF-32LE':
                $bom = "\xFF\xFE\x00\x00";

                break;
            case 'UTF-32BE':
                $bom = "\x00\x00\xFE\xFF";

                break;
            default:
                // No known BOM for this encoding: nothing to skip
                return;
        }

        // fgets() reads at most (length - 1) bytes, hence the "+ 1"
        $bomLength = strlen($bom);
        $leadingBytes = fgets($this->fileHandle, $bomLength + 1);

        fseek($this->fileHandle, ($leadingBytes == $bom) ? $bomLength : 0);
    }
129
130
    /**
     * Identify any separator that is explicitly set in the file.
     *
     * Reads one line from the current file position. If that line is exactly
     * five characters long (line ending excluded) and starts with "sep="
     * (case-insensitive), its fifth character becomes the delimiter and the
     * file pointer is left behind that line. If a line was read but it is not
     * such a directive, the pointer is repositioned with skipBOM(). If no line
     * could be read at all, nothing is changed.
     */
    protected function checkSeparator()
    {
        $line = fgets($this->fileHandle);
        if ($line === false) {
            return;
        }

        if ((strlen(trim($line, "\r\n")) === 5) && (stripos($line, 'sep=') === 0)) {
            $this->delimiter = substr($line, 4, 1);

            return;
        }

        // Not a "sep=" directive: this line is data, so go back to the first data byte.
        // skipBOM() returns nothing, so its result must not be used as a return value.
        $this->skipBOM();
    }
148
149
    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * Does nothing when a delimiter is already known. Otherwise up to 999 lines
     * are sampled through getNextLine() and, for each candidate delimiter, the
     * number of occurrences per line is recorded. Candidates whose median count
     * is 0 are discarded; among the rest the one with the smallest mean square
     * deviation from its median wins (ties go to the candidate listed first).
     * If nothing can be inferred the first candidate (',') is used.
     *
     * The file pointer is repositioned with skipBOM() before returning whenever
     * sampling took place.
     */
    protected function inferSeparator()
    {
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
        $counts = array_fill_keys($potentialDelimiters, []);

        // Count how many times each of the potential delimiters appears in each line.
        // The cap is tested BEFORE a line is read so that $numberLines always equals the
        // number of lines actually recorded in $counts (the median below relies on that).
        $maxSampledLines = 999;
        $numberLines = 0;
        while ($numberLines < $maxSampledLines && ($line = $this->getNextLine()) !== false) {
            ++$numberLines;

            $countLine = array_fill_keys($potentialDelimiters, 0);
            for ($i = strlen($line) - 1; $i >= 0; --$i) {
                $char = $line[$i];
                if (isset($countLine[$char])) {
                    ++$countLine[$char];
                }
            }

            foreach ($potentialDelimiters as $delimiter) {
                $counts[$delimiter][] = $countLine[$delimiter];
            }
        }

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($numberLines === 0) {
            $this->delimiter = $potentialDelimiters[0];
            $this->skipBOM();

            return;
        }

        // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
        $meanSquareDeviations = [];
        $middleIdx = (int) floor(($numberLines - 1) / 2);

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = ($numberLines % 2)
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            if ($median === 0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                function ($sum, $value) use ($median) {
                    return $sum + pow($value - $median, 2);
                },
                0
            ) / count($series);
        }

        // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
        $min = INF;
        foreach ($potentialDelimiters as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = $potentialDelimiters[0];
        }

        // skipBOM() returns nothing, so it is called as a statement rather than returned
        $this->skipBOM();
    }
235
236
    /**
     * Get the next full line from the file.
     *
     * Physical lines are read and appended until the accumulated text contains
     * no unmatched enclosure character any more, so a record whose enclosed
     * field contains line breaks is returned as one logical line. Everything
     * between a pair of enclosure characters (the enclosures included) is
     * removed from the returned text so that delimiters inside enclosed fields
     * are not counted by the caller.
     *
     * @param string $line Text to which the lines read are appended
     *
     * @return bool|string The logical line with enclosed parts removed, or false when the end
     *                     of the file is reached (also when that happens inside an open enclosure)
     */
    private function getNextLine($line = '')
    {
        $enclosure = preg_quote($this->enclosure, '/');

        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Add the new line to what has been collected so far
            $line = $line . $newLine;

            // Drop everything that is enclosed to avoid counting false positives in enclosures.
            // The "s" modifier lets "." match line breaks; without it an enclosed field that
            // spans several lines is never matched and the whole rest of the file is consumed.
            $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/Us', '', $line);

            // If an enclosure is still left, the record continues on the next line
        } while (preg_match('/(' . $enclosure . ')/', $line) > 0);

        return $line;
    }
271
272
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is parsed once with fgetcsv() using the detected or configured
     * delimiter, enclosure and escape character. The result always holds a
     * single entry, named 'Worksheet'.
     *
     * @param string $pFilename
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return array
     */
    public function listWorksheetInfo($pFilename)
    {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        // Skip BOM, if any, then settle on a delimiter
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Walk through every record, tracking the row count and the widest row
        $totalRows = 0;
        $lastColumnIndex = 0;
        while (true) {
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter);
            if ($rowData === false) {
                break;
            }

            ++$totalRows;
            $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
        }

        $lastColumnLetter = Coordinate::stringFromColumnIndex($lastColumnIndex + 1);

        // Close file
        fclose($fileHandle);

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => $lastColumnLetter,
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $totalRows,
                'totalColumns' => $lastColumnIndex + 1,
            ],
        ];
    }
316
317
    /**
     * Loads Spreadsheet from file.
     *
     * A fresh Spreadsheet is created and filled through loadIntoExisting().
     *
     * @param string $pFilename
     *
     * @throws Exception
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }
334
335
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The CSV records are written to the sheet at the configured sheet index
     * (missing sheets are created). Empty fields and cells rejected by the read
     * filter are skipped; values are converted to UTF-8 when the input encoding
     * is anything else. In contiguous mode writing starts at the sheet's highest
     * row on the first load and at the row following the previous load afterwards.
     *
     * The 'auto_detect_line_endings' ini setting is switched on for the duration
     * of the call and restored afterwards, also when an exception is thrown; the
     * file handle is closed in both cases as well.
     *
     * @param string $pFilename
     * @param Spreadsheet $spreadsheet
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        $lineEnding = ini_get('auto_detect_line_endings');
        // ini_set() expects the new value as a string
        ini_set('auto_detect_line_endings', '1');

        $fileHandle = null;

        try {
            // Open file
            if (!$this->canRead($pFilename)) {
                throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
            }
            $this->openFile($pFilename);
            $fileHandle = $this->fileHandle;

            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            if ($this->contiguous) {
                $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
            }

            // Loop through each line of the file in turn
            while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        // Convert encoding if necessary
                        if ($this->inputEncoding !== 'UTF-8') {
                            $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                        }

                        // Set cell value
                        $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                    }
                    ++$columnLetter;
                }
                ++$currentRow;
            }

            // Remember where the next contiguous load has to continue
            if ($this->contiguous) {
                $this->contiguousRow = $currentRow;
            }
        } finally {
            // Close file (only if it was actually opened)
            if (is_resource($fileHandle)) {
                fclose($fileHandle);
            }

            // Restore the caller's ini setting; ini_get() yields false when there is nothing to restore
            if ($lineEnding !== false) {
                ini_set('auto_detect_line_endings', $lineEnding);
            }
        }

        // Return
        return $spreadsheet;
    }
404
405
    /**
     * Report the field delimiter currently in use.
     *
     * @return null|string The delimiter; null as long as none has been set or inferred
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }
414
415
    /**
     * Fix the field delimiter to use.
     *
     * Once a delimiter is set, inferSeparator() no longer tries to detect one.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return $this
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }
428
429
    /**
     * Report the enclosure character currently in use.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }
438
439
    /**
     * Choose the enclosure character.
     *
     * A value that compares equal to the empty string selects the default
     * enclosure, a double quote.
     *
     * @param string $enclosure Enclosure, defaults to "
     *
     * @return $this
     */
    public function setEnclosure($enclosure)
    {
        $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

        return $this;
    }
455
456
    /**
     * Report the index of the sheet the CSV data is loaded into.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
465
466
    /**
     * Choose the index of the sheet the CSV data is loaded into.
     *
     * loadIntoExisting() creates sheets as needed until this index exists.
     *
     * @param int $pValue Sheet index
     *
     * @return $this
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }
479
480
    /**
     * Switch contiguous loading on or off.
     *
     * Switching it off also resets the remembered contiguous row to -1.
     *
     * @param bool $contiguous
     *
     * @return $this
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;

        if ($this->contiguous === false) {
            $this->contiguousRow = -1;
        }

        return $this;
    }
496
497
    /**
     * Report whether rows are loaded contiguously.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }
506
507
    /**
     * Choose the character that can escape the enclosure.
     *
     * The value is handed to fgetcsv() as its escape argument.
     *
     * @param string $escapeCharacter
     *
     * @return $this
     */
    public function setEscapeCharacter($escapeCharacter)
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
520
521
    /**
     * Report the character that can escape the enclosure.
     *
     * @return string
     */
    public function getEscapeCharacter()
    {
        return $this->escapeCharacter;
    }
530
531
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable through openFile(). A ".csv" extension (any
     * letter case) is then trusted as is; for any other name the detected
     * mime type has to be text/csv, text/plain or inode/x-empty.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // Check if file exists
        try {
            $this->openFile($pFilename);
        } catch (Exception $e) {
            return false;
        }

        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = pathinfo($pFilename, PATHINFO_EXTENSION);
        if (strtolower($extension) === 'csv') {
            return true;
        }

        // Attempt to guess mimetype
        return in_array(
            mime_content_type($pFilename),
            ['text/csv', 'text/plain', 'inode/x-empty'],
            true
        );
    }
564
}
565