Completed
Push — master ( 3090c1...7517cd )
by Adrien
12:19
created

Csv::checkSeparator()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
cc 4
eloc 7
nc 3
nop 0
dl 0
loc 14
ccs 8
cts 8
cp 1
crap 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8
9
class Csv extends BaseReader
10
{
11
    /**
12
     * Input encoding.
13
     *
14
     * @var string
15
     */
16
    private $inputEncoding = 'UTF-8';
17
18
    /**
19
     * Delimiter.
20
     *
21
     * @var null|string
22
     */
23
    private $delimiter;
24
25
    /**
26
     * Enclosure.
27
     *
28
     * @var string
29
     */
30
    private $enclosure = '"';
31
32
    /**
33
     * Sheet index to read.
34
     *
35
     * @var int
36
     */
37
    private $sheetIndex = 0;
38
39
    /**
40
     * Load rows contiguously.
41
     *
42
     * @var bool
43
     */
44
    private $contiguous = false;
45
46
    /**
47
     * The character that can escape the enclosure.
48
     *
49
     * @var string
50
     */
51
    private $escapeCharacter = '\\';
52
53
    /**
     * Build a CSV reader; construction is delegated entirely to the parent reader.
     */
    public function __construct()
    {
        parent::__construct();
    }
60
61
    /**
     * Declare the character encoding the source file is written in.
     *
     * @param string $pValue Encoding name, for instance 'UTF-8'
     *
     * @return $this
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }
74
75
    /**
     * Report the character encoding currently configured for the source file.
     *
     * @return string
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
84
85
    /**
     * Rewind the file and leave the pointer on the first byte of data, i.e.
     * just past a UTF-8 byte order mark when the stream starts with one.
     *
     * The check is made regardless of the configured input encoding:
     * openFileOrMemory() re-encodes every source whose encoding is not exactly
     * 'UTF-8' into a UTF-8 memory stream, so any BOM found at this point is a
     * UTF-8 one. Tying the check to the literal 'UTF-8' setting left the three
     * BOM bytes attached to the first cell for such streams (including an
     * encoding spelled 'utf-8').
     */
    protected function skipBOM()
    {
        rewind($this->fileHandle);

        // fgets() with a length of 4 returns at most 3 bytes; anything other
        // than the BOM (including false on an empty stream) means "no BOM".
        if (fgets($this->fileHandle, 4) !== "\xEF\xBB\xBF") {
            rewind($this->fileHandle);
        }
    }
100
101
    /**
     * Look for an explicit "sep=X" directive on the line at the current file position.
     *
     * When the line is exactly "sep=" followed by one character, that character
     * becomes the delimiter and the pointer stays after the directive line.
     * Otherwise the pointer is put back at the start of the data via skipBOM().
     * Nothing happens if no line can be read.
     */
    protected function checkSeparator()
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            return;
        }

        $isDirective = stripos($firstLine, 'sep=') === 0
            && strlen(trim($firstLine, "\r\n")) == 5;

        if (!$isDirective) {
            // Not a directive: the line is real data, so go back to the start.
            $this->skipBOM();

            return;
        }

        $this->delimiter = substr($firstLine, 4, 1);
    }
119
120
    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * Up to 1000 logical lines (as returned by getNextLine()) are sampled. For
     * each candidate delimiter the number of occurrences per line is recorded,
     * and the candidate whose per-line counts deviate least from their median
     * (smallest mean square deviation) wins. Candidates whose median count is 0
     * are ignored, ties are resolved by the order of the candidate list, and
     * the first candidate (comma) is used when nothing can be inferred.
     * The file pointer is put back at the start of the data before returning.
     */
    protected function inferSeparator()
    {
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
        $maxLines = 1000;

        $counts = [];
        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter] = [];
        }

        // Count how many times each potential delimiter appears in each line.
        // The limit is tested before reading so that $numberLines always equals
        // the number of lines actually sampled (the median below relies on it).
        $numberLines = 0;
        while ($numberLines < $maxLines && ($line = $this->getNextLine()) !== false) {
            ++$numberLines;
            foreach ($potentialDelimiters as $delimiter) {
                $counts[$delimiter][] = substr_count($line, $delimiter);
            }
        }

        // If number of lines is 0, nothing to infer : fall back to the default
        if ($numberLines === 0) {
            $this->delimiter = $potentialDelimiters[0];
            $this->skipBOM();

            return;
        }

        // Pick the delimiter with the smallest mean square deviation from its
        // median; the strict "<" keeps the candidate order as the tie-breaker.
        $middleIdx = (int) floor(($numberLines - 1) / 2);
        $isOdd = ($numberLines % 2) === 1;
        $min = INF;

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = $isOdd
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            // Integer 0 means the delimiter is absent from at least half of the
            // lines (an exact integer division stays an integer in PHP).
            if ($median === 0) {
                continue;
            }

            $meanSquareDeviation = array_reduce(
                $series,
                function ($sum, $value) use ($median) {
                    return $sum + pow($value - $median, 2);
                },
                0
            ) / $numberLines;

            if ($meanSquareDeviation < $min) {
                $min = $meanSquareDeviation;
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = $potentialDelimiters[0];
        }

        $this->skipBOM();
    }
206
207
    /**
     * Get the next full (logical) line from the file, with enclosed text removed.
     *
     * Physical lines are appended until no unmatched enclosure character is
     * left, so a quoted field containing line breaks is treated as one line.
     * Everything between a pair of enclosures is dropped so that delimiter
     * characters inside quoted fields are not counted by the caller.
     *
     * @return false|string false when the end of the file is reached before a
     *                      complete line could be assembled
     */
    private function getNextLine()
    {
        $quotedEnclosure = preg_quote($this->enclosure, '/');

        // With an empty escape character the lookbehind would be "(?<!)", which
        // can never succeed, so no enclosure would ever match. Only add the
        // "not preceded by the escape character" guard when there is one.
        $enclosure = ((string) $this->escapeCharacter === '')
            ? $quotedEnclosure
            : '(?<!' . preg_quote($this->escapeCharacter, '/') . ')' . $quotedEnclosure;

        // Ungreedy + dotall: the shortest enclosed run, possibly spanning line breaks.
        $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/Us';
        $openPattern = '/(' . $enclosure . ')/';

        $line = '';
        do {
            // Get the next line in the file
            $newLine = fgets($this->fileHandle);

            // Return false if there is no next line
            if ($newLine === false) {
                return false;
            }

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $line = preg_replace($enclosedPattern, '', $line . $newLine);

            // If an enclosure is still present it is unmatched: read the next line as well
        } while (preg_match($openPattern, $line) > 0);

        return $line;
    }
238
239
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is read once with fgetcsv() to count its rows and find the widest row.
     * The result always holds a single entry, named 'Worksheet'.
     *
     * @param string $pFilename
     *
     * @return array
     */
    public function listWorksheetInfo($pFilename)
    {
        $this->openFileOrMemory($pFilename);
        $handle = $this->fileHandle;

        // Position on the first data byte and settle on a delimiter
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        $rowCount = 0;
        $lastColumnIndex = 0;
        while (($fields = fgetcsv($handle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
            ++$rowCount;
            $lastColumnIndex = max($lastColumnIndex, count($fields) - 1);
        }

        $lastColumnLetter = Coordinate::stringFromColumnIndex($lastColumnIndex + 1);

        fclose($handle);

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => $lastColumnLetter,
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $rowCount,
                'totalColumns' => $lastColumnIndex + 1,
            ],
        ];
    }
278
279
    /**
     * Read the given file into a newly created Spreadsheet.
     *
     * @param string $pFilename
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }
294
295 38
    /**
     * Open the file on $this->fileHandle, ready to be read as UTF-8.
     *
     * A file declared as 'UTF-8' is read directly. For any other input
     * encoding the whole file is loaded, converted to UTF-8 and served from a
     * rewound php://memory stream instead.
     *
     * @param string $pFilename
     *
     * @throws Exception when canRead() rejects the file
     */
    private function openFileOrMemory($pFilename)
    {
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }

        $this->openFile($pFilename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        // Swap the on-disk handle for an in-memory UTF-8 copy of the file
        fclose($this->fileHandle);
        $entireFile = file_get_contents($pFilename);
        $this->fileHandle = fopen('php://memory', 'r+');
        fwrite($this->fileHandle, StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding));
        rewind($this->fileHandle);
    }
312
313
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The CSV rows are written to the sheet at the configured sheet index
     * (missing sheets are created first). Empty fields and cells rejected by
     * the read filter are skipped. In contiguous mode the output row only
     * advances for input rows that produce at least one cell; otherwise the
     * output row equals the input row number.
     *
     * The 'auto_detect_line_endings' ini setting is switched on while reading
     * and is restored, and the file handle closed, even when reading fails
     * with an exception.
     *
     * @param string $pFilename
     * @param Spreadsheet $spreadsheet
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', '1');

        try {
            // Open file (throws when the file cannot be read as CSV)
            $this->openFileOrMemory($pFilename);
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any, then settle on a delimiter
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Make sure the target sheet exists, then select it
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // $currentRow follows the input; $outRow is where cells are written
                $currentRow = 1;
                $outRow = 0;

                // Loop through each line of the file in turn
                while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                // Advance the output row once per input row that yields data
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }

                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        // String increment: 'A' -> 'B', ..., 'Z' -> 'AA'
                        ++$columnLetter;
                    }
                    ++$currentRow;
                }
            } finally {
                // Close file
                fclose($fileHandle);
            }
        } finally {
            ini_set('auto_detect_line_endings', $lineEnding);
        }

        // Return
        return $spreadsheet;
    }
375
376
    /**
     * Report the field delimiter in use.
     *
     * The property has no default, so this is null until a delimiter has been
     * set by the caller or determined from a file.
     *
     * @return null|string
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }
385
386
    /**
     * Force the field delimiter instead of having it inferred from the file.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return $this
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }
399
400
    /**
     * Report the character used to enclose field values.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }
409
410
    /**
     * Choose the character used to enclose field values.
     *
     * @param string $enclosure Enclosure; an empty value selects the default "
     *
     * @return $this
     */
    public function setEnclosure($enclosure)
    {
        $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

        return $this;
    }
426
427
    /**
     * Report the index of the sheet the CSV data is loaded into.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
436
437
    /**
     * Choose the index of the sheet the CSV data is loaded into.
     *
     * @param int $pValue Sheet index
     *
     * @return $this
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }
450
451
    /**
     * Switch contiguous row loading on or off.
     *
     * When enabled, loadIntoExisting() numbers output rows consecutively,
     * advancing only for input rows that yield at least one cell.
     *
     * @param bool $contiguous
     *
     * @return $this
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;

        return $this;
    }
464
465
    /**
     * Tell whether contiguous row loading is enabled.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }
474
475
    /**
     * Choose the character that can escape the enclosure.
     *
     * @param string $escapeCharacter
     *
     * @return $this
     */
    public function setEscapeCharacter($escapeCharacter)
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
488
489
    /**
     * Report the character that can escape the enclosure.
     *
     * @return string
     */
    public function getEscapeCharacter()
    {
        return $this->escapeCharacter;
    }
498
499
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A 'csv' or 'tsv' extension (case-insensitive)
     * is then trusted as is; any other file is accepted only when its detected
     * MIME type is text/csv, text/plain or inode/x-empty.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // Check if file exists
        try {
            $this->openFile($pFilename);
        } catch (\InvalidArgumentException $e) {
            return false;
        }

        // The handle was only needed to prove the file can be opened
        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($pFilename, PATHINFO_EXTENSION));
        if ($extension === 'csv' || $extension === 'tsv') {
            return true;
        }

        // Attempt to guess mimetype
        return in_array(
            mime_content_type($pFilename),
            ['text/csv', 'text/plain', 'inode/x-empty'],
            true
        );
    }
533
}
534