Failed Conditions
Push — master ( 7635b3...69fc93 )
by Adrien
36s queued 21s
created

Csv::guessEncodingBom()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 9
nc 2
nop 1
dl 0
loc 13
ccs 10
cts 10
cp 1
crap 2
rs 9.9666
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6
use PhpOffice\PhpSpreadsheet\Reader\Csv\Delimiter;
7
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
8
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
9
use PhpOffice\PhpSpreadsheet\Spreadsheet;
10
11
class Csv extends BaseReader
12
{
13
    const DEFAULT_FALLBACK_ENCODING = 'CP1252';
14
    const GUESS_ENCODING = 'guess';
15
    const UTF8_BOM = "\xEF\xBB\xBF";
16
    const UTF8_BOM_LEN = 3;
17
    const UTF16BE_BOM = "\xfe\xff";
18
    const UTF16BE_BOM_LEN = 2;
19
    const UTF16BE_LF = "\x00\x0a";
20
    const UTF16LE_BOM = "\xff\xfe";
21
    const UTF16LE_BOM_LEN = 2;
22
    const UTF16LE_LF = "\x0a\x00";
23
    const UTF32BE_BOM = "\x00\x00\xfe\xff";
24
    const UTF32BE_BOM_LEN = 4;
25
    const UTF32BE_LF = "\x00\x00\x00\x0a";
26
    const UTF32LE_BOM = "\xff\xfe\x00\x00";
27
    const UTF32LE_BOM_LEN = 4;
28
    const UTF32LE_LF = "\x0a\x00\x00\x00";
29
30
    /**
31
     * Input encoding.
32
     *
33
     * @var string
34
     */
35
    private $inputEncoding = 'UTF-8';
36
37
    /**
38
     * Fallback encoding if guess strikes out.
39
     *
40
     * @var string
41
     */
42
    private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;
43
44
    /**
45
     * Delimiter.
46
     *
47
     * @var ?string
48
     */
49
    private $delimiter;
50
51
    /**
52
     * Enclosure.
53
     *
54
     * @var string
55
     */
56
    private $enclosure = '"';
57
58
    /**
59
     * Sheet index to read.
60
     *
61
     * @var int
62
     */
63
    private $sheetIndex = 0;
64
65
    /**
66
     * Load rows contiguously.
67
     *
68
     * @var bool
69
     */
70
    private $contiguous = false;
71
72
    /**
73
     * The character that can escape the enclosure.
74
     *
75
     * @var string
76
     */
77
    private $escapeCharacter = '\\';
78
79
    /**
80
     * Callback for setting defaults in construction.
81
     *
82
     * @var ?callable
83
     */
84
    private static $constructorCallback;
85
86
    /**
     * Build a CSV reader, then let the registered constructor callback (if any)
     * adjust the freshly created instance.
     */
    public function __construct()
    {
        parent::__construct();

        if (self::$constructorCallback === null) {
            return;
        }

        // Copy to a local first: a static property holding a callable cannot be invoked in place.
        $applyDefaults = self::$constructorCallback;
        $applyDefaults($this);
    }
97
98
    /**
     * Register (or, with null, clear) the callback run by every constructor.
     *
     * The callback is handed the Csv reader being constructed as its first
     * argument; its return value is not used.
     */
    public static function setConstructorCallback(?callable $callback): void
    {
        self::$constructorCallback = $callback;
    }
108
109 1
    /**
     * Return the callback currently registered for constructors, or null if none is set.
     */
    public static function getConstructorCallback(): ?callable
    {
        return self::$constructorCallback;
    }
113
114 45
    /**
     * Set the encoding the input file is expected to use.
     *
     * Passing self::GUESS_ENCODING asks the reader to detect the encoding
     * when the file is opened.
     *
     * @return $this
     */
    public function setInputEncoding(string $encoding): self
    {
        $this->inputEncoding = $encoding;

        return $this;
    }
120
121 1
    /**
     * Return the input encoding currently configured on this reader.
     */
    public function getInputEncoding(): string
    {
        return $this->inputEncoding;
    }
125
126 5
    /**
     * Set the encoding to assume when encoding detection finds nothing.
     *
     * @param string $pValue encoding name handed to the guesser as its default
     *
     * @return $this
     */
    public function setFallbackEncoding(string $pValue): self
    {
        $this->fallbackEncoding = $pValue;

        return $this;
    }
132
133 1
    /**
     * Return the encoding used when detection finds nothing.
     */
    public function getFallbackEncoding(): string
    {
        return $this->fallbackEncoding;
    }
137
138
    /**
     * Position the file pointer at the first byte after a UTF-8 BOM,
     * or at the very start of the file when there is no BOM.
     */
    protected function skipBOM(): void
    {
        rewind($this->fileHandle);

        // fgets() reads at most (length - 1) bytes, hence the + 1.
        $leadingBytes = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
        if ($leadingBytes === self::UTF8_BOM) {
            return;
        }

        // Not a BOM: give the bytes back.
        rewind($this->fileHandle);
    }
149
150
    /**
     * Honour an explicit "sep=X" directive on the line at the current file position.
     *
     * When the line is exactly such a directive, X becomes the delimiter and the
     * pointer is left after that line. Otherwise the pointer is sent back to the
     * start of the data (past any BOM). Nothing happens if no line can be read.
     */
    protected function checkSeparator(): void
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            return;
        }

        // A directive is exactly five characters before the line ending: "sep=" plus one delimiter.
        $isDirective = (strlen(trim($firstLine, "\r\n")) == 5) && (stripos($firstLine, 'sep=') === 0);
        if (!$isDirective) {
            $this->skipBOM();

            return;
        }

        $this->delimiter = substr($firstLine, 4, 1);
    }
168
169
    /**
     * Work out the delimiter from the file contents, unless one is already known
     * (set by the user or by a "sep=" directive).
     *
     * The inference engine's default delimiter is used when the file has no lines
     * or when no delimiter can be detected. Afterwards the file pointer is put
     * back at the start of the data.
     */
    protected function inferSeparator(): void
    {
        if ($this->delimiter !== null) {
            return;
        }

        $engine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

        // With zero lines counted there is nothing to infer from.
        $detected = ($engine->linesCounted() === 0) ? null : $engine->infer();

        $this->delimiter = $detected ?? $engine->getDefaultDelimiter();
        $this->skipBOM();
    }
197
198
    /**
     * Describe the single worksheet a CSV file represents, without loading its cells.
     *
     * @return array a one-element list whose entry has the keys worksheetName,
     *               lastColumnLetter, lastColumnIndex (0-based), totalRows and totalColumns
     */
    public function listWorksheetInfo(string $filename): array
    {
        $this->openFileOrMemory($filename);
        $handle = $this->fileHandle;

        // Position on the first data row and settle on a delimiter.
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        $delimiter = $this->delimiter ?? '';
        $rowCount = 0;
        $widestIndex = 0;

        // Count rows and track the widest one.
        for (
            $fields = fgetcsv($handle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
            is_array($fields);
            $fields = fgetcsv($handle, 0, $delimiter, $this->enclosure, $this->escapeCharacter)
        ) {
            ++$rowCount;
            $widestIndex = max($widestIndex, count($fields) - 1);
        }

        $lastColumnLetter = Coordinate::stringFromColumnIndex($widestIndex + 1);

        fclose($handle);

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => $lastColumnLetter,
                'lastColumnIndex' => $widestIndex,
                'totalRows' => $rowCount,
                'totalColumns' => $widestIndex + 1,
            ],
        ];
    }
235
236
    /**
     * Read a CSV file into a brand-new Spreadsheet.
     *
     * @param int $flags reader flags, handed to processFlags() before loading
     *
     * @return Spreadsheet
     */
    public function load(string $filename, int $flags = 0)
    {
        $this->processFlags($flags);

        return $this->loadIntoExisting($filename, new Spreadsheet());
    }
251
252 85
    /**
     * Open the file for reading, transcoding it to UTF-8 in memory when needed.
     *
     * If the input encoding is self::GUESS_ENCODING it is first replaced by the
     * guessed encoding. For a UTF-8 input the real file handle is kept; for any
     * other encoding the file handle is swapped for a php://memory stream holding
     * the converted contents, positioned after any BOM.
     *
     * @throws Exception when canRead() rejects the file
     */
    private function openFileOrMemory(string $filename): void
    {
        if (!$this->canRead($filename)) {
            throw new Exception($filename . ' is an Invalid Spreadsheet file.');
        }

        if ($this->inputEncoding === self::GUESS_ENCODING) {
            $this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
        }

        $this->openFile($filename);
        if ($this->inputEncoding === 'UTF-8') {
            return;
        }

        // Non-UTF-8 input: replace the handle with an in-memory UTF-8 copy.
        fclose($this->fileHandle);
        $rawContents = file_get_contents($filename);
        $this->fileHandle = fopen('php://memory', 'r+b');
        if ($this->fileHandle === false || $rawContents === false) {
            return;
        }

        fwrite($this->fileHandle, StringHelper::convertEncoding($rawContents, 'UTF-8', $this->inputEncoding));
        $this->skipBOM();
    }
274
275 74
    /**
     * Change the 'auto_detect_line_endings' ini setting and report its old value.
     *
     * @param ?string $value new value; null means "leave the setting alone"
     *
     * @return ?string the previous value when ini_set() returned one as a string,
     *                 otherwise null (which is safe to pass back in later)
     */
    private static function setAutoDetect(?string $value): ?string
    {
        if ($value === null) {
            return null;
        }

        // Errors from ini_set() are deliberately suppressed here.
        $previous = @ini_set('auto_detect_line_endings', $value);

        return is_string($previous) ? $previous : null;
    }
287
288
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * Rows are written to the sheet at the configured sheet index (sheets are
     * created until that index exists). Empty fields and cells rejected by the
     * read filter are skipped. In contiguous mode the output row only advances
     * for input rows that produce at least one cell.
     *
     * The 'auto_detect_line_endings' ini setting is restored and the file handle
     * is closed even when opening or reading the file throws.
     *
     * @param string $filename path of the CSV file to read
     * @param Spreadsheet $spreadsheet workbook that receives the data
     *
     * @return Spreadsheet the same instance that was passed in
     */
    public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
    {
        // Deprecated in Php8.1
        $iniset = self::setAutoDetect('1');

        try {
            // Open file
            $this->openFileOrMemory($filename);
            $fileHandle = $this->fileHandle;

            try {
                // Skip BOM, if any
                $this->skipBOM();
                $this->checkSeparator();
                $this->inferSeparator();

                // Make sure the target sheet exists, then select it
                while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                    $spreadsheet->createSheet();
                }
                $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

                // $currentRow follows the input; $outRow is where cells are written
                $currentRow = 1;
                $outRow = 0;
                $delimiter = $this->delimiter ?? '';

                // Loop through each line of the file in turn
                $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
                while (is_array($rowData)) {
                    $noOutputYet = true;
                    $columnLetter = 'A';
                    foreach ($rowData as $rowDatum) {
                        self::convertBoolean($rowDatum);
                        if ($rowDatum !== '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                            if ($this->contiguous) {
                                // Advance the output row once per input row that yields data
                                if ($noOutputYet) {
                                    $noOutputYet = false;
                                    ++$outRow;
                                }
                            } else {
                                $outRow = $currentRow;
                            }
                            // Set cell value
                            $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                        }
                        ++$columnLetter;
                    }
                    $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
                    ++$currentRow;
                }
            } finally {
                // Close file, also when reading or cell assignment failed
                if (is_resource($fileHandle)) {
                    fclose($fileHandle);
                }
            }
        } finally {
            // Always put the ini setting back, including when the file could not be opened
            self::setAutoDetect($iniset);
        }

        // Return
        return $spreadsheet;
    }
348
349
    /**
     * Normalise one CSV field in place: the strings "true"/"false" (any letter
     * case) become booleans, and null becomes the empty string. Anything else
     * is left untouched.
     *
     * @param mixed $rowDatum
     */
    private static function convertBoolean(&$rowDatum): void
    {
        if ($rowDatum === null) {
            $rowDatum = '';

            return;
        }
        if (!is_string($rowDatum)) {
            return;
        }

        foreach (['true' => true, 'false' => false] as $literal => $boolean) {
            if (strcasecmp($literal, $rowDatum) === 0) {
                $rowDatum = $boolean;

                return;
            }
        }
    }
366
367 14
    /**
     * Return the field delimiter, or null when none has been set or detected yet.
     */
    public function getDelimiter(): ?string
    {
        return $this->delimiter;
    }
371
372 10
    /**
     * Set the field delimiter; null leaves it to be determined from the file.
     *
     * @return $this
     */
    public function setDelimiter(?string $delimiter): self
    {
        $this->delimiter = $delimiter;

        return $this;
    }
378
379 2
    /**
     * Return the enclosure string used around fields.
     */
    public function getEnclosure(): string
    {
        return $this->enclosure;
    }
383
384 9
    /**
     * Set the enclosure string; an empty string selects the double quote.
     *
     * @return $this
     */
    public function setEnclosure(string $enclosure): self
    {
        $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

        return $this;
    }
393
394 1
    /**
     * Return the index of the sheet that loaded data is written to.
     */
    public function getSheetIndex(): int
    {
        return $this->sheetIndex;
    }
398
399 5
    /**
     * Choose the sheet index that loaded data is written to.
     *
     * @return $this
     */
    public function setSheetIndex(int $indexValue): self
    {
        $this->sheetIndex = $indexValue;

        return $this;
    }
405
406 3
    /**
     * Turn contiguous loading on or off.
     *
     * @return $this
     */
    public function setContiguous(bool $contiguous): self
    {
        // The parameter is already typed bool, so no cast is needed.
        $this->contiguous = $contiguous;

        return $this;
    }
412
413 1
    /**
     * Report whether contiguous loading is enabled.
     */
    public function getContiguous(): bool
    {
        return $this->contiguous;
    }
417
418 8
    /**
     * Set the character that can escape the enclosure.
     *
     * @return $this
     */
    public function setEscapeCharacter(string $escapeCharacter): self
    {
        $this->escapeCharacter = $escapeCharacter;

        return $this;
    }
424
425 1
    /**
     * Return the character that can escape the enclosure.
     */
    public function getEscapeCharacter(): string
    {
        return $this->escapeCharacter;
    }
429
430
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable. A "csv" or "tsv" extension (any letter case) is
     * then trusted outright; otherwise the detected MIME type has to be one of
     * the accepted CSV/plain-text/empty-file types.
     */
    public function canRead(string $filename): bool
    {
        // Opening is only a readability probe; the handle is closed again right away.
        try {
            $this->openFile($filename);
        } catch (ReaderException $e) {
            return false;
        }
        fclose($this->fileHandle);

        // Trust file extension if any
        $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
        if ($extension === 'csv' || $extension === 'tsv') {
            return true;
        }

        // Attempt to guess mimetype
        return in_array(
            mime_content_type($filename),
            ['application/csv', 'text/csv', 'text/plain', 'inode/x-empty'],
            true
        );
    }
461
462 19
    /**
     * Try one BOM-less encoding candidate, unless an encoding was already chosen.
     *
     * The candidate is accepted when $compare occurs in $contents and its first
     * occurrence starts at a byte offset that is a multiple of its own length.
     *
     * @param string $encoding result slot; only written while it is still ''
     * @param string $contents file contents to search (not modified here)
     * @param string $compare byte sequence to look for
     * @param string $setEncoding encoding name stored on a match
     */
    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
    {
        if ($encoding !== '') {
            return;
        }

        $offset = strpos($contents, $compare);
        if ($offset === false || $offset % strlen($compare) !== 0) {
            return;
        }

        $encoding = $setEncoding;
    }
471
472 19
    /**
     * Guess the encoding of a file that carries no BOM.
     *
     * The contents are searched for a line feed encoded as UTF-32BE, UTF-32LE,
     * UTF-16BE and UTF-16LE, in that order; the first pattern found at an aligned
     * offset decides. If none matches and the contents are valid UTF-8, 'UTF-8'
     * is reported.
     *
     * @param string $filename path of the file to inspect
     *
     * @return string the guessed encoding, or '' when nothing could be determined
     *                or the file could not be read
     */
    private static function guessEncodingNoBom(string $filename): string
    {
        $encoding = '';
        $contents = file_get_contents($filename);
        if ($contents === false) {
            // Unreadable file: report "unknown" instead of treating false as empty, valid UTF-8.
            return $encoding;
        }

        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');

        // The empty pattern with the /u modifier matches only if the subject is valid UTF-8.
        if ($encoding === '' && preg_match('//u', $contents) === 1) {
            $encoding = 'UTF-8';
        }

        return $encoding;
    }
486
487 29
    /**
     * Try one BOM candidate, unless an encoding was already chosen.
     *
     * @param string $encoding result slot; only written while it is still ''
     * @param string $first4 leading bytes of the file
     * @param string $compare BOM byte sequence that $first4 must start with
     * @param string $setEncoding encoding name stored on a match
     */
    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
    {
        $startsWithBom = strncmp($first4, $compare, strlen($compare)) === 0;
        if ($encoding === '' && $startsWithBom) {
            $encoding = $setEncoding;
        }
    }
495
496 29
    /**
     * Guess the encoding of a file from a byte-order mark in its first four bytes.
     *
     * @return string the encoding named by the BOM, or '' when there is no
     *                recognised BOM or the file could not be read
     */
    private static function guessEncodingBom(string $filename): string
    {
        $encoding = '';
        $first4 = file_get_contents($filename, false, null, 0, 4);
        if ($first4 === false) {
            return $encoding;
        }

        // Order matters: the first matching candidate wins, so UTF-32LE is tried before UTF-16LE.
        $candidates = [
            [self::UTF8_BOM, 'UTF-8'],
            [self::UTF16BE_BOM, 'UTF-16BE'],
            [self::UTF32BE_BOM, 'UTF-32BE'],
            [self::UTF32LE_BOM, 'UTF-32LE'],
            [self::UTF16LE_BOM, 'UTF-16LE'],
        ];
        foreach ($candidates as [$bom, $name]) {
            self::guessEncodingTestBom($encoding, $first4, $bom, $name);
        }

        return $encoding;
    }
510
511 29
    /**
     * Guess the encoding of a file: a BOM decides if present, otherwise the
     * contents are inspected, and $dflt is returned when neither gives an answer.
     *
     * @param string $filename path of the file to inspect
     * @param string $dflt encoding to report when nothing can be determined
     */
    public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string
    {
        $detected = self::guessEncodingBom($filename);
        if ($detected === '') {
            $detected = self::guessEncodingNoBom($filename);
        }

        if ($detected !== '') {
            return $detected;
        }

        return $dflt;
    }
520
}
521