Completed
Push — develop ( 0c7df7...c72383 )
by Adrien
68:50 queued 61:57
created

Csv::setEnclosure()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2.032

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 1
dl 0
loc 8
ccs 4
cts 5
cp 0.8
crap 2.032
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8
9
class Csv extends BaseReader
10
{
11
    /**
12
     * Input encoding.
13
     *
14
     * @var string
15
     */
16
    private $inputEncoding = 'UTF-8';
17
18
    /**
19
     * Delimiter.
20
     *
21
     * @var string
22
     */
23
    private $delimiter;
24
25
    /**
26
     * Enclosure.
27
     *
28
     * @var string
29
     */
30
    private $enclosure = '"';
31
32
    /**
33
     * Sheet index to read.
34
     *
35
     * @var int
36
     */
37
    private $sheetIndex = 0;
38
39
    /**
40
     * Load rows contiguously.
41
     *
42
     * @var bool
43
     */
44
    private $contiguous = false;
45
46
    /**
47
     * Row counter for loading rows contiguously.
48
     *
49
     * @var int
50
     */
51
    private $contiguousRow = -1;
52
53
    /**
54
     * The character that can escape the enclosure.
55
     *
56
     * @var string
57
     */
58
    private $escapeCharacter = '\\';
59
60
    /**
61
     * Create a new CSV Reader instance.
62
     */
63 20
    public function __construct()
64
    {
65 20
        $this->readFilter = new DefaultReadFilter();
66 20
    }
67
68
    /**
69
     * Set input encoding.
70
     *
71
     * @param string $pValue Input encoding, eg: 'UTF-8'
72
     *
73
     * @return Csv
74
     */
75
    public function setInputEncoding($pValue)
76
    {
77
        $this->inputEncoding = $pValue;
78
79
        return $this;
80
    }
81
82
    /**
83
     * Get input encoding.
84
     *
85
     * @return string
86
     */
87
    public function getInputEncoding()
88
    {
89
        return $this->inputEncoding;
90
    }
91
92
    /**
93
     * Move filepointer past any BOM marker.
94
     */
95 11
    protected function skipBOM()
96
    {
97 11
        rewind($this->fileHandle);
98
99 11
        switch ($this->inputEncoding) {
100 11
            case 'UTF-8':
101 11
                fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
102 11
                    fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
103
104 11
                break;
105
            case 'UTF-16LE':
106
                fgets($this->fileHandle, 3) == "\xFF\xFE" ?
107
                    fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
108
109
                break;
110
            case 'UTF-16BE':
111
                fgets($this->fileHandle, 3) == "\xFE\xFF" ?
112
                    fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0);
113
114
                break;
115
            case 'UTF-32LE':
116
                fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ?
117
                    fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
118
119
                break;
120
            case 'UTF-32BE':
121
                fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ?
122
                    fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0);
123
124
                break;
125
            default:
126
                break;
127
        }
128 11
    }
129
130
    /**
131
     * Identify any separator that is explicitly set in the file.
132
     */
133 11
    protected function checkSeparator()
134
    {
135 11
        $line = fgets($this->fileHandle);
136 11
        if ($line === false) {
137
            return;
138
        }
139
140 11
        if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
141
            $this->delimiter = substr($line, 4, 1);
142
143
            return;
144
        }
145
146 11
        return $this->skipBOM();
0 ignored issues
show
Bug introduced by
Are you sure the usage of $this->skipBOM() targeting PhpOffice\PhpSpreadsheet\Reader\Csv::skipBOM() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.

Loading history...
147
    }
148
149
    /**
150
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
151
     */
152 11
    protected function inferSeparator()
153
    {
154 11
        if ($this->delimiter !== null) {
155 4
            return;
156
        }
157
158 9
        $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
159 9
        $counts = [];
160 9
        foreach ($potentialDelimiters as $delimiter) {
161 9
            $counts[$delimiter] = [];
162
        }
163
164
        // Count how many times each of the potential delimiters appears in each line
165 9
        $numberLines = 0;
166 9
        while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) {
167
            // Drop everything that is enclosed to avoid counting false positives in enclosures
168 9
            $enclosure = preg_quote($this->enclosure, '/');
169 9
            $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);
170
171 9
            $countLine = [];
172 9
            for ($i = strlen($line) - 1; $i >= 0; --$i) {
173 9
                $char = $line[$i];
174 9
                if (isset($counts[$char])) {
175 8
                    if (!isset($countLine[$char])) {
176 8
                        $countLine[$char] = 0;
177
                    }
178 8
                    ++$countLine[$char];
179
                }
180
            }
181 9
            foreach ($potentialDelimiters as $delimiter) {
182 9
                $counts[$delimiter][] = isset($countLine[$delimiter])
183 8
                    ? $countLine[$delimiter]
184 9
                    : 0;
185
            }
186
        }
187
188
        // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
189 9
        $meanSquareDeviations = [];
190 9
        $middleIdx = floor(($numberLines - 1) / 2);
191
192 9
        foreach ($potentialDelimiters as $delimiter) {
193 9
            $series = $counts[$delimiter];
194 9
            sort($series);
195
196 9
            $median = ($numberLines % 2)
197 5
                ? $series[$middleIdx]
198 9
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;
199
200 9
            if ($median === 0) {
201 9
                continue;
202
            }
203
204 8
            $meanSquareDeviations[$delimiter] = array_reduce(
205 8
                $series,
206 8
                function ($sum, $value) use ($median) {
207 8
                    return $sum + pow($value - $median, 2);
208 8
                }
209 8
            ) / count($series);
210
        }
211
212
        // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
213 9
        $min = INF;
214 9
        foreach ($potentialDelimiters as $delimiter) {
215 9
            if (!isset($meanSquareDeviations[$delimiter])) {
216 9
                continue;
217
            }
218
219 8
            if ($meanSquareDeviations[$delimiter] < $min) {
220 8
                $min = $meanSquareDeviations[$delimiter];
221 8
                $this->delimiter = $delimiter;
222
            }
223
        }
224
225
        // If no delimiter could be detected, fall back to the default
226 9
        if ($this->delimiter === null) {
227 1
            $this->delimiter = reset($potentialDelimiters);
228
        }
229
230 9
        return $this->skipBOM();
0 ignored issues
show
Bug introduced by
Are you sure the usage of $this->skipBOM() targeting PhpOffice\PhpSpreadsheet\Reader\Csv::skipBOM() seems to always return null.

This check looks for function or method calls that always return null and whose return value is used.

class A
{
    function getObject()
    {
        return null;
    }

}

$a = new A();
if ($a->getObject()) {

The method getObject() can return nothing but null, so it makes no sense to use the return value.

The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.

Loading history...
231
    }
232
233
    /**
234
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
235
     *
236
     * @param string $pFilename
237
     *
238
     * @throws Exception
239
     *
240
     * @return array
241
     */
242
    public function listWorksheetInfo($pFilename)
243
    {
244
        // Open file
245
        if (!$this->canRead($pFilename)) {
246
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
247
        }
248
        $this->openFile($pFilename);
249
        $fileHandle = $this->fileHandle;
250
251
        // Skip BOM, if any
252
        $this->skipBOM();
253
        $this->checkSeparator();
254
        $this->inferSeparator();
255
256
        $worksheetInfo = [];
257
        $worksheetInfo[0]['worksheetName'] = 'Worksheet';
258
        $worksheetInfo[0]['lastColumnLetter'] = 'A';
259
        $worksheetInfo[0]['lastColumnIndex'] = 0;
260
        $worksheetInfo[0]['totalRows'] = 0;
261
        $worksheetInfo[0]['totalColumns'] = 0;
262
263
        // Loop through each line of the file in turn
264
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
1 ignored issue
show
Bug introduced by
It seems like $fileHandle can also be of type false; however, parameter $handle of fgetcsv() does only seem to accept resource, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

264
        while (($rowData = fgetcsv(/** @scrutinizer ignore-type */ $fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
Loading history...
265
            ++$worksheetInfo[0]['totalRows'];
266
            $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
267
        }
268
269
        $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
270
        $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1;
271
272
        // Close file
273
        fclose($fileHandle);
1 ignored issue
show
Bug introduced by
It seems like $fileHandle can also be of type false; however, parameter $handle of fclose() does only seem to accept resource, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

273
        fclose(/** @scrutinizer ignore-type */ $fileHandle);
Loading history...
274
275
        return $worksheetInfo;
276
    }
277
278
    /**
279
     * Loads Spreadsheet from file.
280
     *
281
     * @param string $pFilename
282
     *
283
     * @throws Exception
284
     *
285
     * @return Spreadsheet
286
     */
287 10
    public function load($pFilename)
288
    {
289
        // Create new Spreadsheet
290 10
        $spreadsheet = new Spreadsheet();
291
292
        // Load into this instance
293 10
        return $this->loadIntoExisting($pFilename, $spreadsheet);
294
    }
295
296
    /**
297
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
298
     *
299
     * @param string $pFilename
300
     * @param Spreadsheet $spreadsheet
301
     *
302
     * @throws Exception
303
     *
304
     * @return Spreadsheet
305
     */
306 11
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
307
    {
308 11
        $lineEnding = ini_get('auto_detect_line_endings');
309 11
        ini_set('auto_detect_line_endings', true);
0 ignored issues
show
Bug introduced by
true of type true is incompatible with the type string expected by parameter $newvalue of ini_set(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

309
        ini_set('auto_detect_line_endings', /** @scrutinizer ignore-type */ true);
Loading history...
310
311
        // Open file
312 11
        if (!$this->canRead($pFilename)) {
313
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
314
        }
315 11
        $this->openFile($pFilename);
316 11
        $fileHandle = $this->fileHandle;
317
318
        // Skip BOM, if any
319 11
        $this->skipBOM();
320 11
        $this->checkSeparator();
321 11
        $this->inferSeparator();
322
323
        // Create new PhpSpreadsheet object
324 11
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
325 2
            $spreadsheet->createSheet();
326
        }
327 11
        $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);
328
329
        // Set our starting row based on whether we're in contiguous mode or not
330 11
        $currentRow = 1;
331 11
        if ($this->contiguous) {
332 1
            $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
333
        }
334
335
        // Loop through each line of the file in turn
336 11
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
1 ignored issue
show
Bug introduced by
It seems like $fileHandle can also be of type false; however, parameter $handle of fgetcsv() does only seem to accept resource, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

336
        while (($rowData = fgetcsv(/** @scrutinizer ignore-type */ $fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
Loading history...
337 11
            $columnLetter = 'A';
338 11
            foreach ($rowData as $rowDatum) {
339 11
                if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
340
                    // Convert encoding if necessary
341 11
                    if ($this->inputEncoding !== 'UTF-8') {
342
                        $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
343
                    }
344
345
                    // Set cell value
346 11
                    $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
347
                }
348 11
                ++$columnLetter;
349
            }
350 11
            ++$currentRow;
351
        }
352
353
        // Close file
354 11
        fclose($fileHandle);
1 ignored issue
show
Bug introduced by
It seems like $fileHandle can also be of type false; however, parameter $handle of fclose() does only seem to accept resource, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

354
        fclose(/** @scrutinizer ignore-type */ $fileHandle);
Loading history...
355
356 11
        if ($this->contiguous) {
357 1
            $this->contiguousRow = $currentRow;
358
        }
359
360 11
        ini_set('auto_detect_line_endings', $lineEnding);
361
362
        // Return
363 11
        return $spreadsheet;
364
    }
365
366
    /**
367
     * Get delimiter.
368
     *
369
     * @return string
370
     */
371 5
    public function getDelimiter()
372
    {
373 5
        return $this->delimiter;
374
    }
375
376
    /**
377
     * Set delimiter.
378
     *
379
     * @param string $delimiter Delimiter, eg: ','
380
     *
381
     * @return CSV
382
     */
383 2
    public function setDelimiter($delimiter)
384
    {
385 2
        $this->delimiter = $delimiter;
386
387 2
        return $this;
388
    }
389
390
    /**
391
     * Get enclosure.
392
     *
393
     * @return string
394
     */
395
    public function getEnclosure()
396
    {
397
        return $this->enclosure;
398
    }
399
400
    /**
401
     * Set enclosure.
402
     *
403
     * @param string $enclosure Enclosure, defaults to "
404
     *
405
     * @return CSV
406
     */
407 1
    public function setEnclosure($enclosure)
408
    {
409 1
        if ($enclosure == '') {
410
            $enclosure = '"';
411
        }
412 1
        $this->enclosure = $enclosure;
413
414 1
        return $this;
415
    }
416
417
    /**
418
     * Get sheet index.
419
     *
420
     * @return int
421
     */
422
    public function getSheetIndex()
423
    {
424
        return $this->sheetIndex;
425
    }
426
427
    /**
428
     * Set sheet index.
429
     *
430
     * @param int $pValue Sheet index
431
     *
432
     * @return CSV
433
     */
434 3
    public function setSheetIndex($pValue)
435
    {
436 3
        $this->sheetIndex = $pValue;
437
438 3
        return $this;
439
    }
440
441
    /**
442
     * Set Contiguous.
443
     *
444
     * @param bool $contiguous
445
     *
446
     * @return Csv
447
     */
448 1
    public function setContiguous($contiguous)
449
    {
450 1
        $this->contiguous = (bool) $contiguous;
451 1
        if (!$contiguous) {
452
            $this->contiguousRow = -1;
453
        }
454
455 1
        return $this;
456
    }
457
458
    /**
459
     * Get Contiguous.
460
     *
461
     * @return bool
462
     */
463
    public function getContiguous()
464
    {
465
        return $this->contiguous;
466
    }
467
468
    /**
469
     * Set escape backslashes.
470
     *
471
     * @param string $escapeCharacter
472
     *
473
     * @return $this
474
     */
475 1
    public function setEscapeCharacter($escapeCharacter)
476
    {
477 1
        $this->escapeCharacter = $escapeCharacter;
478
479 1
        return $this;
480
    }
481
482
    /**
483
     * Get escape backslashes.
484
     *
485
     * @return string
486
     */
487 1
    public function getEscapeCharacter()
488
    {
489 1
        return $this->escapeCharacter;
490
    }
491
492
    /**
493
     * Can the current IReader read the file?
494
     *
495
     * @param string $pFilename
496
     *
497
     * @return bool
498
     */
499 19
    public function canRead($pFilename)
500
    {
501
        // Check if file exists
502
        try {
503 19
            $this->openFile($pFilename);
504
        } catch (Exception $e) {
505
            return false;
506
        }
507
508 19
        fclose($this->fileHandle);
1 ignored issue
show
Bug introduced by
It seems like $this->fileHandle can also be of type false; however, parameter $handle of fclose() does only seem to accept resource, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

508
        fclose(/** @scrutinizer ignore-type */ $this->fileHandle);
Loading history...
509
510 19
        $type = mime_content_type($pFilename);
511
        $supportedTypes = [
512 19
            'text/csv',
513
            'text/plain',
514
            'inode/x-empty',
515
        ];
516
517 19
        return in_array($type, $supportedTypes, true);
518
    }
519
}
520