Completed
Push — develop ( e31878...02e176 )
by Adrien
58:36 queued 52:53
created

Csv::setInputEncoding()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 5
ccs 0
cts 3
cp 0
crap 2
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
6
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8
9
class Csv extends BaseReader
10
{
11
    /**
     * Character encoding the input file is expected to use.
     *
     * skipBOM() only recognises the exact names 'UTF-8', 'UTF-16LE', 'UTF-16BE',
     * 'UTF-32LE' and 'UTF-32BE'; loadIntoExisting() converts cell values to
     * UTF-8 whenever this is anything other than 'UTF-8'.
     *
     * @var string
     */
    private $inputEncoding = 'UTF-8';

    /**
     * Field delimiter.
     *
     * Stays null until it is supplied through setDelimiter(), read from a
     * "sep=" line by checkSeparator(), or guessed by inferSeparator().
     *
     * @var null|string
     */
    private $delimiter;

    /**
     * Enclosure character wrapped around field values.
     *
     * @var string
     */
    private $enclosure = '"';

    /**
     * Index of the worksheet the CSV data is loaded into.
     *
     * @var int
     */
    private $sheetIndex = 0;

    /**
     * Whether successive loads continue below the previously loaded rows
     * instead of starting again at row 1.
     *
     * @var bool
     */
    private $contiguous = false;

    /**
     * Next row to write when loading contiguously; -1 means no contiguous
     * load has recorded a position yet.
     *
     * @var int
     */
    private $contiguousRow = -1;
52
53
    /**
     * Build a CSV reader whose read filter is a fresh DefaultReadFilter.
     */
    public function __construct()
    {
        $defaultFilter = new DefaultReadFilter();
        $this->readFilter = $defaultFilter;
    }
60
61
    /**
     * Declare the character encoding of the file that will be read.
     *
     * The value is stored as given; no validation is performed here.
     *
     * @param string $pValue Encoding name, eg: 'UTF-8'
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }
74
75
    /**
     * Report the character encoding currently configured for input files.
     *
     * @return string Encoding name, 'UTF-8' unless changed via setInputEncoding()
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
84
85
    /**
     * Rewind the file and position the pointer just after a byte order mark.
     *
     * The BOM that is looked for depends on the configured input encoding.
     * For an encoding without a known BOM the pointer is simply left at the
     * start of the file. When the expected BOM is absent the pointer is moved
     * back to offset 0.
     */
    protected function skipBOM()
    {
        rewind($this->fileHandle);

        // Pick the byte order mark that belongs to the configured encoding
        switch ($this->inputEncoding) {
            case 'UTF-8':
                $bom = "\xEF\xBB\xBF";

                break;
            case 'UTF-16LE':
                $bom = "\xFF\xFE";

                break;
            case 'UTF-16BE':
                $bom = "\xFE\xFF";

                break;
            case 'UTF-32LE':
                $bom = "\xFF\xFE\x00\x00";

                break;
            case 'UTF-32BE':
                $bom = "\x00\x00\xFE\xFF";

                break;
            default:
                // No BOM known for this encoding: stay at the start of the file
                return;
        }

        // fgets() reads at most (length - 1) bytes, hence the + 1
        $bomLength = strlen($bom);
        $leadingBytes = fgets($this->fileHandle, $bomLength + 1);

        $offset = ($leadingBytes == $bom) ? $bomLength : 0;
        fseek($this->fileHandle, $offset);
    }
122
123
    /**
     * Look for an explicit "sep=<char>" directive on the next line of the file.
     *
     * When the line consists of exactly "sep=" followed by one character, that
     * character becomes the delimiter and the pointer stays after the
     * directive line. For any other line the pointer is reset through
     * skipBOM() so the line is read again as data.
     */
    protected function checkSeparator()
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            // fgets() returns false when nothing could be read (e.g. an empty file)
            return;
        }

        $isDirective = stripos($firstLine, 'sep=') === 0
            && strlen(trim($firstLine, "\r\n")) == 5;

        if (!$isDirective) {
            // Not a directive: go back so this line is treated as regular data
            $this->skipBOM();

            return;
        }

        // The character right after "sep=" is the delimiter
        $this->delimiter = substr($firstLine, 4, 1);
    }
141
142
    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * Does nothing when a delimiter is already known. Otherwise up to 999
     * lines are read from the current file position and, for every candidate
     * delimiter, the number of occurrences per line (outside enclosures) is
     * recorded. Candidates whose median count is 0 are discarded; among the
     * rest the one with the smallest mean square deviation from its median
     * wins, ties going to the candidate listed first. If nothing qualifies,
     * or no line could be read at all, the first candidate (',') is used.
     *
     * The file pointer is reset through skipBOM() once a delimiter has been chosen.
     */
    protected function inferSeparator()
    {
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
        $counts = [];
        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter] = [];
        }

        // The enclosure does not change while reading, so quote it once
        $enclosure = preg_quote($this->enclosure, '/');

        // Count how many times each of the potential delimiters appears in each line
        $linesRead = 0;
        $linesAnalysed = 0;
        while (($line = fgets($this->fileHandle)) !== false && (++$linesRead < 1000)) {
            ++$linesAnalysed;

            // Drop everything that is enclosed to avoid counting false positives in enclosures
            $line = (string) preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line);

            foreach ($potentialDelimiters as $delimiter) {
                $counts[$delimiter][] = substr_count($line, $delimiter);
            }
        }

        // Nothing to analyse (e.g. an empty file): fall back to the default right away
        // instead of indexing into empty series below
        if ($linesAnalysed === 0) {
            $this->delimiter = reset($potentialDelimiters);
            $this->skipBOM();

            return;
        }

        // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently).
        // The median position is derived from the number of lines actually analysed, which is the length of every series.
        $meanSquareDeviations = [];
        $middleIdx = (int) floor(($linesAnalysed - 1) / 2);
        $oddSampleSize = ($linesAnalysed % 2) === 1;

        foreach ($potentialDelimiters as $delimiter) {
            $series = $counts[$delimiter];
            sort($series);

            $median = $oddSampleSize
                ? $series[$middleIdx]
                : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

            if ($median === 0) {
                continue;
            }

            $meanSquareDeviations[$delimiter] = array_reduce(
                $series,
                function ($sum, $value) use ($median) {
                    return $sum + pow($value - $median, 2);
                },
                0
            ) / count($series);
        }

        // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
        $min = INF;
        foreach ($potentialDelimiters as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default
        if ($this->delimiter === null) {
            $this->delimiter = reset($potentialDelimiters);
        }

        $this->skipBOM();
    }
225
226
    /**
     * Describe the single worksheet a CSV file represents.
     *
     * The file is read once from start to end to count its rows and find the
     * widest row. The result always holds exactly one entry with the keys
     * worksheetName, lastColumnLetter, lastColumnIndex, totalRows and totalColumns.
     *
     * @param string $pFilename
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return array
     */
    public function listWorksheetInfo($pFilename)
    {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        // Skip BOM, if any, then settle on a delimiter
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Walk through every record, tracking the row count and the widest row
        $totalRows = 0;
        $lastColumnIndex = 0;
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
            ++$totalRows;
            $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
        }

        // Column indexes are 0-based here, stringFromColumnIndex() is given the 1-based value
        $lastColumnLetter = Coordinate::stringFromColumnIndex($lastColumnIndex + 1);

        // Close file
        fclose($fileHandle);

        return [
            0 => [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => $lastColumnLetter,
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $totalRows,
                'totalColumns' => $lastColumnIndex + 1,
            ],
        ];
    }
270
271
    /**
     * Read a CSV file into a newly created Spreadsheet.
     *
     * @param string $pFilename
     *
     * @throws Exception
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        // A fresh workbook is created and handed to loadIntoExisting() to be filled
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }
288
289
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The data is written to the worksheet at the configured sheet index;
     * missing worksheets are created first. Empty fields and cells rejected
     * by the read filter are skipped. In contiguous mode the starting row is
     * taken from the previous load (or the sheet's highest row on the first
     * load) and the row reached is remembered for the next one.
     *
     * The 'auto_detect_line_endings' ini setting is switched on while reading
     * and is restored, and the file closed, even when an exception interrupts
     * the load.
     *
     * @param string $pFilename
     * @param Spreadsheet $spreadsheet
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', true);

        $fileHandle = null;

        try {
            // Open file
            if (!$this->canRead($pFilename)) {
                throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
            }
            $this->openFile($pFilename);
            $fileHandle = $this->fileHandle;

            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target worksheet exists, then select it
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            if ($this->contiguous) {
                $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
            }

            // Loop through each line of the file in turn
            while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        // Convert encoding if necessary
                        if ($this->inputEncoding !== 'UTF-8') {
                            $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                        }

                        // Set cell value
                        $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                    }
                    // String increment: 'A' -> 'B', ..., 'Z' -> 'AA'
                    ++$columnLetter;
                }
                ++$currentRow;
            }

            // Remember where the next contiguous load has to continue
            if ($this->contiguous) {
                $this->contiguousRow = $currentRow;
            }
        } finally {
            // Close file (only if it was opened by this call) and restore the ini setting on every exit path
            if ($fileHandle !== null) {
                fclose($fileHandle);
            }
            ini_set('auto_detect_line_endings', $lineEnding);
        }

        // Return
        return $spreadsheet;
    }
358
359
    /**
     * Report the field delimiter currently in effect.
     *
     * @return null|string The delimiter, or null while none has been set, read from the file or inferred
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }
368
369
    /**
     * Choose the field delimiter explicitly.
     *
     * A delimiter set this way makes inferSeparator() skip its detection.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }
382
383
    /**
     * Report the enclosure character currently in effect.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }
392
393
    /**
     * Choose the enclosure character.
     *
     * A value that loosely equals the empty string selects the default, ".
     *
     * @param string $enclosure Enclosure, defaults to "
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setEnclosure($enclosure)
    {
        $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

        return $this;
    }
409
410
    /**
     * Report the index of the worksheet that loaded data is written to.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
419
420
    /**
     * Choose the worksheet that loaded data is written to.
     *
     * loadIntoExisting() creates worksheets as needed until this index exists.
     *
     * @param int $pValue Sheet index
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }
433
434
    /**
     * Switch contiguous loading on or off.
     *
     * Switching it off also forgets the row position remembered from earlier
     * contiguous loads.
     *
     * @param bool $contiguous
     *
     * @return Csv This reader, to allow method chaining
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;

        if ($this->contiguous === false) {
            // -1 marks "no position recorded"
            $this->contiguousRow = -1;
        }

        return $this;
    }
450
451
    /**
     * Report whether contiguous loading is switched on.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }
460
461
    /**
     * Can the current IReader read the file?
     *
     * The file must be openable through openFile() and its detected MIME type
     * must be one of text/csv, text/plain or inode/x-empty.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        // A file that cannot be opened cannot be read
        try {
            $this->openFile($pFilename);
        } catch (Exception $e) {
            return false;
        }

        // The handle was only needed for the check above
        fclose($this->fileHandle);

        $detectedType = mime_content_type($pFilename);

        return in_array($detectedType, ['text/csv', 'text/plain', 'inode/x-empty'], true);
    }
488
}
489