Completed
Push — develop ( 4fd8e7...d3e769 )
by Adrien
14:12 queued 07:04
created

Csv::setDelimiter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 3
nc 1
nop 1
dl 0
loc 6
ccs 0
cts 3
cp 0
crap 2
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace PhpOffice\PhpSpreadsheet\Reader;
4
5
use PhpOffice\PhpSpreadsheet\Cell;
6
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
7
use PhpOffice\PhpSpreadsheet\Spreadsheet;
8
9
class Csv extends BaseReader implements IReader
10
{
11
    /**
12
     * Input encoding.
13
     *
14
     * @var string
15
     */
16
    private $inputEncoding = 'UTF-8';
17
18
    /**
19
     * Delimiter.
20
     *
21
     * @var null|string
22
     */
23
    private $delimiter;
24
25
    /**
26
     * Enclosure.
27
     *
28
     * @var string
29
     */
30
    private $enclosure = '"';
31
32
    /**
33
     * Sheet index to read.
34
     *
35
     * @var int
36
     */
37
    private $sheetIndex = 0;
38
39
    /**
40
     * Load rows contiguously.
41
     *
42
     * @var bool
43
     */
44
    private $contiguous = false;
45
46
    /**
47
     * Row counter for loading rows contiguously.
48
     *
49
     * @var int
50
     */
51
    private $contiguousRow = -1;
52
53
    /**
     * Build a CSV reader.
     *
     * The reader starts out with a fresh DefaultReadFilter as its read filter.
     */
    public function __construct()
    {
        $defaultFilter = new DefaultReadFilter();
        $this->readFilter = $defaultFilter;
    }
60
61
    /**
     * Define the character encoding the input file is expected to use.
     *
     * The value is consulted when skipping a byte order mark and when
     * converting cell values to UTF-8 during loading.
     *
     * @param string $pValue Input encoding, eg: 'UTF-8'
     *
     * @return $this
     */
    public function setInputEncoding($pValue)
    {
        $this->inputEncoding = $pValue;

        return $this;
    }
72
73
    /**
     * Report the character encoding the input file is expected to use.
     *
     * @return string
     */
    public function getInputEncoding()
    {
        return $this->inputEncoding;
    }
82
83
    /**
     * Move the file pointer past any byte order mark (BOM).
     *
     * The file is rewound and its leading bytes are compared with the BOM
     * belonging to the configured input encoding. On a match the pointer is
     * left immediately after the BOM; otherwise (or when the encoding has no
     * BOM listed here) the pointer is left at the very start of the file.
     */
    protected function skipBOM()
    {
        rewind($this->fileHandle);

        // BOM signatures, keyed by the input encodings this reader knows about.
        $byteOrderMarks = [
            'UTF-8' => "\xEF\xBB\xBF",
            'UTF-16LE' => "\xFF\xFE",
            'UTF-16BE' => "\xFE\xFF",
            'UTF-32LE' => "\xFF\xFE\x00\x00",
            'UTF-32BE' => "\x00\x00\xFE\xFF",
        ];

        if (!isset($byteOrderMarks[$this->inputEncoding])) {
            return;
        }

        $bom = $byteOrderMarks[$this->inputEncoding];

        // Reading exactly strlen($bom) bytes leaves the pointer right after the
        // BOM when it is present, so only the "no BOM" case needs a rewind.
        if (fread($this->fileHandle, strlen($bom)) !== $bom) {
            rewind($this->fileHandle);
        }
    }
120
121
    /**
     * Look for a separator that the file itself declares via a "sep=X" line.
     *
     * One line is read from the current file position. If, ignoring its line
     * ending, it is exactly five bytes long and begins with "sep="
     * (case-insensitive), the fifth byte becomes the delimiter and the pointer
     * stays after that line. If the read fails nothing else happens. In every
     * other case the pointer is repositioned through skipBOM().
     */
    protected function checkSeparator()
    {
        $firstLine = fgets($this->fileHandle);
        if ($firstLine === false) {
            return;
        }

        $hasDeclarationLength = strlen(trim($firstLine, "\r\n")) == 5;
        $hasDeclarationPrefix = stripos($firstLine, 'sep=') === 0;

        if (!$hasDeclarationLength || !$hasDeclarationPrefix) {
            // Not a declaration: the line is ordinary data, so go back to the start.
            $this->skipBOM();

            return;
        }

        $this->delimiter = substr($firstLine, 4, 1);
    }
139
140
    /**
     * Infer the separator if it isn't explicitly set in the file or specified by the user.
     *
     * Up to 999 lines are sampled from the current file position. For each
     * candidate delimiter the number of occurrences per line is recorded, and
     * the candidate whose per-line counts deviate least from their median
     * (smallest mean square deviation) is chosen. Candidates with a median of
     * zero are ignored. Ties are resolved in favour of the candidate listed
     * first, and when nothing qualifies (including an empty file) the first
     * candidate, a comma, is used.
     *
     * Afterwards the file pointer is repositioned through skipBOM().
     */
    protected function inferSeparator()
    {
        // A delimiter that is already known always takes precedence.
        if ($this->delimiter !== null) {
            return;
        }

        $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
        $counts = array_fill_keys($potentialDelimiters, []);

        // Count how many times each of the potential delimiters appears in each line.
        // $numberLines is only advanced for lines that are actually sampled, so it
        // always equals the length of every series in $counts.
        $maxSampledLines = 999;
        $numberLines = 0;
        while ($numberLines < $maxSampledLines && ($line = fgets($this->fileHandle)) !== false) {
            ++$numberLines;

            // Mode 1 yields [byte value => occurrences] for bytes that occur at least once.
            $byteCounts = count_chars($line, 1);
            foreach ($potentialDelimiters as $delimiter) {
                $byte = ord($delimiter);
                $counts[$delimiter][] = isset($byteCounts[$byte]) ? $byteCounts[$byte] : 0;
            }
        }

        // Calculate the mean square deviations for each delimiter
        // (ignoring delimiters that haven't been found consistently).
        $meanSquareDeviations = [];
        if ($numberLines > 0) {
            $middleIdx = (int) floor(($numberLines - 1) / 2);

            foreach ($potentialDelimiters as $delimiter) {
                $series = $counts[$delimiter];
                sort($series);

                $median = ($numberLines % 2)
                    ? $series[$middleIdx]
                    : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

                // Integer division of two zero counts stays an integer, so a
                // strict comparison is sufficient here.
                if ($median === 0) {
                    continue;
                }

                $meanSquareDeviations[$delimiter] = array_reduce(
                    $series,
                    function ($sum, $value) use ($median) {
                        return $sum + pow($value - $median, 2);
                    },
                    0
                ) / $numberLines;
            }
        }

        // ... and pick the delimiter with the smallest mean square deviation
        // (in case of ties, the order in $potentialDelimiters is respected).
        $min = INF;
        foreach ($potentialDelimiters as $delimiter) {
            if (!isset($meanSquareDeviations[$delimiter])) {
                continue;
            }

            if ($meanSquareDeviations[$delimiter] < $min) {
                $min = $meanSquareDeviations[$delimiter];
                $this->delimiter = $delimiter;
            }
        }

        // If no delimiter could be detected, fall back to the default.
        if ($this->delimiter === null) {
            $this->delimiter = $potentialDelimiters[0];
        }

        $this->skipBOM();
    }
219
220
    /**
     * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
     *
     * The file is read once with the detected delimiter; every parsed row is
     * counted and the widest row determines the last column.
     *
     * @param string $pFilename
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return array Single-element list holding the keys worksheetName,
     *               lastColumnLetter, lastColumnIndex, totalRows and totalColumns
     */
    public function listWorksheetInfo($pFilename)
    {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        // Position the pointer on the first data row and settle the delimiter
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Walk the file, tracking the row count and the widest row seen so far
        $totalRows = 0;
        $lastColumnIndex = 0;
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
            ++$totalRows;
            $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
        }

        $lastColumnLetter = Cell::stringFromColumnIndex($lastColumnIndex);

        // Close file
        fclose($fileHandle);

        return [
            [
                'worksheetName' => 'Worksheet',
                'lastColumnLetter' => $lastColumnLetter,
                'lastColumnIndex' => $lastColumnIndex,
                'totalRows' => $totalRows,
                'totalColumns' => $lastColumnIndex + 1,
            ],
        ];
    }
262
263
    /**
     * Read a CSV file into a brand-new Spreadsheet.
     *
     * Delegates to loadIntoExisting() with a freshly created Spreadsheet.
     *
     * @param string $pFilename
     *
     * @throws Exception
     *
     * @return Spreadsheet
     */
    public function load($pFilename)
    {
        return $this->loadIntoExisting($pFilename, new Spreadsheet());
    }
280
281
    /**
     * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
     *
     * The CSV rows are written to the sheet selected by the configured sheet
     * index (missing sheets are created first). Empty fields and cells
     * rejected by the read filter are skipped; values are converted to UTF-8
     * when the input encoding differs. In contiguous mode the starting row is
     * the sheet's highest row on the first load and the row following the
     * previous load afterwards.
     *
     * The "auto_detect_line_endings" ini setting is switched on while the
     * file is read and restored before returning.
     *
     * @param string $pFilename
     * @param Spreadsheet $spreadsheet
     *
     * @throws Exception When canRead() rejects the file
     *
     * @return Spreadsheet
     */
    public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
    {
        // Validate the file before touching global ini state, so that the
        // exception below cannot leave "auto_detect_line_endings" overridden.
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }

        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', true);

        // Open file
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        // Skip BOM, if any
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Create new PhpSpreadsheet object
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
            $spreadsheet->createSheet();
        }
        $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Set our starting row based on whether we're in contiguous mode or not
        $currentRow = 1;
        if ($this->contiguous) {
            $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
        }

        // Loop through each line of the file in turn
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
            $columnLetter = 'A';
            foreach ($rowData as $rowDatum) {
                if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                    // Convert encoding if necessary
                    if ($this->inputEncoding !== 'UTF-8') {
                        $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                    }

                    // Set cell value
                    $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                }
                // String increment walks the column letters: A, B, ..., Z, AA, ...
                ++$columnLetter;
            }
            ++$currentRow;
        }

        // Close file
        fclose($fileHandle);

        // Remember where the next contiguous load has to continue
        if ($this->contiguous) {
            $this->contiguousRow = $currentRow;
        }

        ini_set('auto_detect_line_endings', $lineEnding);

        // Return
        return $spreadsheet;
    }
350
351
    /**
     * Report the field delimiter currently in effect.
     *
     * The value is null until a delimiter has been set explicitly or
     * determined while reading a file.
     *
     * @return null|string
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }
360
361
    /**
     * Define the field delimiter to use when parsing.
     *
     * @param string $delimiter Delimiter, eg: ','
     *
     * @return $this
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        return $this;
    }
374
375
    /**
     * Report the field enclosure character.
     *
     * @return string
     */
    public function getEnclosure()
    {
        return $this->enclosure;
    }
384
385
    /**
     * Define the field enclosure character.
     *
     * A value that loosely equals the empty string is replaced by the
     * double quote.
     *
     * @param string $enclosure Enclosure, defaults to "
     *
     * @return $this
     */
    public function setEnclosure($enclosure)
    {
        $this->enclosure = ($enclosure == '') ? '"' : $enclosure;

        return $this;
    }
401
402
    /**
     * Report the index of the sheet that loaded data is written to.
     *
     * @return int
     */
    public function getSheetIndex()
    {
        return $this->sheetIndex;
    }
411
412
    /**
     * Choose the index of the sheet that loaded data is written to.
     *
     * @param int $pValue Sheet index
     *
     * @return $this
     */
    public function setSheetIndex($pValue)
    {
        $this->sheetIndex = $pValue;

        return $this;
    }
425
426
    /**
     * Switch contiguous loading on or off.
     *
     * Switching it off also resets the remembered contiguous row to -1.
     *
     * @param bool $contiguous
     *
     * @return $this
     */
    public function setContiguous($contiguous)
    {
        $this->contiguous = (bool) $contiguous;

        if ($this->contiguous === false) {
            $this->contiguousRow = -1;
        }

        return $this;
    }
440
441
    /**
     * Report whether contiguous loading is switched on.
     *
     * @return bool
     */
    public function getContiguous()
    {
        return $this->contiguous;
    }
450
451
    /**
     * Can the current IReader read the file?
     *
     * The check consists of opening the file: if openFile() raises a reader
     * Exception the answer is false, otherwise the handle is closed again and
     * the answer is true.
     *
     * @param string $pFilename
     *
     * @return bool
     */
    public function canRead($pFilename)
    {
        try {
            $this->openFile($pFilename);
            $opened = true;
        } catch (Exception $e) {
            $opened = false;
        }

        if ($opened) {
            // Only probing: release the handle straight away.
            fclose($this->fileHandle);
        }

        return $opened;
    }
473
}
474