1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
6
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\StringHelper; |
7
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
8
|
|
|
|
9
|
|
|
class Csv extends BaseReader |
10
|
|
|
{ |
11
|
|
|
/** |
12
|
|
|
* Input encoding. |
13
|
|
|
* |
14
|
|
|
* @var string |
15
|
|
|
*/ |
16
|
|
|
private $inputEncoding = 'UTF-8'; |
17
|
|
|
|
18
|
|
|
/** |
19
|
|
|
* Delimiter. |
20
|
|
|
* |
21
|
|
|
* @var string |
22
|
|
|
*/ |
23
|
|
|
private $delimiter; |
24
|
|
|
|
25
|
|
|
/** |
26
|
|
|
* Enclosure. |
27
|
|
|
* |
28
|
|
|
* @var string |
29
|
|
|
*/ |
30
|
|
|
private $enclosure = '"'; |
31
|
|
|
|
32
|
|
|
/** |
33
|
|
|
* Sheet index to read. |
34
|
|
|
* |
35
|
|
|
* @var int |
36
|
|
|
*/ |
37
|
|
|
private $sheetIndex = 0; |
38
|
|
|
|
39
|
|
|
/** |
40
|
|
|
* Load rows contiguously. |
41
|
|
|
* |
42
|
|
|
* @var bool |
43
|
|
|
*/ |
44
|
|
|
private $contiguous = false; |
45
|
|
|
|
46
|
|
|
/** |
47
|
|
|
* Row counter for loading rows contiguously. |
48
|
|
|
* |
49
|
|
|
* @var int |
50
|
|
|
*/ |
51
|
|
|
private $contiguousRow = -1; |
52
|
|
|
|
53
|
|
|
/** |
54
|
|
|
* The character that can escape the enclosure. |
55
|
|
|
* |
56
|
|
|
* @var string |
57
|
|
|
*/ |
58
|
|
|
private $escapeCharacter = '\\'; |
59
|
|
|
|
60
|
|
|
/** |
61
|
|
|
* Create a new CSV Reader instance. |
62
|
|
|
*/ |
63
|
20 |
|
public function __construct() |
64
|
|
|
{ |
65
|
20 |
|
$this->readFilter = new DefaultReadFilter(); |
66
|
20 |
|
} |
67
|
|
|
|
68
|
|
|
/** |
69
|
|
|
* Set input encoding. |
70
|
|
|
* |
71
|
|
|
* @param string $pValue Input encoding, eg: 'UTF-8' |
72
|
|
|
* |
73
|
|
|
* @return Csv |
74
|
|
|
*/ |
75
|
|
|
public function setInputEncoding($pValue) |
76
|
|
|
{ |
77
|
|
|
$this->inputEncoding = $pValue; |
78
|
|
|
|
79
|
|
|
return $this; |
80
|
|
|
} |
81
|
|
|
|
82
|
|
|
/** |
83
|
|
|
* Get input encoding. |
84
|
|
|
* |
85
|
|
|
* @return string |
86
|
|
|
*/ |
87
|
|
|
public function getInputEncoding() |
88
|
|
|
{ |
89
|
|
|
return $this->inputEncoding; |
90
|
|
|
} |
91
|
|
|
|
92
|
|
|
/** |
93
|
|
|
* Move filepointer past any BOM marker. |
94
|
|
|
*/ |
95
|
11 |
|
protected function skipBOM() |
96
|
|
|
{ |
97
|
11 |
|
rewind($this->fileHandle); |
98
|
|
|
|
99
|
11 |
|
switch ($this->inputEncoding) { |
100
|
11 |
|
case 'UTF-8': |
101
|
11 |
|
fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? |
102
|
11 |
|
fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); |
103
|
|
|
|
104
|
11 |
|
break; |
105
|
|
|
case 'UTF-16LE': |
106
|
|
|
fgets($this->fileHandle, 3) == "\xFF\xFE" ? |
107
|
|
|
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
108
|
|
|
|
109
|
|
|
break; |
110
|
|
|
case 'UTF-16BE': |
111
|
|
|
fgets($this->fileHandle, 3) == "\xFE\xFF" ? |
112
|
|
|
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
113
|
|
|
|
114
|
|
|
break; |
115
|
|
|
case 'UTF-32LE': |
116
|
|
|
fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? |
117
|
|
|
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
118
|
|
|
|
119
|
|
|
break; |
120
|
|
|
case 'UTF-32BE': |
121
|
|
|
fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? |
122
|
|
|
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
123
|
|
|
|
124
|
|
|
break; |
125
|
|
|
default: |
126
|
|
|
break; |
127
|
|
|
} |
128
|
11 |
|
} |
129
|
|
|
|
130
|
|
|
/** |
131
|
|
|
* Identify any separator that is explicitly set in the file. |
132
|
|
|
*/ |
133
|
11 |
|
protected function checkSeparator() |
134
|
|
|
{ |
135
|
11 |
|
$line = fgets($this->fileHandle); |
136
|
11 |
|
if ($line === false) { |
137
|
|
|
return; |
138
|
|
|
} |
139
|
|
|
|
140
|
11 |
|
if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { |
141
|
|
|
$this->delimiter = substr($line, 4, 1); |
142
|
|
|
|
143
|
|
|
return; |
144
|
|
|
} |
145
|
|
|
|
146
|
11 |
|
return $this->skipBOM(); |
|
|
|
|
147
|
|
|
} |
148
|
|
|
|
149
|
|
|
/** |
150
|
|
|
* Infer the separator if it isn't explicitly set in the file or specified by the user. |
151
|
|
|
*/ |
152
|
11 |
|
protected function inferSeparator() |
153
|
|
|
{ |
154
|
11 |
|
if ($this->delimiter !== null) { |
155
|
4 |
|
return; |
156
|
|
|
} |
157
|
|
|
|
158
|
9 |
|
$potentialDelimiters = [',', ';', "\t", '|', ':', ' ']; |
159
|
9 |
|
$counts = []; |
160
|
9 |
|
foreach ($potentialDelimiters as $delimiter) { |
161
|
9 |
|
$counts[$delimiter] = []; |
162
|
|
|
} |
163
|
|
|
|
164
|
|
|
// Count how many times each of the potential delimiters appears in each line |
165
|
9 |
|
$numberLines = 0; |
166
|
9 |
|
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { |
167
|
|
|
// Drop everything that is enclosed to avoid counting false positives in enclosures |
168
|
9 |
|
$enclosure = preg_quote($this->enclosure, '/'); |
169
|
9 |
|
$line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line); |
170
|
|
|
|
171
|
9 |
|
$countLine = []; |
172
|
9 |
|
for ($i = strlen($line) - 1; $i >= 0; --$i) { |
173
|
9 |
|
$char = $line[$i]; |
174
|
9 |
|
if (isset($counts[$char])) { |
175
|
8 |
|
if (!isset($countLine[$char])) { |
176
|
8 |
|
$countLine[$char] = 0; |
177
|
|
|
} |
178
|
8 |
|
++$countLine[$char]; |
179
|
|
|
} |
180
|
|
|
} |
181
|
9 |
|
foreach ($potentialDelimiters as $delimiter) { |
182
|
9 |
|
$counts[$delimiter][] = isset($countLine[$delimiter]) |
183
|
8 |
|
? $countLine[$delimiter] |
184
|
9 |
|
: 0; |
185
|
|
|
} |
186
|
|
|
} |
187
|
|
|
|
188
|
|
|
// Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) |
189
|
9 |
|
$meanSquareDeviations = []; |
190
|
9 |
|
$middleIdx = floor(($numberLines - 1) / 2); |
191
|
|
|
|
192
|
9 |
|
foreach ($potentialDelimiters as $delimiter) { |
193
|
9 |
|
$series = $counts[$delimiter]; |
194
|
9 |
|
sort($series); |
195
|
|
|
|
196
|
9 |
|
$median = ($numberLines % 2) |
197
|
5 |
|
? $series[$middleIdx] |
198
|
9 |
|
: ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; |
199
|
|
|
|
200
|
9 |
|
if ($median === 0) { |
201
|
9 |
|
continue; |
202
|
|
|
} |
203
|
|
|
|
204
|
8 |
|
$meanSquareDeviations[$delimiter] = array_reduce( |
205
|
8 |
|
$series, |
206
|
8 |
|
function ($sum, $value) use ($median) { |
207
|
8 |
|
return $sum + pow($value - $median, 2); |
208
|
8 |
|
} |
209
|
8 |
|
) / count($series); |
210
|
|
|
} |
211
|
|
|
|
212
|
|
|
// ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) |
213
|
9 |
|
$min = INF; |
214
|
9 |
|
foreach ($potentialDelimiters as $delimiter) { |
215
|
9 |
|
if (!isset($meanSquareDeviations[$delimiter])) { |
216
|
9 |
|
continue; |
217
|
|
|
} |
218
|
|
|
|
219
|
8 |
|
if ($meanSquareDeviations[$delimiter] < $min) { |
220
|
8 |
|
$min = $meanSquareDeviations[$delimiter]; |
221
|
8 |
|
$this->delimiter = $delimiter; |
222
|
|
|
} |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
// If no delimiter could be detected, fall back to the default |
226
|
9 |
|
if ($this->delimiter === null) { |
227
|
1 |
|
$this->delimiter = reset($potentialDelimiters); |
228
|
|
|
} |
229
|
|
|
|
230
|
9 |
|
return $this->skipBOM(); |
|
|
|
|
231
|
|
|
} |
232
|
|
|
|
233
|
|
|
/** |
234
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
235
|
|
|
* |
236
|
|
|
* @param string $pFilename |
237
|
|
|
* |
238
|
|
|
* @throws Exception |
239
|
|
|
* |
240
|
|
|
* @return array |
241
|
|
|
*/ |
242
|
|
|
public function listWorksheetInfo($pFilename) |
243
|
|
|
{ |
244
|
|
|
// Open file |
245
|
|
|
if (!$this->canRead($pFilename)) { |
246
|
|
|
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
247
|
|
|
} |
248
|
|
|
$this->openFile($pFilename); |
249
|
|
|
$fileHandle = $this->fileHandle; |
250
|
|
|
|
251
|
|
|
// Skip BOM, if any |
252
|
|
|
$this->skipBOM(); |
253
|
|
|
$this->checkSeparator(); |
254
|
|
|
$this->inferSeparator(); |
255
|
|
|
|
256
|
|
|
$worksheetInfo = []; |
257
|
|
|
$worksheetInfo[0]['worksheetName'] = 'Worksheet'; |
258
|
|
|
$worksheetInfo[0]['lastColumnLetter'] = 'A'; |
259
|
|
|
$worksheetInfo[0]['lastColumnIndex'] = 0; |
260
|
|
|
$worksheetInfo[0]['totalRows'] = 0; |
261
|
|
|
$worksheetInfo[0]['totalColumns'] = 0; |
262
|
|
|
|
263
|
|
|
// Loop through each line of the file in turn |
264
|
|
|
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { |
|
|
|
|
265
|
|
|
++$worksheetInfo[0]['totalRows']; |
266
|
|
|
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); |
267
|
|
|
} |
268
|
|
|
|
269
|
|
|
$worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); |
270
|
|
|
$worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; |
271
|
|
|
|
272
|
|
|
// Close file |
273
|
|
|
fclose($fileHandle); |
|
|
|
|
274
|
|
|
|
275
|
|
|
return $worksheetInfo; |
276
|
|
|
} |
277
|
|
|
|
278
|
|
|
/** |
279
|
|
|
* Loads Spreadsheet from file. |
280
|
|
|
* |
281
|
|
|
* @param string $pFilename |
282
|
|
|
* |
283
|
|
|
* @throws Exception |
284
|
|
|
* |
285
|
|
|
* @return Spreadsheet |
286
|
|
|
*/ |
287
|
10 |
|
public function load($pFilename) |
288
|
|
|
{ |
289
|
|
|
// Create new Spreadsheet |
290
|
10 |
|
$spreadsheet = new Spreadsheet(); |
291
|
|
|
|
292
|
|
|
// Load into this instance |
293
|
10 |
|
return $this->loadIntoExisting($pFilename, $spreadsheet); |
294
|
|
|
} |
295
|
|
|
|
296
|
|
|
/** |
297
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
298
|
|
|
* |
299
|
|
|
* @param string $pFilename |
300
|
|
|
* @param Spreadsheet $spreadsheet |
301
|
|
|
* |
302
|
|
|
* @throws Exception |
303
|
|
|
* |
304
|
|
|
* @return Spreadsheet |
305
|
|
|
*/ |
306
|
11 |
|
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) |
307
|
|
|
{ |
308
|
11 |
|
$lineEnding = ini_get('auto_detect_line_endings'); |
309
|
11 |
|
ini_set('auto_detect_line_endings', true); |
|
|
|
|
310
|
|
|
|
311
|
|
|
// Open file |
312
|
11 |
|
if (!$this->canRead($pFilename)) { |
313
|
|
|
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
314
|
|
|
} |
315
|
11 |
|
$this->openFile($pFilename); |
316
|
11 |
|
$fileHandle = $this->fileHandle; |
317
|
|
|
|
318
|
|
|
// Skip BOM, if any |
319
|
11 |
|
$this->skipBOM(); |
320
|
11 |
|
$this->checkSeparator(); |
321
|
11 |
|
$this->inferSeparator(); |
322
|
|
|
|
323
|
|
|
// Create new PhpSpreadsheet object |
324
|
11 |
|
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { |
325
|
2 |
|
$spreadsheet->createSheet(); |
326
|
|
|
} |
327
|
11 |
|
$sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); |
328
|
|
|
|
329
|
|
|
// Set our starting row based on whether we're in contiguous mode or not |
330
|
11 |
|
$currentRow = 1; |
331
|
11 |
|
if ($this->contiguous) { |
332
|
1 |
|
$currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; |
333
|
|
|
} |
334
|
|
|
|
335
|
|
|
// Loop through each line of the file in turn |
336
|
11 |
|
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { |
|
|
|
|
337
|
11 |
|
$columnLetter = 'A'; |
338
|
11 |
|
foreach ($rowData as $rowDatum) { |
339
|
11 |
|
if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { |
340
|
|
|
// Convert encoding if necessary |
341
|
11 |
|
if ($this->inputEncoding !== 'UTF-8') { |
342
|
|
|
$rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); |
343
|
|
|
} |
344
|
|
|
|
345
|
|
|
// Set cell value |
346
|
11 |
|
$sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); |
347
|
|
|
} |
348
|
11 |
|
++$columnLetter; |
349
|
|
|
} |
350
|
11 |
|
++$currentRow; |
351
|
|
|
} |
352
|
|
|
|
353
|
|
|
// Close file |
354
|
11 |
|
fclose($fileHandle); |
|
|
|
|
355
|
|
|
|
356
|
11 |
|
if ($this->contiguous) { |
357
|
1 |
|
$this->contiguousRow = $currentRow; |
358
|
|
|
} |
359
|
|
|
|
360
|
11 |
|
ini_set('auto_detect_line_endings', $lineEnding); |
361
|
|
|
|
362
|
|
|
// Return |
363
|
11 |
|
return $spreadsheet; |
364
|
|
|
} |
365
|
|
|
|
366
|
|
|
/** |
367
|
|
|
* Get delimiter. |
368
|
|
|
* |
369
|
|
|
* @return string |
370
|
|
|
*/ |
371
|
5 |
|
public function getDelimiter() |
372
|
|
|
{ |
373
|
5 |
|
return $this->delimiter; |
374
|
|
|
} |
375
|
|
|
|
376
|
|
|
/** |
377
|
|
|
* Set delimiter. |
378
|
|
|
* |
379
|
|
|
* @param string $delimiter Delimiter, eg: ',' |
380
|
|
|
* |
381
|
|
|
* @return CSV |
382
|
|
|
*/ |
383
|
2 |
|
public function setDelimiter($delimiter) |
384
|
|
|
{ |
385
|
2 |
|
$this->delimiter = $delimiter; |
386
|
|
|
|
387
|
2 |
|
return $this; |
388
|
|
|
} |
389
|
|
|
|
390
|
|
|
/** |
391
|
|
|
* Get enclosure. |
392
|
|
|
* |
393
|
|
|
* @return string |
394
|
|
|
*/ |
395
|
|
|
public function getEnclosure() |
396
|
|
|
{ |
397
|
|
|
return $this->enclosure; |
398
|
|
|
} |
399
|
|
|
|
400
|
|
|
/** |
401
|
|
|
* Set enclosure. |
402
|
|
|
* |
403
|
|
|
* @param string $enclosure Enclosure, defaults to " |
404
|
|
|
* |
405
|
|
|
* @return CSV |
406
|
|
|
*/ |
407
|
1 |
|
public function setEnclosure($enclosure) |
408
|
|
|
{ |
409
|
1 |
|
if ($enclosure == '') { |
410
|
|
|
$enclosure = '"'; |
411
|
|
|
} |
412
|
1 |
|
$this->enclosure = $enclosure; |
413
|
|
|
|
414
|
1 |
|
return $this; |
415
|
|
|
} |
416
|
|
|
|
417
|
|
|
/** |
418
|
|
|
* Get sheet index. |
419
|
|
|
* |
420
|
|
|
* @return int |
421
|
|
|
*/ |
422
|
|
|
public function getSheetIndex() |
423
|
|
|
{ |
424
|
|
|
return $this->sheetIndex; |
425
|
|
|
} |
426
|
|
|
|
427
|
|
|
/** |
428
|
|
|
* Set sheet index. |
429
|
|
|
* |
430
|
|
|
* @param int $pValue Sheet index |
431
|
|
|
* |
432
|
|
|
* @return CSV |
433
|
|
|
*/ |
434
|
3 |
|
public function setSheetIndex($pValue) |
435
|
|
|
{ |
436
|
3 |
|
$this->sheetIndex = $pValue; |
437
|
|
|
|
438
|
3 |
|
return $this; |
439
|
|
|
} |
440
|
|
|
|
441
|
|
|
/** |
442
|
|
|
* Set Contiguous. |
443
|
|
|
* |
444
|
|
|
* @param bool $contiguous |
445
|
|
|
* |
446
|
|
|
* @return Csv |
447
|
|
|
*/ |
448
|
1 |
|
public function setContiguous($contiguous) |
449
|
|
|
{ |
450
|
1 |
|
$this->contiguous = (bool) $contiguous; |
451
|
1 |
|
if (!$contiguous) { |
452
|
|
|
$this->contiguousRow = -1; |
453
|
|
|
} |
454
|
|
|
|
455
|
1 |
|
return $this; |
456
|
|
|
} |
457
|
|
|
|
458
|
|
|
/** |
459
|
|
|
* Get Contiguous. |
460
|
|
|
* |
461
|
|
|
* @return bool |
462
|
|
|
*/ |
463
|
|
|
public function getContiguous() |
464
|
|
|
{ |
465
|
|
|
return $this->contiguous; |
466
|
|
|
} |
467
|
|
|
|
468
|
|
|
/** |
469
|
|
|
* Set escape backslashes. |
470
|
|
|
* |
471
|
|
|
* @param string $escapeCharacter |
472
|
|
|
* |
473
|
|
|
* @return $this |
474
|
|
|
*/ |
475
|
1 |
|
public function setEscapeCharacter($escapeCharacter) |
476
|
|
|
{ |
477
|
1 |
|
$this->escapeCharacter = $escapeCharacter; |
478
|
|
|
|
479
|
1 |
|
return $this; |
480
|
|
|
} |
481
|
|
|
|
482
|
|
|
/** |
483
|
|
|
* Get escape backslashes. |
484
|
|
|
* |
485
|
|
|
* @return string |
486
|
|
|
*/ |
487
|
1 |
|
public function getEscapeCharacter() |
488
|
|
|
{ |
489
|
1 |
|
return $this->escapeCharacter; |
490
|
|
|
} |
491
|
|
|
|
492
|
|
|
/** |
493
|
|
|
* Can the current IReader read the file? |
494
|
|
|
* |
495
|
|
|
* @param string $pFilename |
496
|
|
|
* |
497
|
|
|
* @return bool |
498
|
|
|
*/ |
499
|
19 |
|
public function canRead($pFilename) |
500
|
|
|
{ |
501
|
|
|
// Check if file exists |
502
|
|
|
try { |
503
|
19 |
|
$this->openFile($pFilename); |
504
|
|
|
} catch (Exception $e) { |
505
|
|
|
return false; |
506
|
|
|
} |
507
|
|
|
|
508
|
19 |
|
fclose($this->fileHandle); |
|
|
|
|
509
|
|
|
|
510
|
19 |
|
$type = mime_content_type($pFilename); |
511
|
|
|
$supportedTypes = [ |
512
|
19 |
|
'text/csv', |
513
|
|
|
'text/plain', |
514
|
|
|
'inode/x-empty', |
515
|
|
|
]; |
516
|
|
|
|
517
|
19 |
|
return in_array($type, $supportedTypes, true); |
518
|
|
|
} |
519
|
|
|
} |
520
|
|
|
|
This check looks for function or method calls that always return null and whose return value is used.
The method
getObject()
can return nothing but null, so it makes no sense to use the return value.The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.