1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Reader; |
4
|
|
|
|
5
|
|
|
use PhpOffice\PhpSpreadsheet\Cell; |
6
|
|
|
use PhpOffice\PhpSpreadsheet\Shared\StringHelper; |
7
|
|
|
use PhpOffice\PhpSpreadsheet\Spreadsheet; |
8
|
|
|
|
9
|
|
|
/** |
10
|
|
|
* Copyright (c) 2006 - 2016 PhpSpreadsheet. |
11
|
|
|
* |
12
|
|
|
* This library is free software; you can redistribute it and/or |
13
|
|
|
* modify it under the terms of the GNU Lesser General Public |
14
|
|
|
* License as published by the Free Software Foundation; either |
15
|
|
|
* version 2.1 of the License, or (at your option) any later version. |
16
|
|
|
* |
17
|
|
|
* This library is distributed in the hope that it will be useful, |
18
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
19
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20
|
|
|
* Lesser General Public License for more details. |
21
|
|
|
* |
22
|
|
|
* You should have received a copy of the GNU Lesser General Public |
23
|
|
|
* License along with this library; if not, write to the Free Software |
24
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
25
|
|
|
* |
26
|
|
|
* @category PhpSpreadsheet |
27
|
|
|
* |
28
|
|
|
* @copyright Copyright (c) 2006 - 2016 PhpSpreadsheet (https://github.com/PHPOffice/PhpSpreadsheet) |
29
|
|
|
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL |
30
|
|
|
*/ |
31
|
|
|
class Csv extends BaseReader implements IReader |
32
|
|
|
{ |
33
|
|
|
/** |
34
|
|
|
* Input encoding. |
35
|
|
|
* |
36
|
|
|
* @var string |
37
|
|
|
*/ |
38
|
|
|
private $inputEncoding = 'UTF-8'; |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* Delimiter. |
42
|
|
|
* |
43
|
|
|
* @var string |
44
|
|
|
*/ |
45
|
|
|
private $delimiter = null; |
46
|
|
|
|
47
|
|
|
/** |
48
|
|
|
* Enclosure. |
49
|
|
|
* |
50
|
|
|
* @var string |
51
|
|
|
*/ |
52
|
|
|
private $enclosure = '"'; |
53
|
|
|
|
54
|
|
|
/** |
55
|
|
|
* Sheet index to read. |
56
|
|
|
* |
57
|
|
|
* @var int |
58
|
|
|
*/ |
59
|
|
|
private $sheetIndex = 0; |
60
|
|
|
|
61
|
|
|
/** |
62
|
|
|
* Load rows contiguously. |
63
|
|
|
* |
64
|
|
|
* @var bool |
65
|
|
|
*/ |
66
|
|
|
private $contiguous = false; |
67
|
|
|
|
68
|
|
|
/** |
69
|
|
|
* Row counter for loading rows contiguously. |
70
|
|
|
* |
71
|
|
|
* @var int |
72
|
|
|
*/ |
73
|
|
|
private $contiguousRow = -1; |
74
|
|
|
|
75
|
|
|
/** |
76
|
|
|
* Create a new CSV Reader instance. |
77
|
|
|
*/ |
78
|
3 |
|
public function __construct() |
79
|
|
|
{ |
80
|
3 |
|
$this->readFilter = new DefaultReadFilter(); |
81
|
3 |
|
} |
82
|
|
|
|
83
|
|
|
/** |
84
|
|
|
* Set input encoding. |
85
|
|
|
* |
86
|
|
|
* @param string $pValue Input encoding, eg: 'UTF-8' |
87
|
|
|
*/ |
88
|
|
|
public function setInputEncoding($pValue) |
89
|
|
|
{ |
90
|
|
|
$this->inputEncoding = $pValue; |
91
|
|
|
|
92
|
|
|
return $this; |
93
|
|
|
} |
94
|
|
|
|
95
|
|
|
/** |
96
|
|
|
* Get input encoding. |
97
|
|
|
* |
98
|
|
|
* @return string |
99
|
|
|
*/ |
100
|
|
|
public function getInputEncoding() |
101
|
|
|
{ |
102
|
|
|
return $this->inputEncoding; |
103
|
|
|
} |
104
|
|
|
|
105
|
|
|
/** |
106
|
|
|
* Move filepointer past any BOM marker. |
107
|
|
|
*/ |
108
|
3 |
|
protected function skipBOM() |
109
|
|
|
{ |
110
|
3 |
|
rewind($this->fileHandle); |
111
|
|
|
|
112
|
3 |
|
switch ($this->inputEncoding) { |
113
|
3 |
View Code Duplication |
case 'UTF-8': |
|
|
|
|
114
|
3 |
|
fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? |
115
|
3 |
|
fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); |
116
|
3 |
|
break; |
117
|
|
View Code Duplication |
case 'UTF-16LE': |
|
|
|
|
118
|
|
|
fgets($this->fileHandle, 3) == "\xFF\xFE" ? |
119
|
|
|
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
120
|
|
|
break; |
121
|
|
View Code Duplication |
case 'UTF-16BE': |
|
|
|
|
122
|
|
|
fgets($this->fileHandle, 3) == "\xFE\xFF" ? |
123
|
|
|
fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
124
|
|
|
break; |
125
|
|
View Code Duplication |
case 'UTF-32LE': |
|
|
|
|
126
|
|
|
fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? |
127
|
|
|
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
128
|
|
|
break; |
129
|
|
View Code Duplication |
case 'UTF-32BE': |
|
|
|
|
130
|
|
|
fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? |
131
|
|
|
fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
132
|
|
|
break; |
133
|
|
|
default: |
134
|
|
|
break; |
135
|
|
|
} |
136
|
3 |
|
} |
137
|
|
|
|
138
|
|
|
/** |
139
|
|
|
* Identify any separator that is explicitly set in the file. |
140
|
|
|
*/ |
141
|
3 |
|
protected function checkSeparator() |
142
|
|
|
{ |
143
|
3 |
|
$line = fgets($this->fileHandle); |
144
|
3 |
|
if ($line === false) { |
145
|
|
|
return; |
146
|
|
|
} |
147
|
|
|
|
148
|
3 |
|
if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { |
149
|
|
|
$this->delimiter = substr($line, 4, 1); |
150
|
|
|
|
151
|
|
|
return; |
152
|
|
|
} |
153
|
|
|
|
154
|
3 |
|
return $this->skipBOM(); |
155
|
|
|
} |
156
|
|
|
|
157
|
|
|
/** |
158
|
|
|
* Infer the separator if it isn't explicitly set in the file or specified by the user. |
159
|
|
|
*/ |
160
|
3 |
|
protected function inferSeparator() |
161
|
|
|
{ |
162
|
3 |
|
if ($this->delimiter !== null) { |
163
|
1 |
|
return; |
164
|
|
|
} |
165
|
|
|
|
166
|
2 |
|
$potentialDelimiters = [',', ';', "\t", '|', ':', ' ']; |
167
|
2 |
|
$counts = []; |
168
|
2 |
|
foreach ($potentialDelimiters as $delimiter) { |
169
|
2 |
|
$counts[$delimiter] = []; |
170
|
|
|
} |
171
|
|
|
|
172
|
|
|
// Count how many times each of the potential delimiters appears in each line |
173
|
2 |
|
$numberLines = 0; |
174
|
2 |
|
while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { |
175
|
2 |
|
$countLine = []; |
176
|
2 |
|
for ($i = strlen($line) - 1; $i >= 0; --$i) { |
177
|
2 |
|
$char = $line[$i]; |
178
|
2 |
|
if (isset($counts[$char])) { |
179
|
2 |
|
if (!isset($countLine[$char])) { |
180
|
2 |
|
$countLine[$char] = 0; |
181
|
|
|
} |
182
|
2 |
|
++$countLine[$char]; |
183
|
|
|
} |
184
|
|
|
} |
185
|
2 |
|
foreach ($potentialDelimiters as $delimiter) { |
186
|
2 |
|
$counts[$delimiter][] = isset($countLine[$delimiter]) |
187
|
2 |
|
? $countLine[$delimiter] |
188
|
2 |
|
: 0; |
189
|
|
|
} |
190
|
|
|
} |
191
|
|
|
|
192
|
|
|
// Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) |
193
|
2 |
|
$meanSquareDeviations = []; |
194
|
2 |
|
$middleIdx = floor(($numberLines - 1) / 2); |
195
|
|
|
|
196
|
2 |
|
foreach ($potentialDelimiters as $delimiter) { |
197
|
2 |
|
$series = $counts[$delimiter]; |
198
|
2 |
|
sort($series); |
199
|
|
|
|
200
|
2 |
|
$median = ($numberLines % 2) |
201
|
2 |
|
? $series[$middleIdx] |
202
|
2 |
|
: ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; |
203
|
|
|
|
204
|
2 |
|
if ($median === 0) { |
205
|
2 |
|
continue; |
206
|
|
|
} |
207
|
|
|
|
208
|
2 |
|
$meanSquareDeviations[$delimiter] = array_reduce( |
209
|
2 |
|
$series, |
210
|
2 |
|
function ($sum, $value) use ($median) { |
211
|
2 |
|
return $sum + pow($value - $median, 2); |
212
|
2 |
|
} |
213
|
2 |
|
) / count($series); |
214
|
|
|
} |
215
|
|
|
|
216
|
|
|
// ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) |
217
|
2 |
|
$min = INF; |
218
|
2 |
|
foreach ($potentialDelimiters as $delimiter) { |
219
|
2 |
|
if (!isset($meanSquareDeviations[$delimiter])) { |
220
|
2 |
|
continue; |
221
|
|
|
} |
222
|
|
|
|
223
|
2 |
|
if ($meanSquareDeviations[$delimiter] < $min) { |
224
|
2 |
|
$min = $meanSquareDeviations[$delimiter]; |
225
|
2 |
|
$this->delimiter = $delimiter; |
226
|
|
|
} |
227
|
|
|
} |
228
|
|
|
|
229
|
|
|
// If no delimiter could be detected, fall back to the default |
230
|
2 |
|
if ($this->delimiter === null) { |
231
|
|
|
$this->delimiter = reset($potentialDelimiters); |
|
|
|
|
232
|
|
|
} |
233
|
|
|
|
234
|
2 |
|
return $this->skipBOM(); |
235
|
|
|
} |
236
|
|
|
|
237
|
|
|
/** |
238
|
|
|
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
239
|
|
|
* |
240
|
|
|
* @param string $pFilename |
241
|
|
|
* |
242
|
|
|
* @throws Exception |
243
|
|
|
*/ |
244
|
|
|
public function listWorksheetInfo($pFilename) |
245
|
|
|
{ |
246
|
|
|
// Open file |
247
|
|
|
if (!$this->canRead($pFilename)) { |
248
|
|
|
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
249
|
|
|
} |
250
|
|
|
$this->openFile($pFilename); |
251
|
|
|
$fileHandle = $this->fileHandle; |
252
|
|
|
|
253
|
|
|
// Skip BOM, if any |
254
|
|
|
$this->skipBOM(); |
255
|
|
|
$this->checkSeparator(); |
256
|
|
|
$this->inferSeparator(); |
257
|
|
|
|
258
|
|
|
$worksheetInfo = []; |
259
|
|
|
$worksheetInfo[0]['worksheetName'] = 'Worksheet'; |
260
|
|
|
$worksheetInfo[0]['lastColumnLetter'] = 'A'; |
261
|
|
|
$worksheetInfo[0]['lastColumnIndex'] = 0; |
262
|
|
|
$worksheetInfo[0]['totalRows'] = 0; |
263
|
|
|
$worksheetInfo[0]['totalColumns'] = 0; |
264
|
|
|
|
265
|
|
|
// Loop through each line of the file in turn |
266
|
|
|
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) { |
267
|
|
|
++$worksheetInfo[0]['totalRows']; |
268
|
|
|
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); |
269
|
|
|
} |
270
|
|
|
|
271
|
|
|
$worksheetInfo[0]['lastColumnLetter'] = Cell::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex']); |
272
|
|
|
$worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; |
273
|
|
|
|
274
|
|
|
// Close file |
275
|
|
|
fclose($fileHandle); |
276
|
|
|
|
277
|
|
|
return $worksheetInfo; |
278
|
|
|
} |
279
|
|
|
|
280
|
|
|
/** |
281
|
|
|
* Loads Spreadsheet from file. |
282
|
|
|
* |
283
|
|
|
* @param string $pFilename |
284
|
|
|
* |
285
|
|
|
* @throws Exception |
286
|
|
|
* |
287
|
|
|
* @return Spreadsheet |
288
|
|
|
*/ |
289
|
3 |
|
public function load($pFilename) |
290
|
|
|
{ |
291
|
|
|
// Create new Spreadsheet |
292
|
3 |
|
$spreadsheet = new Spreadsheet(); |
293
|
|
|
|
294
|
|
|
// Load into this instance |
295
|
3 |
|
return $this->loadIntoExisting($pFilename, $spreadsheet); |
296
|
|
|
} |
297
|
|
|
|
298
|
|
|
/** |
299
|
|
|
* Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
300
|
|
|
* |
301
|
|
|
* @param string $pFilename |
302
|
|
|
* @param Spreadsheet $spreadsheet |
303
|
|
|
* |
304
|
|
|
* @throws Exception |
305
|
|
|
* |
306
|
|
|
* @return Spreadsheet |
307
|
|
|
*/ |
308
|
3 |
|
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) |
309
|
|
|
{ |
310
|
3 |
|
$lineEnding = ini_get('auto_detect_line_endings'); |
311
|
3 |
|
ini_set('auto_detect_line_endings', true); |
312
|
|
|
|
313
|
|
|
// Open file |
314
|
3 |
|
if (!$this->canRead($pFilename)) { |
315
|
|
|
throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
316
|
|
|
} |
317
|
3 |
|
$this->openFile($pFilename); |
318
|
3 |
|
$fileHandle = $this->fileHandle; |
319
|
|
|
|
320
|
|
|
// Skip BOM, if any |
321
|
3 |
|
$this->skipBOM(); |
322
|
3 |
|
$this->checkSeparator(); |
323
|
3 |
|
$this->inferSeparator(); |
324
|
|
|
|
325
|
|
|
// Create new PhpSpreadsheet object |
326
|
3 |
|
while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { |
327
|
|
|
$spreadsheet->createSheet(); |
328
|
|
|
} |
329
|
3 |
|
$sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); |
330
|
|
|
|
331
|
|
|
// Set our starting row based on whether we're in contiguous mode or not |
332
|
3 |
|
$currentRow = 1; |
333
|
3 |
|
if ($this->contiguous) { |
334
|
|
|
$currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; |
335
|
|
|
} |
336
|
|
|
|
337
|
|
|
// Loop through each line of the file in turn |
338
|
3 |
|
while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) { |
339
|
3 |
|
$columnLetter = 'A'; |
340
|
3 |
|
foreach ($rowData as $rowDatum) { |
341
|
3 |
|
if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { |
342
|
|
|
// Convert encoding if necessary |
343
|
3 |
|
if ($this->inputEncoding !== 'UTF-8') { |
344
|
|
|
$rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); |
345
|
|
|
} |
346
|
|
|
|
347
|
|
|
// Set cell value |
348
|
3 |
|
$sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); |
349
|
|
|
} |
350
|
3 |
|
++$columnLetter; |
351
|
|
|
} |
352
|
3 |
|
++$currentRow; |
353
|
|
|
} |
354
|
|
|
|
355
|
|
|
// Close file |
356
|
3 |
|
fclose($fileHandle); |
357
|
|
|
|
358
|
3 |
|
if ($this->contiguous) { |
359
|
|
|
$this->contiguousRow = $currentRow; |
360
|
|
|
} |
361
|
|
|
|
362
|
3 |
|
ini_set('auto_detect_line_endings', $lineEnding); |
363
|
|
|
|
364
|
|
|
// Return |
365
|
3 |
|
return $spreadsheet; |
366
|
|
|
} |
367
|
|
|
|
368
|
|
|
/** |
369
|
|
|
* Get delimiter. |
370
|
|
|
* |
371
|
|
|
* @return string |
372
|
|
|
*/ |
373
|
1 |
|
public function getDelimiter() |
374
|
|
|
{ |
375
|
1 |
|
return $this->delimiter; |
376
|
|
|
} |
377
|
|
|
|
378
|
|
|
/** |
379
|
|
|
* Set delimiter. |
380
|
|
|
* |
381
|
|
|
* @param string $delimiter Delimiter, eg: ',' |
382
|
|
|
* |
383
|
|
|
* @return CSV |
384
|
|
|
*/ |
385
|
1 |
|
public function setDelimiter($delimiter) |
386
|
|
|
{ |
387
|
1 |
|
$this->delimiter = $delimiter; |
388
|
|
|
|
389
|
1 |
|
return $this; |
390
|
|
|
} |
391
|
|
|
|
392
|
|
|
/** |
393
|
|
|
* Get enclosure. |
394
|
|
|
* |
395
|
|
|
* @return string |
396
|
|
|
*/ |
397
|
|
|
public function getEnclosure() |
398
|
|
|
{ |
399
|
|
|
return $this->enclosure; |
400
|
|
|
} |
401
|
|
|
|
402
|
|
|
/** |
403
|
|
|
* Set enclosure. |
404
|
|
|
* |
405
|
|
|
* @param string $enclosure Enclosure, defaults to " |
406
|
|
|
* |
407
|
|
|
* @return CSV |
408
|
|
|
*/ |
409
|
1 |
|
public function setEnclosure($enclosure) |
410
|
|
|
{ |
411
|
1 |
|
if ($enclosure == '') { |
412
|
|
|
$enclosure = '"'; |
413
|
|
|
} |
414
|
1 |
|
$this->enclosure = $enclosure; |
415
|
|
|
|
416
|
1 |
|
return $this; |
417
|
|
|
} |
418
|
|
|
|
419
|
|
|
/** |
420
|
|
|
* Get sheet index. |
421
|
|
|
* |
422
|
|
|
* @return int |
423
|
|
|
*/ |
424
|
|
|
public function getSheetIndex() |
425
|
|
|
{ |
426
|
|
|
return $this->sheetIndex; |
427
|
|
|
} |
428
|
|
|
|
429
|
|
|
/** |
430
|
|
|
* Set sheet index. |
431
|
|
|
* |
432
|
|
|
* @param int $pValue Sheet index |
433
|
|
|
* |
434
|
|
|
* @return CSV |
435
|
|
|
*/ |
436
|
1 |
|
public function setSheetIndex($pValue) |
437
|
|
|
{ |
438
|
1 |
|
$this->sheetIndex = $pValue; |
439
|
|
|
|
440
|
1 |
|
return $this; |
441
|
|
|
} |
442
|
|
|
|
443
|
|
|
/** |
444
|
|
|
* Set Contiguous. |
445
|
|
|
* |
446
|
|
|
* @param bool $contiguous |
447
|
|
|
*/ |
448
|
|
|
public function setContiguous($contiguous) |
449
|
|
|
{ |
450
|
|
|
$this->contiguous = (bool) $contiguous; |
451
|
|
|
if (!$contiguous) { |
452
|
|
|
$this->contiguousRow = -1; |
453
|
|
|
} |
454
|
|
|
|
455
|
|
|
return $this; |
456
|
|
|
} |
457
|
|
|
|
458
|
|
|
/** |
459
|
|
|
* Get Contiguous. |
460
|
|
|
* |
461
|
|
|
* @return bool |
462
|
|
|
*/ |
463
|
|
|
public function getContiguous() |
464
|
|
|
{ |
465
|
|
|
return $this->contiguous; |
466
|
|
|
} |
467
|
|
|
|
468
|
|
|
/** |
469
|
|
|
* Can the current IReader read the file? |
470
|
|
|
* |
471
|
|
|
* @param string $pFilename |
472
|
|
|
* |
473
|
|
|
* @throws Exception |
474
|
|
|
* |
475
|
|
|
* @return bool |
476
|
|
|
*/ |
477
|
3 |
|
public function canRead($pFilename) |
478
|
|
|
{ |
479
|
|
|
// Check if file exists |
480
|
|
|
try { |
481
|
3 |
|
$this->openFile($pFilename); |
482
|
|
|
} catch (Exception $e) { |
483
|
|
|
return false; |
484
|
|
|
} |
485
|
|
|
|
486
|
3 |
|
fclose($this->fileHandle); |
487
|
|
|
|
488
|
3 |
|
return true; |
489
|
|
|
} |
490
|
|
|
} |
491
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.