Total Complexity | 72 |
Total Lines | 508 |
Duplicated Lines | 0 % |
Coverage | 99.05% |
Changes | 0 |
Complex classes like Csv often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Csv, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
11 | class Csv extends BaseReader |
||
12 | { |
||
// Encoding used when no other encoding can be determined.
const DEFAULT_FALLBACK_ENCODING = 'CP1252';
// Sentinel value for the input encoding meaning "detect it from the file".
const GUESS_ENCODING = 'guess';

// Byte-order marks, their byte lengths, and the line-feed byte sequence
// for each supported Unicode encoding.
const UTF8_BOM = "\xEF\xBB\xBF";
const UTF8_BOM_LEN = 3;

const UTF16BE_BOM = "\xfe\xff";
const UTF16BE_BOM_LEN = 2;
const UTF16BE_LF = "\x00\x0a";

const UTF16LE_BOM = "\xff\xfe";
const UTF16LE_BOM_LEN = 2;
const UTF16LE_LF = "\x0a\x00";

const UTF32BE_BOM = "\x00\x00\xfe\xff";
const UTF32BE_BOM_LEN = 4;
const UTF32BE_LF = "\x00\x00\x00\x0a";

const UTF32LE_BOM = "\xff\xfe\x00\x00";
const UTF32LE_BOM_LEN = 4;
const UTF32LE_LF = "\x0a\x00\x00\x00";
||
29 | |||
/**
 * Encoding of the input file.
 *
 * @var string
 */
private $inputEncoding = 'UTF-8';

/**
 * Encoding to fall back on when guessing the encoding fails.
 *
 * @var string
 */
private $fallbackEncoding = self::DEFAULT_FALLBACK_ENCODING;

/**
 * Field delimiter; null until it is set explicitly, read from a
 * "sep=" line, or inferred from the file.
 *
 * @var ?string
 */
private $delimiter;

/**
 * Field enclosure character.
 *
 * @var string
 */
private $enclosure = '"';

/**
 * Index of the sheet the CSV data is loaded into.
 *
 * @var int
 */
private $sheetIndex = 0;

/**
 * Whether rows are loaded contiguously.
 *
 * @var bool
 */
private $contiguous = false;

/**
 * The character that can escape the enclosure.
 *
 * @var string
 */
private $escapeCharacter = '\\';

/**
 * Optional callback run at the end of construction to change defaults.
 *
 * @var ?callable
 */
private static $constructorCallback;
||
85 | |||
/**
 * Create a new CSV Reader instance.
 *
 * After the parent constructor has run, the registered constructor
 * callback (if any) is invoked with this reader so it can adjust defaults.
 */
public function __construct()
{
    parent::__construct();

    $hook = self::$constructorCallback;
    if ($hook === null) {
        return;
    }

    $hook($this);
}
|
97 | |||
/**
 * Register (or clear, by passing null) the callback that changes the defaults.
 *
 * The callback receives the Csv Reader object as its first parameter
 * and should return void.
 *
 * @param ?callable $callback callback to run from the constructor, or null for none
 */
public static function setConstructorCallback(?callable $callback): void
{
    self::$constructorCallback = $callback;
}
|
108 | |||
109 | 1 | public static function getConstructorCallback(): ?callable |
|
112 | } |
||
113 | |||
/**
 * Set the encoding of the input file.
 *
 * @return $this
 */
public function setInputEncoding(string $encoding): self
{
    $this->inputEncoding = $encoding;

    return $this;
}
||
120 | |||
/**
 * Get the currently configured input encoding.
 */
public function getInputEncoding(): string
{
    return $this->inputEncoding;
}
||
125 | |||
/**
 * Set the encoding to fall back on when guessing the encoding fails.
 *
 * @return $this
 */
public function setFallbackEncoding(string $pValue): self
{
    $this->fallbackEncoding = $pValue;

    return $this;
}
||
132 | |||
/**
 * Get the currently configured fallback encoding.
 */
public function getFallbackEncoding(): string
{
    return $this->fallbackEncoding;
}
||
137 | |||
/**
 * Position the file pointer at the start of the data, past a UTF-8 BOM if present.
 *
 * The pointer is always rewound first; if the leading bytes are not the
 * UTF-8 BOM it is rewound again so no data is consumed.
 */
protected function skipBOM(): void
{
    rewind($this->fileHandle);

    $leadingBytes = fgets($this->fileHandle, self::UTF8_BOM_LEN + 1);
    if ($leadingBytes === self::UTF8_BOM) {
        // BOM consumed; pointer already sits on the first data byte.
        return;
    }

    rewind($this->fileHandle);
}
|
149 | |||
/**
 * Identify any separator that is explicitly set in the file.
 *
 * Reads one line from the current position. A line of exactly five
 * characters (ignoring the line ending) starting with "sep=" (case
 * insensitive) sets the delimiter to its fifth character, and the pointer
 * is left after that line. Any other line is treated as data, so the
 * pointer is moved back to the start of the data.
 */
protected function checkSeparator(): void
{
    $firstLine = fgets($this->fileHandle);
    if ($firstLine === false) {
        return;
    }

    $isSeparatorLine = stripos($firstLine, 'sep=') === 0
        && strlen(trim($firstLine, "\r\n")) === 5;

    if (!$isSeparatorLine) {
        $this->skipBOM();

        return;
    }

    $this->delimiter = substr($firstLine, 4, 1);
}
|
168 | |||
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Does nothing when a delimiter is already known. Otherwise the delimiter
 * is taken from the inference engine, using the engine's default when the
 * file has no lines to inspect or when no delimiter can be detected.
 * Afterwards the file pointer is moved back to the start of the data.
 */
protected function inferSeparator(): void
{
    if ($this->delimiter !== null) {
        return;
    }

    $engine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);

    // With zero lines there is nothing to infer, so skip straight to the default.
    $detected = ($engine->linesCounted() === 0) ? null : $engine->infer();

    // If no delimiter could be detected, fall back to the default
    $this->delimiter = $detected ?? $engine->getDefaultDelimiter();

    $this->skipBOM();
}
|
197 | |||
198 | /** |
||
199 | * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
||
200 | */ |
||
201 | 11 | public function listWorksheetInfo(string $filename): array |
|
234 | } |
||
235 | |||
/**
 * Loads Spreadsheet from file.
 *
 * Applies the given flags, then reads the file into a brand-new Spreadsheet.
 *
 * @return Spreadsheet
 */
public function load(string $filename, int $flags = 0)
{
    $this->processFlags($flags);

    // Read the file into a fresh Spreadsheet
    $target = new Spreadsheet();

    return $this->loadIntoExisting($filename, $target);
}
||
251 | |||
252 | 85 | private function openFileOrMemory(string $filename): void |
|
271 | } |
||
272 | } |
||
273 | 82 | } |
|
274 | |||
/**
 * Set the auto_detect_line_endings ini value and report the previous one.
 *
 * @param ?string $value new ini value; null leaves the setting untouched
 *
 * @return ?string the previous value when ini_set returned a string, otherwise null
 */
private static function setAutoDetect(?string $value): ?string
{
    if ($value === null) {
        return null;
    }

    // Warnings are suppressed here, as in the original code.
    $previous = @ini_set('auto_detect_line_endings', $value);

    return is_string($previous) ? $previous : null;
}
||
287 | |||
/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 *
 * Rows are written to the sheet at the configured sheet index; sheets are
 * created until that index exists. Cells whose value is the empty string,
 * or that the read filter rejects, are not written. In contiguous mode the
 * output row only advances for input rows that produce at least one cell;
 * otherwise the output row equals the input row number.
 *
 * The file handle is closed and the previous auto_detect_line_endings
 * setting is restored even when opening or reading the file throws.
 *
 * @param string $filename file to read
 * @param Spreadsheet $spreadsheet spreadsheet to load the data into
 *
 * @return Spreadsheet the spreadsheet passed in
 */
public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Spreadsheet
{
    // Deprecated in Php8.1
    $iniset = self::setAutoDetect('1');

    try {
        // Open file
        $this->openFileOrMemory($filename);
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            $outRow = 0;

            // Loop through each line of the file in turn
            $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
            while (is_array($rowData)) {
                $noOutputYet = true;
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    self::convertBoolean($rowDatum);
                    if ($rowDatum !== '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        if ($this->contiguous) {
                            // Advance the output row once per input row that yields data.
                            if ($noOutputYet) {
                                $noOutputYet = false;
                                ++$outRow;
                            }
                        } else {
                            $outRow = $currentRow;
                        }
                        // Set cell value
                        $sheet->getCell($columnLetter . $outRow)->setValue($rowDatum);
                    }
                    ++$columnLetter;
                }
                $rowData = fgetcsv($fileHandle, 0, $this->delimiter ?? '', $this->enclosure, $this->escapeCharacter);
                ++$currentRow;
            }
        } finally {
            // Close file, also when reading failed part-way
            fclose($fileHandle);
        }
    } finally {
        // Restore the ini setting on every exit path, including exceptions
        self::setAutoDetect($iniset);
    }

    // Return
    return $spreadsheet;
}
||
348 | |||
/**
 * Convert string true/false to boolean, and null to null-string.
 *
 * The value is modified in place. The comparison with "true"/"false" is
 * case-insensitive; any other value is left untouched.
 *
 * @param mixed $rowDatum
 */
private static function convertBoolean(&$rowDatum): void
{
    if ($rowDatum === null) {
        $rowDatum = '';

        return;
    }

    if (!is_string($rowDatum)) {
        return;
    }

    if (strcasecmp('true', $rowDatum) === 0) {
        $rowDatum = true;

        return;
    }

    if (strcasecmp('false', $rowDatum) === 0) {
        $rowDatum = false;
    }
}
|
366 | |||
367 | 14 | public function getDelimiter(): ?string |
|
370 | } |
||
371 | |||
/**
 * Set the field delimiter; null means it is not yet decided.
 *
 * @return $this
 */
public function setDelimiter(?string $delimiter): self
{
    $this->delimiter = $delimiter;

    return $this;
}
||
378 | |||
/**
 * Get the field enclosure character.
 */
public function getEnclosure(): string
{
    return $this->enclosure;
}
||
383 | |||
/**
 * Set the field enclosure character.
 *
 * An empty string is replaced by the double-quote character.
 *
 * @return $this
 */
public function setEnclosure(string $enclosure): self
{
    $this->enclosure = ($enclosure === '') ? '"' : $enclosure;

    return $this;
}
||
393 | |||
/**
 * Get the index of the sheet that data is loaded into.
 */
public function getSheetIndex(): int
{
    return $this->sheetIndex;
}
||
398 | |||
/**
 * Set the index of the sheet that data is loaded into.
 *
 * @return $this
 */
public function setSheetIndex(int $indexValue): self
{
    $this->sheetIndex = $indexValue;

    return $this;
}
||
405 | |||
/**
 * Enable or disable contiguous row loading.
 *
 * @return $this
 */
public function setContiguous(bool $contiguous): self
{
    // The parameter is already typed bool, so it is stored as-is.
    $this->contiguous = $contiguous;

    return $this;
}
||
412 | |||
/**
 * Report whether contiguous row loading is enabled.
 */
public function getContiguous(): bool
{
    return $this->contiguous;
}
||
417 | |||
418 | 8 | public function setEscapeCharacter(string $escapeCharacter): self |
|
423 | } |
||
424 | |||
425 | 1 | public function getEscapeCharacter(): string |
|
428 | } |
||
429 | |||
/**
 * Can the current IReader read the file?
 *
 * Returns false when the file cannot be opened. A "csv" or "tsv" extension
 * (case-insensitive) is trusted outright; otherwise the detected mime type
 * must be one of the supported types.
 */
public function canRead(string $filename): bool
{
    // Check if file exists
    try {
        $this->openFile($filename);
    } catch (ReaderException $e) {
        return false;
    }

    fclose($this->fileHandle);

    // Trust file extension if any
    $extension = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
    // Strict comparison, consistent with the mime-type check below.
    if (in_array($extension, ['csv', 'tsv'], true)) {
        return true;
    }

    // Attempt to guess mimetype
    $type = mime_content_type($filename);
    $supportedTypes = [
        'application/csv',
        'text/csv',
        'text/plain',
        'inode/x-empty',
    ];

    return in_array($type, $supportedTypes, true);
}
||
461 | |||
/**
 * Test file contents for one encoding's line-feed byte sequence.
 *
 * Does nothing once an encoding has been chosen. Otherwise, if the first
 * occurrence of $compare in $contents starts at an offset that is a
 * multiple of the length of $compare, $encoding is set to $setEncoding.
 *
 * @param string $encoding encoding found so far ('' if none); updated in place
 * @param string $contents file contents to inspect
 * @param string $compare byte sequence to look for
 * @param string $setEncoding encoding name to record on a match
 */
private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
{
    if ($encoding !== '') {
        return;
    }

    $offset = strpos($contents, $compare);
    if ($offset === false) {
        return;
    }

    if ($offset % strlen($compare) === 0) {
        $encoding = $setEncoding;
    }
}
|
471 | |||
/**
 * Guess the encoding of a file from its contents, without relying on a BOM.
 *
 * Line-feed sequences are tested in the order UTF-32BE, UTF-32LE,
 * UTF-16BE, UTF-16LE; if none matches and the contents pass a UTF-8
 * regex match, 'UTF-8' is chosen.
 *
 * @return string the guessed encoding, or '' when nothing matched
 */
private static function guessEncodingNoBom(string $filename): string
{
    $encoding = '';
    $contents = file_get_contents($filename);

    // Order matters: the first matching entry wins.
    $lineFeeds = [
        'UTF-32BE' => self::UTF32BE_LF,
        'UTF-32LE' => self::UTF32LE_LF,
        'UTF-16BE' => self::UTF16BE_LF,
        'UTF-16LE' => self::UTF16LE_LF,
    ];
    foreach ($lineFeeds as $candidate => $lineFeed) {
        self::guessEncodingTestNoBom($encoding, $contents, $lineFeed, $candidate);
    }

    if ($encoding === '' && preg_match('//u', $contents) === 1) {
        $encoding = 'UTF-8';
    }

    return $encoding;
}
||
486 | |||
/**
 * Test the leading bytes of a file against one encoding's BOM.
 *
 * Does nothing once an encoding has been chosen. Otherwise, if $first4
 * starts with $compare, $encoding is set to $setEncoding.
 *
 * @param string $encoding encoding found so far ('' if none); updated in place
 * @param string $first4 leading bytes of the file
 * @param string $compare BOM byte sequence to compare against
 * @param string $setEncoding encoding name to record on a match
 */
private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
{
    if ($encoding !== '') {
        return;
    }

    $prefix = substr($first4, 0, strlen($compare));
    if ($prefix === $compare) {
        $encoding = $setEncoding;
    }
}
|
495 | |||
496 | 29 | private static function guessEncodingBom(string $filename): string |
|
509 | } |
||
510 | |||
511 | 29 | public static function guessEncoding(string $filename, string $dflt = self::DEFAULT_FALLBACK_ENCODING): string |
|
512 | { |
||
519 | } |
||
520 | } |
||
521 |