Total Complexity | 64 |
Total Lines | 509 |
Duplicated Lines | 0 % |
Coverage | 67.78% |
Changes | 0 |
Complex classes like Csv often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Csv, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
9 | class Csv extends BaseReader |
||
10 | { |
||
11 | /** |
||
12 | * Input encoding. |
||
13 | * |
||
14 | * @var string |
||
15 | */ |
||
16 | private $inputEncoding = 'UTF-8'; |
||
17 | |||
18 | /** |
||
19 | * Delimiter. |
||
20 | * |
||
21 | * @var string |
||
22 | */ |
||
23 | private $delimiter; |
||
24 | |||
25 | /** |
||
26 | * Enclosure. |
||
27 | * |
||
28 | * @var string |
||
29 | */ |
||
30 | private $enclosure = '"'; |
||
31 | |||
32 | /** |
||
33 | * Sheet index to read. |
||
34 | * |
||
35 | * @var int |
||
36 | */ |
||
37 | private $sheetIndex = 0; |
||
38 | |||
39 | /** |
||
40 | * Load rows contiguously. |
||
41 | * |
||
42 | * @var bool |
||
43 | */ |
||
44 | private $contiguous = false; |
||
45 | |||
46 | /** |
||
47 | * Row counter for loading rows contiguously. |
||
48 | * |
||
49 | * @var int |
||
50 | */ |
||
51 | private $contiguousRow = -1; |
||
52 | |||
53 | /** |
||
54 | * The character that can escape the enclosure. |
||
55 | * |
||
56 | * @var string |
||
57 | */ |
||
58 | private $escapeCharacter = '\\'; |
||
59 | |||
60 | /** |
||
61 | * Create a new CSV Reader instance. |
||
62 | */ |
||
63 | 20 | public function __construct() |
|
64 | { |
||
65 | 20 | $this->readFilter = new DefaultReadFilter(); |
|
66 | 20 | } |
|
67 | |||
68 | /** |
||
69 | * Set input encoding. |
||
70 | * |
||
71 | * @param string $pValue Input encoding, eg: 'UTF-8' |
||
72 | * |
||
73 | * @return Csv |
||
74 | */ |
||
75 | public function setInputEncoding($pValue) |
||
76 | { |
||
77 | $this->inputEncoding = $pValue; |
||
78 | |||
79 | return $this; |
||
80 | } |
||
81 | |||
82 | /** |
||
83 | * Get input encoding. |
||
84 | * |
||
85 | * @return string |
||
86 | */ |
||
87 | public function getInputEncoding() |
||
88 | { |
||
89 | return $this->inputEncoding; |
||
90 | } |
||
91 | |||
92 | /** |
||
93 | * Move filepointer past any BOM marker. |
||
94 | */ |
||
95 | 11 | protected function skipBOM() |
|
96 | { |
||
97 | 11 | rewind($this->fileHandle); |
|
98 | |||
99 | 11 | switch ($this->inputEncoding) { |
|
100 | 11 | case 'UTF-8': |
|
101 | 11 | fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ? |
|
102 | 11 | fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0); |
|
103 | |||
104 | 11 | break; |
|
105 | case 'UTF-16LE': |
||
106 | fgets($this->fileHandle, 3) == "\xFF\xFE" ? |
||
107 | fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
||
108 | |||
109 | break; |
||
110 | case 'UTF-16BE': |
||
111 | fgets($this->fileHandle, 3) == "\xFE\xFF" ? |
||
112 | fseek($this->fileHandle, 2) : fseek($this->fileHandle, 0); |
||
113 | |||
114 | break; |
||
115 | case 'UTF-32LE': |
||
116 | fgets($this->fileHandle, 5) == "\xFF\xFE\x00\x00" ? |
||
117 | fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
||
118 | |||
119 | break; |
||
120 | case 'UTF-32BE': |
||
121 | fgets($this->fileHandle, 5) == "\x00\x00\xFE\xFF" ? |
||
122 | fseek($this->fileHandle, 4) : fseek($this->fileHandle, 0); |
||
123 | |||
124 | break; |
||
125 | default: |
||
126 | break; |
||
127 | } |
||
128 | 11 | } |
|
129 | |||
130 | /** |
||
131 | * Identify any separator that is explicitly set in the file. |
||
132 | */ |
||
133 | 11 | protected function checkSeparator() |
|
134 | { |
||
135 | 11 | $line = fgets($this->fileHandle); |
|
136 | 11 | if ($line === false) { |
|
137 | return; |
||
138 | } |
||
139 | |||
140 | 11 | if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) { |
|
141 | $this->delimiter = substr($line, 4, 1); |
||
142 | |||
143 | return; |
||
144 | } |
||
145 | |||
146 | 11 | return $this->skipBOM(); |
|
|
|||
147 | } |
||
148 | |||
149 | /** |
||
150 | * Infer the separator if it isn't explicitly set in the file or specified by the user. |
||
151 | */ |
||
152 | 11 | protected function inferSeparator() |
|
153 | { |
||
154 | 11 | if ($this->delimiter !== null) { |
|
155 | 4 | return; |
|
156 | } |
||
157 | |||
158 | 9 | $potentialDelimiters = [',', ';', "\t", '|', ':', ' ']; |
|
159 | 9 | $counts = []; |
|
160 | 9 | foreach ($potentialDelimiters as $delimiter) { |
|
161 | 9 | $counts[$delimiter] = []; |
|
162 | } |
||
163 | |||
164 | // Count how many times each of the potential delimiters appears in each line |
||
165 | 9 | $numberLines = 0; |
|
166 | 9 | while (($line = fgets($this->fileHandle)) !== false && (++$numberLines < 1000)) { |
|
167 | // Drop everything that is enclosed to avoid counting false positives in enclosures |
||
168 | 9 | $enclosure = preg_quote($this->enclosure, '/'); |
|
169 | 9 | $line = preg_replace('/(' . $enclosure . '.*' . $enclosure . ')/U', '', $line); |
|
170 | |||
171 | 9 | $countLine = []; |
|
172 | 9 | for ($i = strlen($line) - 1; $i >= 0; --$i) { |
|
173 | 9 | $char = $line[$i]; |
|
174 | 9 | if (isset($counts[$char])) { |
|
175 | 8 | if (!isset($countLine[$char])) { |
|
176 | 8 | $countLine[$char] = 0; |
|
177 | } |
||
178 | 8 | ++$countLine[$char]; |
|
179 | } |
||
180 | } |
||
181 | 9 | foreach ($potentialDelimiters as $delimiter) { |
|
182 | 9 | $counts[$delimiter][] = isset($countLine[$delimiter]) |
|
183 | 8 | ? $countLine[$delimiter] |
|
184 | 9 | : 0; |
|
185 | } |
||
186 | } |
||
187 | |||
188 | // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently) |
||
189 | 9 | $meanSquareDeviations = []; |
|
190 | 9 | $middleIdx = floor(($numberLines - 1) / 2); |
|
191 | |||
192 | 9 | foreach ($potentialDelimiters as $delimiter) { |
|
193 | 9 | $series = $counts[$delimiter]; |
|
194 | 9 | sort($series); |
|
195 | |||
196 | 9 | $median = ($numberLines % 2) |
|
197 | 5 | ? $series[$middleIdx] |
|
198 | 9 | : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2; |
|
199 | |||
200 | 9 | if ($median === 0) { |
|
201 | 9 | continue; |
|
202 | } |
||
203 | |||
204 | 8 | $meanSquareDeviations[$delimiter] = array_reduce( |
|
205 | 8 | $series, |
|
206 | 8 | function ($sum, $value) use ($median) { |
|
207 | 8 | return $sum + pow($value - $median, 2); |
|
208 | 8 | } |
|
209 | 8 | ) / count($series); |
|
210 | } |
||
211 | |||
212 | // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected) |
||
213 | 9 | $min = INF; |
|
214 | 9 | foreach ($potentialDelimiters as $delimiter) { |
|
215 | 9 | if (!isset($meanSquareDeviations[$delimiter])) { |
|
216 | 9 | continue; |
|
217 | } |
||
218 | |||
219 | 8 | if ($meanSquareDeviations[$delimiter] < $min) { |
|
220 | 8 | $min = $meanSquareDeviations[$delimiter]; |
|
221 | 8 | $this->delimiter = $delimiter; |
|
222 | } |
||
223 | } |
||
224 | |||
225 | // If no delimiter could be detected, fall back to the default |
||
226 | 9 | if ($this->delimiter === null) { |
|
227 | 1 | $this->delimiter = reset($potentialDelimiters); |
|
228 | } |
||
229 | |||
230 | 9 | return $this->skipBOM(); |
|
231 | } |
||
232 | |||
233 | /** |
||
234 | * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns). |
||
235 | * |
||
236 | * @param string $pFilename |
||
237 | * |
||
238 | * @throws Exception |
||
239 | * |
||
240 | * @return array |
||
241 | */ |
||
242 | public function listWorksheetInfo($pFilename) |
||
243 | { |
||
244 | // Open file |
||
245 | if (!$this->canRead($pFilename)) { |
||
246 | throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
||
247 | } |
||
248 | $this->openFile($pFilename); |
||
249 | $fileHandle = $this->fileHandle; |
||
250 | |||
251 | // Skip BOM, if any |
||
252 | $this->skipBOM(); |
||
253 | $this->checkSeparator(); |
||
254 | $this->inferSeparator(); |
||
255 | |||
256 | $worksheetInfo = []; |
||
257 | $worksheetInfo[0]['worksheetName'] = 'Worksheet'; |
||
258 | $worksheetInfo[0]['lastColumnLetter'] = 'A'; |
||
259 | $worksheetInfo[0]['lastColumnIndex'] = 0; |
||
260 | $worksheetInfo[0]['totalRows'] = 0; |
||
261 | $worksheetInfo[0]['totalColumns'] = 0; |
||
262 | |||
263 | // Loop through each line of the file in turn |
||
264 | while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { |
||
1 ignored issue
–
show
|
|||
265 | ++$worksheetInfo[0]['totalRows']; |
||
266 | $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); |
||
267 | } |
||
268 | |||
269 | $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); |
||
270 | $worksheetInfo[0]['totalColumns'] = $worksheetInfo[0]['lastColumnIndex'] + 1; |
||
271 | |||
272 | // Close file |
||
273 | fclose($fileHandle); |
||
1 ignored issue
–
show
|
|||
274 | |||
275 | return $worksheetInfo; |
||
276 | } |
||
277 | |||
278 | /** |
||
279 | * Loads Spreadsheet from file. |
||
280 | * |
||
281 | * @param string $pFilename |
||
282 | * |
||
283 | * @throws Exception |
||
284 | * |
||
285 | * @return Spreadsheet |
||
286 | */ |
||
287 | 10 | public function load($pFilename) |
|
288 | { |
||
289 | // Create new Spreadsheet |
||
290 | 10 | $spreadsheet = new Spreadsheet(); |
|
291 | |||
292 | // Load into this instance |
||
293 | 10 | return $this->loadIntoExisting($pFilename, $spreadsheet); |
|
294 | } |
||
295 | |||
296 | /** |
||
297 | * Loads PhpSpreadsheet from file into PhpSpreadsheet instance. |
||
298 | * |
||
299 | * @param string $pFilename |
||
300 | * @param Spreadsheet $spreadsheet |
||
301 | * |
||
302 | * @throws Exception |
||
303 | * |
||
304 | * @return Spreadsheet |
||
305 | */ |
||
306 | 11 | public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet) |
|
307 | { |
||
308 | 11 | $lineEnding = ini_get('auto_detect_line_endings'); |
|
309 | 11 | ini_set('auto_detect_line_endings', true); |
|
310 | |||
311 | // Open file |
||
312 | 11 | if (!$this->canRead($pFilename)) { |
|
313 | throw new Exception($pFilename . ' is an Invalid Spreadsheet file.'); |
||
314 | } |
||
315 | 11 | $this->openFile($pFilename); |
|
316 | 11 | $fileHandle = $this->fileHandle; |
|
317 | |||
318 | // Skip BOM, if any |
||
319 | 11 | $this->skipBOM(); |
|
320 | 11 | $this->checkSeparator(); |
|
321 | 11 | $this->inferSeparator(); |
|
322 | |||
323 | // Create new PhpSpreadsheet object |
||
324 | 11 | while ($spreadsheet->getSheetCount() <= $this->sheetIndex) { |
|
325 | 2 | $spreadsheet->createSheet(); |
|
326 | } |
||
327 | 11 | $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex); |
|
328 | |||
329 | // Set our starting row based on whether we're in contiguous mode or not |
||
330 | 11 | $currentRow = 1; |
|
331 | 11 | if ($this->contiguous) { |
|
332 | 1 | $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow; |
|
333 | } |
||
334 | |||
335 | // Loop through each line of the file in turn |
||
336 | 11 | while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) { |
|
1 ignored issue
–
show
|
|||
337 | 11 | $columnLetter = 'A'; |
|
338 | 11 | foreach ($rowData as $rowDatum) { |
|
339 | 11 | if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) { |
|
340 | // Convert encoding if necessary |
||
341 | 11 | if ($this->inputEncoding !== 'UTF-8') { |
|
342 | $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding); |
||
343 | } |
||
344 | |||
345 | // Set cell value |
||
346 | 11 | $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum); |
|
347 | } |
||
348 | 11 | ++$columnLetter; |
|
349 | } |
||
350 | 11 | ++$currentRow; |
|
351 | } |
||
352 | |||
353 | // Close file |
||
354 | 11 | fclose($fileHandle); |
|
1 ignored issue
–
show
|
|||
355 | |||
356 | 11 | if ($this->contiguous) { |
|
357 | 1 | $this->contiguousRow = $currentRow; |
|
358 | } |
||
359 | |||
360 | 11 | ini_set('auto_detect_line_endings', $lineEnding); |
|
361 | |||
362 | // Return |
||
363 | 11 | return $spreadsheet; |
|
364 | } |
||
365 | |||
366 | /** |
||
367 | * Get delimiter. |
||
368 | * |
||
369 | * @return string |
||
370 | */ |
||
371 | 5 | public function getDelimiter() |
|
374 | } |
||
375 | |||
376 | /** |
||
377 | * Set delimiter. |
||
378 | * |
||
379 | * @param string $delimiter Delimiter, eg: ',' |
||
380 | * |
||
381 | * @return CSV |
||
382 | */ |
||
383 | 2 | public function setDelimiter($delimiter) |
|
384 | { |
||
385 | 2 | $this->delimiter = $delimiter; |
|
386 | |||
387 | 2 | return $this; |
|
388 | } |
||
389 | |||
390 | /** |
||
391 | * Get enclosure. |
||
392 | * |
||
393 | * @return string |
||
394 | */ |
||
395 | public function getEnclosure() |
||
398 | } |
||
399 | |||
400 | /** |
||
401 | * Set enclosure. |
||
402 | * |
||
403 | * @param string $enclosure Enclosure, defaults to " |
||
404 | * |
||
405 | * @return CSV |
||
406 | */ |
||
407 | 1 | public function setEnclosure($enclosure) |
|
408 | { |
||
409 | 1 | if ($enclosure == '') { |
|
410 | $enclosure = '"'; |
||
411 | } |
||
412 | 1 | $this->enclosure = $enclosure; |
|
413 | |||
414 | 1 | return $this; |
|
415 | } |
||
416 | |||
417 | /** |
||
418 | * Get sheet index. |
||
419 | * |
||
420 | * @return int |
||
421 | */ |
||
422 | public function getSheetIndex() |
||
423 | { |
||
424 | return $this->sheetIndex; |
||
425 | } |
||
426 | |||
427 | /** |
||
428 | * Set sheet index. |
||
429 | * |
||
430 | * @param int $pValue Sheet index |
||
431 | * |
||
432 | * @return CSV |
||
433 | */ |
||
434 | 3 | public function setSheetIndex($pValue) |
|
435 | { |
||
436 | 3 | $this->sheetIndex = $pValue; |
|
437 | |||
438 | 3 | return $this; |
|
439 | } |
||
440 | |||
441 | /** |
||
442 | * Set Contiguous. |
||
443 | * |
||
444 | * @param bool $contiguous |
||
445 | * |
||
446 | * @return Csv |
||
447 | */ |
||
448 | 1 | public function setContiguous($contiguous) |
|
449 | { |
||
450 | 1 | $this->contiguous = (bool) $contiguous; |
|
451 | 1 | if (!$contiguous) { |
|
452 | $this->contiguousRow = -1; |
||
453 | } |
||
454 | |||
455 | 1 | return $this; |
|
456 | } |
||
457 | |||
458 | /** |
||
459 | * Get Contiguous. |
||
460 | * |
||
461 | * @return bool |
||
462 | */ |
||
463 | public function getContiguous() |
||
464 | { |
||
465 | return $this->contiguous; |
||
466 | } |
||
467 | |||
468 | /** |
||
469 | * Set escape backslashes. |
||
470 | * |
||
471 | * @param string $escapeCharacter |
||
472 | * |
||
473 | * @return $this |
||
474 | */ |
||
475 | 1 | public function setEscapeCharacter($escapeCharacter) |
|
476 | { |
||
477 | 1 | $this->escapeCharacter = $escapeCharacter; |
|
478 | |||
479 | 1 | return $this; |
|
480 | } |
||
481 | |||
482 | /** |
||
483 | * Get escape backslashes. |
||
484 | * |
||
485 | * @return string |
||
486 | */ |
||
487 | 1 | public function getEscapeCharacter() |
|
488 | { |
||
489 | 1 | return $this->escapeCharacter; |
|
490 | } |
||
491 | |||
492 | /** |
||
493 | * Can the current IReader read the file? |
||
494 | * |
||
495 | * @param string $pFilename |
||
496 | * |
||
497 | * @return bool |
||
498 | */ |
||
499 | 19 | public function canRead($pFilename) |
|
518 | } |
||
519 | } |
||
520 |
This check looks for function or method calls that always return null and whose return value is used.
The method
getObject()
can return nothing but null, so it makes no sense to use the return value.The reason is most likely that a function or method is imcomplete or has been reduced for debug purposes.