Total Complexity | 68 |
Total Lines | 553 |
Duplicated Lines | 0 % |
Coverage | 70.92% |
Changes | 4 | ||
Bugs | 0 | Features | 0 |
Complex classes like Csv often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Csv, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
9 | class Csv extends BaseReader |
||
10 | { |
||
11 | /** |
||
12 | * Input encoding. |
||
13 | * |
||
14 | * @var string |
||
15 | */ |
||
16 | private $inputEncoding = 'UTF-8'; |
||
17 | |||
18 | /** |
||
19 | * Delimiter. |
||
20 | * |
||
21 | * @var string |
||
22 | */ |
||
23 | private $delimiter; |
||
24 | |||
25 | /** |
||
26 | * Enclosure. |
||
27 | * |
||
28 | * @var string |
||
29 | */ |
||
30 | private $enclosure = '"'; |
||
31 | |||
32 | /** |
||
33 | * Sheet index to read. |
||
34 | * |
||
35 | * @var int |
||
36 | */ |
||
37 | private $sheetIndex = 0; |
||
38 | |||
39 | /** |
||
40 | * Load rows contiguously. |
||
41 | * |
||
42 | * @var bool |
||
43 | */ |
||
44 | private $contiguous = false; |
||
45 | |||
46 | /** |
||
47 | * Row counter for loading rows contiguously. |
||
48 | * |
||
49 | * @var int |
||
50 | */ |
||
51 | private $contiguousRow = -1; |
||
52 | |||
53 | /** |
||
54 | * The character that can escape the enclosure. |
||
55 | * |
||
56 | * @var string |
||
57 | */ |
||
58 | private $escapeCharacter = '\\'; |
||
59 | |||
60 | /** |
||
61 | * Create a new CSV Reader instance. |
||
62 | */ |
||
63 | 27 | public function __construct() |
|
66 | 27 | } |
|
67 | |||
/**
 * Set the encoding the CSV file is expected to be in.
 *
 * Values other than 'UTF-8' cause cell contents to be converted to UTF-8
 * while loading.
 *
 * @param string $pValue Input encoding, eg: 'UTF-8'
 *
 * @return Csv This reader, to allow call chaining
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    return $this;
}
||
81 | |||
82 | /** |
||
83 | * Get input encoding. |
||
84 | * |
||
85 | * @return string |
||
86 | */ |
||
87 | public function getInputEncoding() |
||
90 | } |
||
91 | |||
92 | /** |
||
93 | * Move filepointer past any BOM marker. |
||
94 | */ |
||
95 | 16 | protected function skipBOM() |
|
127 | } |
||
128 | 16 | } |
|
129 | |||
130 | /** |
||
131 | * Identify any separator that is explicitly set in the file. |
||
132 | */ |
||
133 | 16 | protected function checkSeparator() |
|
147 | 15 | } |
|
148 | |||
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Does nothing when a delimiter is already set. Otherwise up to 1000 lines
 * (as returned by getNextLine(), i.e. with enclosed text removed) are sampled
 * and the occurrences of each candidate delimiter are counted per line. The
 * candidate whose per-line count has the smallest mean square deviation from
 * its median wins; candidates with a median of zero are ignored and ties go
 * to the candidate listed first. If nothing can be inferred the first
 * candidate (',') is used.
 *
 * skipBOM() is called once sampling is finished.
 * NOTE(review): presumably this repositions the file pointer at the start of
 * the data after the sampling reads - confirm against skipBOM().
 */
protected function inferSeparator()
{
    if ($this->delimiter !== null) {
        return;
    }

    $potentialDelimiters = [',', ';', "\t", '|', ':', ' ', '~'];
    $maxSampledLines = 1000;

    $counts = [];
    foreach ($potentialDelimiters as $delimiter) {
        $counts[$delimiter] = [];
    }

    // Count how many times each of the potential delimiters appears in each line.
    // The cap is tested before reading, so $numberLines always equals the
    // number of samples stored per delimiter.
    $numberLines = 0;
    while ($numberLines < $maxSampledLines && ($line = $this->getNextLine()) !== false) {
        ++$numberLines;
        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter][] = substr_count((string) $line, $delimiter);
        }
    }

    // If number of lines is 0, nothing to infer : fall back to the default
    if ($numberLines === 0) {
        $this->delimiter = $potentialDelimiters[0];
        $this->skipBOM();

        return;
    }

    // Calculate the mean square deviations for each delimiter
    // (ignoring delimiters that haven't been found consistently)
    $meanSquareDeviations = [];
    $middleIdx = (int) floor(($numberLines - 1) / 2);
    $hasOddLineCount = ($numberLines % 2) === 1;

    foreach ($potentialDelimiters as $delimiter) {
        $series = $counts[$delimiter];
        sort($series);

        $median = $hasOddLineCount
            ? $series[$middleIdx]
            : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

        // The median may be an int or a float depending on the division above
        if ((float) $median === 0.0) {
            continue;
        }

        $squaredDeviationSum = array_reduce(
            $series,
            function ($sum, $value) use ($median) {
                return $sum + pow($value - $median, 2);
            },
            0
        );
        $meanSquareDeviations[$delimiter] = $squaredDeviationSum / count($series);
    }

    // ... and pick the delimiter with the smallest mean square deviation
    // (in case of ties, the order in potentialDelimiters is respected)
    $min = INF;
    foreach ($potentialDelimiters as $delimiter) {
        if (!isset($meanSquareDeviations[$delimiter])) {
            continue;
        }

        if ($meanSquareDeviations[$delimiter] < $min) {
            $min = $meanSquareDeviations[$delimiter];
            $this->delimiter = $delimiter;
        }
    }

    // If no delimiter could be detected, fall back to the default
    if ($this->delimiter === null) {
        $this->delimiter = $potentialDelimiters[0];
    }

    $this->skipBOM();
}
|
235 | |||
/**
 * Get the next full line from the file, with enclosed text removed.
 *
 * Physical lines are read and appended until the accumulated text no longer
 * contains an unescaped enclosure character, so a quoted value spanning
 * several physical lines is consumed as one line. Everything between a pair
 * of unescaped enclosures is dropped so that delimiters inside quoted values
 * are not counted.
 *
 * @param string $line Text to prepend to the line that is read
 *
 * @return bool|string The stripped line, or false when the end of the file is
 *                     reached (also when that happens inside an open enclosure)
 */
private function getNextLine($line = '')
{
    // An enclosure character that is not preceded by the escape character
    $unescapedEnclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')'
        . preg_quote($this->enclosure, '/');
    $enclosedSpanPattern = '/(' . $unescapedEnclosure . '.*' . $unescapedEnclosure . ')/Us';
    $openEnclosurePattern = '/(' . $unescapedEnclosure . ')/';

    do {
        $chunk = fgets($this->fileHandle);

        // Nothing left to read
        if ($chunk === false) {
            return false;
        }

        // Append the new text, then drop every complete enclosed span
        $line = preg_replace($enclosedSpanPattern, '', $line . $chunk);

        // A leftover enclosure means the value continues on the next physical line
    } while (preg_match($openEnclosurePattern, $line) > 0);

    return $line;
}
||
269 | |||
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 *
 * The whole file is parsed once with the configured (or inferred) delimiter,
 * enclosure and escape character. The result always holds exactly one entry,
 * named 'Worksheet'. The file handle is closed even if reading fails.
 *
 * @param string $pFilename
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return array
 */
public function listWorksheetInfo($pFilename)
{
    // Open file
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }
    $this->openFile($pFilename);
    $fileHandle = $this->fileHandle;

    $totalRows = 0;
    $lastColumnIndex = 0;

    try {
        // Skip BOM, if any
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Loop through each line of the file in turn
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
            ++$totalRows;
            $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
        }
    } finally {
        // Close file, also when one of the steps above threw
        if (is_resource($fileHandle)) {
            fclose($fileHandle);
        }
    }

    $worksheetInfo = [];
    $worksheetInfo[0]['worksheetName'] = 'Worksheet';
    $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($lastColumnIndex + 1);
    $worksheetInfo[0]['lastColumnIndex'] = $lastColumnIndex;
    $worksheetInfo[0]['totalRows'] = $totalRows;
    $worksheetInfo[0]['totalColumns'] = $lastColumnIndex + 1;

    return $worksheetInfo;
}
||
314 | |||
/**
 * Loads a CSV file into a newly created Spreadsheet.
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    // Delegate to loadIntoExisting() with a fresh, empty Spreadsheet as target
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
||
332 | |||
/**
 * Loads a CSV file into an existing Spreadsheet instance.
 *
 * The data is written to the sheet at the configured sheet index; missing
 * sheets up to that index are created first. Empty fields and cells rejected
 * by the read filter are skipped, and values are converted to UTF-8 when the
 * input encoding is not 'UTF-8'.
 *
 * In contiguous mode the first load starts at the sheet's highest row and
 * each later load continues at the row following the previous load.
 *
 * The 'auto_detect_line_endings' ini setting is enabled for the duration of
 * the load. It is restored, and the file handle closed, on every exit path
 * including exceptions.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return Spreadsheet
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    $lineEnding = ini_get('auto_detect_line_endings');
    ini_set('auto_detect_line_endings', true);

    $fileHandle = null;

    try {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        // Skip BOM, if any
        $this->skipBOM();
        $this->checkSeparator();
        $this->inferSeparator();

        // Make sure the target sheet exists, then select it
        while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
            $spreadsheet->createSheet();
        }
        $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

        // Set our starting row based on whether we're in contiguous mode or not
        $currentRow = 1;
        if ($this->contiguous) {
            $currentRow = ($this->contiguousRow === -1) ? $sheet->getHighestRow() : $this->contiguousRow;
        }

        // Loop through each line of the file in turn
        while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure, $this->escapeCharacter)) !== false) {
            $columnLetter = 'A';
            foreach ($rowData as $rowDatum) {
                // fgetcsv() yields a single null field for a blank line; skip it like an empty string
                $hasValue = $rowDatum !== null && $rowDatum !== '';
                if ($hasValue && $this->readFilter->readCell($columnLetter, $currentRow)) {
                    // Convert encoding if necessary
                    if ($this->inputEncoding !== 'UTF-8') {
                        $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                    }

                    // Set cell value
                    $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                }
                // String increment walks the column letters: 'A', 'B', ..., 'Z', 'AA', ...
                ++$columnLetter;
            }
            ++$currentRow;
        }

        // Remember where the next contiguous load has to continue
        if ($this->contiguous) {
            $this->contiguousRow = $currentRow;
        }
    } finally {
        // Close file and restore the ini setting, whether or not the load succeeded
        if (is_resource($fileHandle)) {
            fclose($fileHandle);
        }
        ini_set('auto_detect_line_endings', $lineEnding);
    }

    // Return
    return $spreadsheet;
}
||
402 | |||
403 | /** |
||
404 | * Get delimiter. |
||
405 | * |
||
406 | * @return string |
||
407 | */ |
||
408 | 9 | public function getDelimiter() |
|
411 | } |
||
412 | |||
/**
 * Set the field delimiter explicitly.
 *
 * A delimiter that is set is not overridden by the automatic inference
 * performed while loading.
 *
 * @param string $delimiter Delimiter, eg: ','
 *
 * @return Csv This reader, to allow call chaining
 */
public function setDelimiter($delimiter)
{
    $this->delimiter = $delimiter;

    return $this;
}
||
426 | |||
427 | /** |
||
428 | * Get enclosure. |
||
429 | * |
||
430 | * @return string |
||
431 | */ |
||
432 | public function getEnclosure() |
||
435 | } |
||
436 | |||
437 | /** |
||
438 | * Set enclosure. |
||
439 | * |
||
440 | * @param string $enclosure Enclosure, defaults to " |
||
441 | * |
||
442 | * @return Csv |
||
443 | */ |
||
444 | 1 | public function setEnclosure($enclosure) |
|
452 | } |
||
453 | |||
/**
 * Get the index of the sheet that loaded data is written to.
 *
 * @return int
 */
public function getSheetIndex()
{
    return $this->sheetIndex;
}
||
463 | |||
/**
 * Set the index of the sheet that loaded data is written to.
 *
 * @param int $pValue Sheet index
 *
 * @return Csv This reader, to allow call chaining
 */
public function setSheetIndex($pValue)
{
    $this->sheetIndex = $pValue;

    return $this;
}
||
477 | |||
/**
 * Switch contiguous loading on or off.
 *
 * Switching it off also resets the remembered row counter, so a later
 * contiguous load starts over.
 *
 * @param bool $contiguous
 *
 * @return Csv This reader, to allow call chaining
 */
public function setContiguous($contiguous)
{
    $enabled = (bool) $contiguous;
    $this->contiguous = $enabled;

    if ($enabled === false) {
        $this->contiguousRow = -1;
    }

    return $this;
}
||
494 | |||
495 | /** |
||
496 | * Get Contiguous. |
||
497 | * |
||
498 | * @return bool |
||
499 | */ |
||
500 | public function getContiguous() |
||
503 | } |
||
504 | |||
505 | /** |
||
506 | * Set the escape character. |
||
507 | * |
||
508 | * @param string $escapeCharacter |
||
509 | * |
||
510 | * @return $this |
||
511 | */ |
||
512 | 1 | public function setEscapeCharacter($escapeCharacter) |
|
517 | } |
||
518 | |||
519 | /** |
||
520 | * Get the escape character. |
||
521 | * |
||
522 | * @return string |
||
523 | */ |
||
524 | 1 | public function getEscapeCharacter() |
|
527 | } |
||
528 | |||
529 | /** |
||
530 | * Can the current IReader read the file? |
||
531 | * |
||
532 | * @param string $pFilename |
||
533 | * |
||
534 | * @return bool |
||
535 | */ |
||
536 | 26 | public function canRead($pFilename) |
|
562 | } |
||
563 | } |
||
564 |