Total Complexity | 62 |
Total Lines | 478 |
Duplicated Lines | 0 % |
Coverage | 66.86% |
Changes | 0 |
Complex classes like Csv often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Csv, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
9 | class Csv extends BaseReader |
||
10 | { |
||
11 | /** |
||
12 | * Input encoding. |
||
13 | * |
||
14 | * @var string |
||
15 | */ |
||
16 | private $inputEncoding = 'UTF-8'; |
||
17 | |||
18 | /** |
||
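19 | * Delimiter; stays null until it is set by the user, read from a "sep=" line, or inferred from the file contents. |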
||
20 | * |
||
21 | * @var string |
||
22 | */ |
||
23 | private $delimiter; |
||
24 | |||
25 | /** |
||
26 | * Enclosure. |
||
27 | * |
||
28 | * @var string |
||
29 | */ |
||
30 | private $enclosure = '"'; |
||
31 | |||
32 | /** |
||
33 | * Sheet index to read. |
||
34 | * |
||
35 | * @var int |
||
36 | */ |
||
37 | private $sheetIndex = 0; |
||
38 | |||
39 | /** |
||
40 | * Load rows contiguously. |
||
41 | * |
||
42 | * @var bool |
||
43 | */ |
||
44 | private $contiguous = false; |
||
45 | |||
46 | /** |
||
47 | * Row counter for loading rows contiguously. |
||
48 | * |
||
49 | * @var int |
||
50 | */ |
||
51 | private $contiguousRow = -1; |
||
52 | |||
53 | /** |
||
54 | * Create a new CSV Reader instance. |
||
55 | */ |
||
56 | 19 | public function __construct() |
|
59 | 19 | } |
|
60 | |||
/**
 * Define the character encoding of the CSV data that will be read.
 *
 * While loading, cell values are passed through an encoding conversion
 * whenever this value is anything other than 'UTF-8'.
 *
 * @param string $pValue Input encoding, eg: 'UTF-8'
 *
 * @return Csv This reader, so that calls can be chained
 */
public function setInputEncoding($pValue)
{
    $this->inputEncoding = $pValue;

    return $this;
}
||
74 | |||
/**
 * Report the character encoding currently configured for the input data.
 *
 * @return string The configured encoding ('UTF-8' unless changed)
 */
public function getInputEncoding()
{
    return $this->inputEncoding;
}
||
84 | |||
85 | /** |
||
86 | * Move filepointer past any BOM marker. |
||
87 | */ |
||
88 | 10 | protected function skipBOM() |
|
120 | } |
||
121 | 10 | } |
|
122 | |||
/**
 * Look for an explicit separator directive ("sep=X") on the next line of the file.
 *
 * When the line consists of exactly "sep=" followed by one character, that
 * character becomes the delimiter and the line stays consumed. Any other line
 * is handed back by calling skipBOM(), which repositions the file pointer.
 */
protected function checkSeparator()
{
    $firstLine = fgets($this->fileHandle);
    if ($firstLine === false) {
        // Nothing could be read, so there is nothing to inspect
        return;
    }

    // A directive is "sep=" plus a single delimiter character, ignoring the line ending
    $hasDirectiveLength = strlen(trim($firstLine, "\r\n")) == 5;
    $hasDirectivePrefix = stripos($firstLine, 'sep=') === 0;

    if (!$hasDirectiveLength || !$hasDirectivePrefix) {
        return $this->skipBOM();
    }

    $this->delimiter = substr($firstLine, 4, 1);
}
||
141 | |||
/**
 * Infer the separator if it isn't explicitly set in the file or specified by the user.
 *
 * Up to 1000 lines are sampled from the current file position. For every
 * candidate delimiter the number of occurrences per line (outside enclosed
 * text) is recorded. Candidates whose median count is zero are discarded; of
 * the remaining ones, the candidate whose per-line counts deviate least from
 * their median wins, with ties resolved by the order of the candidate list.
 * When nothing can be detected, or no line could be read at all, the first
 * candidate (a comma) is used.
 *
 * skipBOM() is called afterwards to reposition the file pointer.
 */
protected function inferSeparator()
{
    // A delimiter that is already known always takes precedence
    if ($this->delimiter !== null) {
        return;
    }

    $potentialDelimiters = [',', ';', "\t", '|', ':', ' '];
    $counts = [];
    foreach ($potentialDelimiters as $delimiter) {
        $counts[$delimiter] = [];
    }

    // Pattern matching enclosed text; it does not change between lines, so build it once
    $enclosure = preg_quote($this->enclosure, '/');
    $enclosedPattern = '/(' . $enclosure . '.*' . $enclosure . ')/U';

    // Count how many times each of the potential delimiters appears in each line.
    // The counter is only advanced for lines that are actually sampled, so it
    // always equals the length of every series in $counts.
    $maxLines = 1000;
    $numberLines = 0;
    while ($numberLines < $maxLines && ($line = fgets($this->fileHandle)) !== false) {
        ++$numberLines;

        // Drop everything that is enclosed to avoid counting false positives in enclosures
        $stripped = preg_replace($enclosedPattern, '', $line);
        if ($stripped === null) {
            // preg_replace() signals a PCRE error with null; fall back to the raw line
            $stripped = $line;
        }

        foreach ($potentialDelimiters as $delimiter) {
            $counts[$delimiter][] = substr_count($stripped, $delimiter);
        }
    }

    // Nothing to analyse: use the default instead of indexing into empty series
    if ($numberLines === 0) {
        $this->delimiter = reset($potentialDelimiters);

        return $this->skipBOM();
    }

    // Calculate the mean square deviations for each delimiter (ignoring delimiters that haven't been found consistently)
    $meanSquareDeviations = [];
    $middleIdx = (int) floor(($numberLines - 1) / 2);

    foreach ($potentialDelimiters as $delimiter) {
        $series = $counts[$delimiter];
        sort($series);

        $median = ($numberLines % 2)
            ? $series[$middleIdx]
            : ($series[$middleIdx] + $series[$middleIdx + 1]) / 2;

        if ($median === 0) {
            continue;
        }

        $meanSquareDeviations[$delimiter] = array_reduce(
            $series,
            function ($sum, $value) use ($median) {
                return $sum + pow($value - $median, 2);
            },
            0
        ) / count($series);
    }

    // ... and pick the delimiter with the smallest mean square deviation (in case of ties, the order in potentialDelimiters is respected)
    $min = INF;
    foreach ($potentialDelimiters as $delimiter) {
        if (!isset($meanSquareDeviations[$delimiter])) {
            continue;
        }

        if ($meanSquareDeviations[$delimiter] < $min) {
            $min = $meanSquareDeviations[$delimiter];
            $this->delimiter = $delimiter;
        }
    }

    // If no delimiter could be detected, fall back to the default
    if ($this->delimiter === null) {
        $this->delimiter = reset($potentialDelimiters);
    }

    return $this->skipBOM();
}
||
225 | |||
/**
 * Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns).
 *
 * The file is read once from start to end to count its rows and to find the
 * widest row. The result always holds exactly one entry, named 'Worksheet'.
 *
 * @param string $pFilename
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return array
 */
public function listWorksheetInfo($pFilename)
{
    // Open file
    if (!$this->canRead($pFilename)) {
        throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
    }
    $this->openFile($pFilename);
    $fileHandle = $this->fileHandle;

    // Skip BOM, if any, and settle on a delimiter
    $this->skipBOM();
    $this->checkSeparator();
    $this->inferSeparator();

    // Walk through the file, tracking the row count and the widest row seen
    $totalRows = 0;
    $lastColumnIndex = 0;
    while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
        ++$totalRows;
        $lastColumnIndex = max($lastColumnIndex, count($rowData) - 1);
    }

    $lastColumnLetter = Coordinate::stringFromColumnIndex($lastColumnIndex + 1);

    // Close file
    fclose($fileHandle);

    return [
        [
            'worksheetName' => 'Worksheet',
            'lastColumnLetter' => $lastColumnLetter,
            'lastColumnIndex' => $lastColumnIndex,
            'totalRows' => $totalRows,
            'totalColumns' => $lastColumnIndex + 1,
        ],
    ];
}
||
270 | |||
/**
 * Read the given file into a newly created Spreadsheet.
 *
 * @param string $pFilename
 *
 * @throws Exception
 *
 * @return Spreadsheet
 */
public function load($pFilename)
{
    // A fresh Spreadsheet is the target; the actual reading is delegated
    return $this->loadIntoExisting($pFilename, new Spreadsheet());
}
||
288 | |||
/**
 * Loads PhpSpreadsheet from file into PhpSpreadsheet instance.
 *
 * Rows are written to the sheet selected by the configured sheet index; missing
 * sheets are created first. In contiguous mode the starting row continues from
 * the previous load (or from the sheet's highest row on the first load),
 * otherwise reading starts at row 1. Empty values and cells rejected by the
 * read filter are skipped.
 *
 * The 'auto_detect_line_endings' ini setting is switched on for the duration of
 * the call and restored afterwards, and the file handle is closed, even when
 * an exception interrupts the load.
 *
 * @param string $pFilename
 * @param Spreadsheet $spreadsheet
 *
 * @throws Exception When canRead() rejects the file
 *
 * @return Spreadsheet
 */
public function loadIntoExisting($pFilename, Spreadsheet $spreadsheet)
{
    $lineEnding = ini_get('auto_detect_line_endings');
    ini_set('auto_detect_line_endings', true);

    try {
        // Open file
        if (!$this->canRead($pFilename)) {
            throw new Exception($pFilename . ' is an Invalid Spreadsheet file.');
        }
        $this->openFile($pFilename);
        $fileHandle = $this->fileHandle;

        try {
            // Skip BOM, if any
            $this->skipBOM();
            $this->checkSeparator();
            $this->inferSeparator();

            // Make sure the target sheet exists
            while ($spreadsheet->getSheetCount() <= $this->sheetIndex) {
                $spreadsheet->createSheet();
            }
            $sheet = $spreadsheet->setActiveSheetIndex($this->sheetIndex);

            // Set our starting row based on whether we're in contiguous mode or not
            $currentRow = 1;
            if ($this->contiguous) {
                $currentRow = ($this->contiguousRow == -1) ? $sheet->getHighestRow() : $this->contiguousRow;
            }

            // Loop through each line of the file in turn
            while (($rowData = fgetcsv($fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
                $columnLetter = 'A';
                foreach ($rowData as $rowDatum) {
                    // Loose comparison on purpose: a null field (blank line) counts as empty too
                    if ($rowDatum != '' && $this->readFilter->readCell($columnLetter, $currentRow)) {
                        // Convert encoding if necessary
                        if ($this->inputEncoding !== 'UTF-8') {
                            $rowDatum = StringHelper::convertEncoding($rowDatum, 'UTF-8', $this->inputEncoding);
                        }

                        // Set cell value
                        $sheet->getCell($columnLetter . $currentRow)->setValue($rowDatum);
                    }
                    // String increment moves on to the next column letter
                    ++$columnLetter;
                }
                ++$currentRow;
            }
        } finally {
            // Close file, whether or not reading succeeded
            fclose($fileHandle);
        }

        // Remember where the next contiguous load has to continue
        if ($this->contiguous) {
            $this->contiguousRow = $currentRow;
        }
    } finally {
        // Never leave the process-wide ini setting modified
        ini_set('auto_detect_line_endings', $lineEnding);
    }

    // Return
    return $spreadsheet;
}
||
358 | |||
359 | /** |
||
360 | * Get delimiter. |
||
361 | * |
||
362 | * @return string |
||
363 | */ |
||
364 | 5 | public function getDelimiter() |
|
367 | } |
||
368 | |||
/**
 * Fix the field delimiter to use when reading.
 *
 * A delimiter set here is kept as is: separator inference returns early
 * whenever a delimiter is already known.
 *
 * @param string $delimiter Delimiter, eg: ','
 *
 * @return Csv This reader, so that calls can be chained
 */
public function setDelimiter($delimiter)
{
    $this->delimiter = $delimiter;

    return $this;
}
||
382 | |||
383 | /** |
||
384 | * Get enclosure. |
||
385 | * |
||
386 | * @return string |
||
387 | */ |
||
388 | public function getEnclosure() |
||
391 | } |
||
392 | |||
393 | /** |
||
394 | * Set enclosure. |
||
395 | * |
||
396 | * @param string $enclosure Enclosure, defaults to " |
||
397 | * |
||
398 | * @return CSV |
||
399 | */ |
||
400 | 1 | public function setEnclosure($enclosure) |
|
408 | } |
||
409 | |||
/**
 * Report the index of the sheet that loaded data is written to.
 *
 * @return int The configured sheet index (0 unless changed)
 */
public function getSheetIndex()
{
    return $this->sheetIndex;
}
||
419 | |||
/**
 * Choose the sheet that loaded data is written to.
 *
 * Sheets up to this index are created during loading when the target
 * spreadsheet does not have enough of them.
 *
 * @param int $pValue Sheet index
 *
 * @return Csv This reader, so that calls can be chained
 */
public function setSheetIndex($pValue)
{
    $this->sheetIndex = $pValue;

    return $this;
}
||
433 | |||
/**
 * Switch contiguous loading on or off.
 *
 * In contiguous mode a load continues at the row where the previous one
 * stopped. Switching the mode off also forgets that remembered row.
 *
 * @param bool $contiguous
 *
 * @return Csv This reader, so that calls can be chained
 */
public function setContiguous($contiguous)
{
    $this->contiguous = (bool) $contiguous;

    // Reset the row counter once contiguous mode is disabled
    if ($this->contiguous === false) {
        $this->contiguousRow = -1;
    }

    return $this;
}
||
450 | |||
451 | /** |
||
452 | * Get Contiguous. |
||
453 | * |
||
454 | * @return bool |
||
455 | */ |
||
456 | public function getContiguous() |
||
459 | } |
||
460 | |||
461 | /** |
||
462 | * Can the current IReader read the file? |
||
463 | * |
||
464 | * @param string $pFilename |
||
465 | * |
||
466 | * @return bool |
||
467 | */ |
||
468 | 18 | public function canRead($pFilename) |
|
469 | { |
||
489 |