Total Complexity | 53 |
Total Lines | 381 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like CsvBulkLoader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CsvBulkLoader, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class CsvBulkLoader extends BulkLoader |
||
22 | { |
||
23 | |||
24 | /** |
||
25 | * Delimiter character (Default: comma). |
||
26 | * |
||
27 | * @var string |
||
28 | */ |
||
29 | public $delimiter = ','; |
||
30 | |||
31 | /** |
||
32 | * Enclosure character (Default: doublequote) |
||
33 | * |
||
34 | * @var string |
||
35 | */ |
||
36 | public $enclosure = '"'; |
||
37 | |||
38 | /** |
||
39 | * Identifies whether the CSV has a header row. |
||
40 | * |
||
41 | * @var boolean |
||
42 | */ |
||
43 | public $hasHeaderRow = true; |
||
44 | |||
45 | /** |
||
46 | * Number of lines to split large CSV files into. |
||
47 | * |
||
48 | * @var int |
||
49 | * |
||
50 | * @config |
||
51 | */ |
||
52 | private static $lines = 1000; |
||
53 | |||
/**
 * Runs the import for the given file in preview mode by delegating to
 * processAll() with the preview flag switched on.
 *
 * @inheritDoc
 */
public function preview($filepath)
{
    $previewOnly = true;

    return $this->processAll($filepath, $previewOnly);
}
||
61 | |||
/**
 * Splits the CSV file into chunk files and processes them one after another,
 * merging every chunk's result into a single result object.
 *
 * A chunk file is deleted as soon as it has been processed. When processing
 * a chunk throws an Exception, a failure message is printed and whatever has
 * been collected so far is returned (null if the very first chunk failed).
 *
 * @param string $filepath
 * @param boolean $preview
 *
 * @return null|BulkLoader_Result
 */
protected function processAll($filepath, $preview = false)
{
    $filepath = Director::getAbsFile($filepath);
    $files = $this->splitFile($filepath);

    $result = null;
    $last = null;

    try {
        foreach ($files as $file) {
            // remember the chunk being worked on for the failure message
            $last = $file;

            $next = $this->processChunk($file, $preview);

            if ($result instanceof BulkLoader_Result) {
                $result->merge($next);
            } else {
                $result = $next;
            }

            @unlink($file);
        }
    } catch (Exception $e) {
        $failedMessage = sprintf("Failed to parse %s", $last);
        if (Director::isDev()) {
            // Append the reason by concatenation. The message built above
            // already contains the file path, so it must never be reused as
            // a sprintf() format string: a "%" in the path would be read as
            // a conversion specifier.
            $failedMessage .= sprintf(" because %s", $e->getMessage());
        }
        print $failedMessage . PHP_EOL;
    }

    return $result;
}
||
100 | |||
/**
 * Splits a large file up into many smaller files.
 *
 * When the loader is configured with a header row, the first line of the
 * source file is copied to the top of every generated file.
 *
 * @param string $path Path to large file to split
 * @param int $lines Number of lines per file (falls back to the "lines" config value)
 *
 * @return array List of file paths
 */
protected function splitFile($path, $lines = null)
{
    $previous = ini_get('auto_detect_line_endings');

    ini_set('auto_detect_line_endings', true);

    if (!is_int($lines)) {
        $lines = $this->config()->get("lines");
    }

    $new = $this->getNewSplitFileName();

    $to = fopen($new, 'w+');
    $from = fopen($path, 'r');

    $header = null;

    if ($this->hasHeaderRow) {
        // an empty source file yields false here; normalise it to an empty string
        $header = (string) fgets($from);
        fwrite($to, $header);
    }

    $files = array();
    $files[] = $new;

    $count = 0;

    // Loop on the result of fgets() rather than on feof(): feof() only turns
    // true after a read has hit the end of the file, so a feof()-driven loop
    // writes and counts one phantom "false" line, and never terminates if
    // the stream keeps failing without reaching EOF.
    while (($line = fgets($from)) !== false) {
        fwrite($to, $line);

        $count++;

        if ($count >= $lines) {
            fclose($to);

            // get a new temporary file name, to write the next lines to
            $new = $this->getNewSplitFileName();

            $to = fopen($new, 'w+');

            if ($this->hasHeaderRow) {
                // add the headers to the new file
                fwrite($to, $header);
            }

            $files[] = $new;

            $count = 0;
        }
    }

    fclose($to);
    // release the source handle as well; it was previously left open
    fclose($from);

    ini_set('auto_detect_line_endings', $previous);

    return $files;
}
||
166 | |||
167 | /** |
||
168 | * @return string |
||
169 | */ |
||
170 | protected function getNewSplitFileName() |
||
173 | } |
||
174 | |||
/**
 * Parses a single CSV file and hands every row to processRecord(),
 * collecting the outcome in a fresh result object.
 *
 * @param string $filepath
 * @param boolean $preview
 *
 * @return BulkLoader_Result
 */
protected function processChunk($filepath, $preview = false)
{
    $outcome = BulkLoader_Result::create();

    $parser = new CSVParser($filepath, $this->delimiter, $this->enclosure);

    // The column map either renames the columns of an existing header row
    // or, when the file has no header row, stands in for it.
    if ($this->columnMap) {
        $headerMap = [];
        foreach ($this->columnMap as $column => $target) {
            // Callback targets ("->method") keep the column's own key, since
            // several columns may point at the same callback.
            $isCallback = strpos($target, "->") === 0;
            $headerMap[$column] = $isCallback ? $column : $target;
        }

        if (!$this->hasHeaderRow) {
            $parser->provideHeaderRow($headerMap);
        } else {
            $parser->mapColumns($headerMap);
        }
    }

    foreach ($parser as $row) {
        $this->processRecord($row, $this->columnMap, $outcome, $preview);
    }

    return $outcome;
}
||
218 | |||
/**
 * Imports one row: looks up (or instantiates) the target object, resolves
 * its relations, copies the row's values onto it and registers the object
 * in the result set as either updated or created.
 *
 * In preview mode nothing is written and the value-copying pass is skipped.
 *
 * @todo Better messages for relation checks and duplicate detection
 *
 * @param array $record
 * @param array $columnMap Only forwarded to findExistingObject(); field
 *                         callbacks are read from $this->columnMap
 * @param BulkLoader_Result $results
 * @param boolean $preview
 *
 * @return int
 */
protected function processRecord($record, $columnMap, &$results, $preview = false)
{
    // reuse a matching object if there is one, otherwise start a new one
    $existingObj = $this->findExistingObject($record, $columnMap);
    if ($existingObj) {
        /** @var DataObject $obj */
        $obj = $existingObj;
    } else {
        $objectClass = $this->objectClass;
        /** @var DataObject $obj */
        $obj = new $objectClass();
    }
    $schema = DataObject::getSchema();

    // Pass one: find/create relations and attach them to the object. This
    // cannot be merged with pass two, as other columns might rely on the
    // relation already being present.
    foreach ($record as $fieldName => $val) {
        // nothing to look up for an unset value
        if ($this->isNullValue($val)) {
            continue;
        }

        if (isset($this->relationCallbacks[$fieldName])) {
            // A custom search method locates the related object for the
            // given value; a new object is created when it finds nothing.
            $relation = $this->relationCallbacks[$fieldName];
            $relationName = $relation['relationname'];
            $callback = $relation['callback'];

            /** @var DataObject $relationObj */
            $relationObj = null;
            if ($this->hasMethod($callback)) {
                $relationObj = $this->{$callback}($obj, $val, $record);
            } elseif ($obj->hasMethod($callback)) {
                $relationObj = $obj->{$callback}($val, $record);
            }

            $isMissing = !$relationObj || !$relationObj->exists();
            if ($isMissing) {
                $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                $relationObj = new $relationClass();
                // only persist when this is not a preview
                if (!$preview) {
                    $relationObj->write();
                }
            }
        } elseif (strpos($fieldName, '.') !== false) {
            // relation column in dot notation: "Relation.Column"
            list($relationName) = explode('.', $fieldName);
            // always yields a component (either empty or existing)
            $relationObj = $obj->getComponent($relationName);
            if (!$preview) {
                $relationObj->write();
            }
        } else {
            // plain column, handled in pass two
            continue;
        }

        $obj->{"{$relationName}ID"} = $relationObj->ID;

        // only persist when this is not a preview
        if (!$preview) {
            $obj->write();
            $obj->flushCache(); // avoid relation caching confusion
        }
    }

    // Pass two: copy the values over and save. Skipped entirely on preview.
    if (!$preview) {
        foreach ($record as $fieldName => $val) {
            // does the column map route this field to a loader callback?
            $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

            if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                $funcName = substr($this->columnMap[$fieldName], 2);

                $this->$funcName($obj, $val, $record);
            } elseif ($obj->hasMethod("import{$fieldName}")) {
                $obj->{"import{$fieldName}"}($val, $record);
            } else {
                $obj->update([$fieldName => $val]);
            }
        }

        // write record
        $obj->write();
    }

    // @todo better message support
    $message = '';

    // save to results
    if ($existingObj) {
        $results->addUpdated($obj, $message);
    } else {
        $results->addCreated($obj, $message);
    }

    $objID = $obj->ID;

    $obj->destroy();

    // memory usage
    unset($existingObj, $obj);

    return $objID;
}
||
339 | |||
/**
 * Finds an existing object based on one or more uniqueness columns
 * specified via {@link self::$duplicateChecks}.
 *
 * The checks are tried in order and the first match wins. A check is either
 * a field name (matched against the row's value for that field) or an array
 * with a "callback" key naming a method on the loader or on the object class.
 *
 * @todo support $columnMap
 *
 * @param array $record CSV data column
 * @param array $columnMap
 * @return DataObject|false The first matching record, or false if none matched
 */
public function findExistingObject($record, $columnMap = [])
{
    $singleton = singleton($this->objectClass);

    foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
        if (is_string($duplicateCheck)) {
            // an empty field value cannot identify a duplicate
            if (empty($record[$duplicateCheck])) {
                continue;
            }

            // look for a stored record carrying the same value
            $candidates = DataObject::get($this->objectClass);
            $match = $candidates
                ->filter($duplicateCheck, $record[$duplicateCheck])
                ->first();
        } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
            $callback = $duplicateCheck['callback'];
            $match = null;

            // the loader's own method takes precedence over the object class
            if ($this->hasMethod($callback)) {
                $match = $this->{$callback}($record[$fieldName], $record);
            } elseif ($singleton->hasMethod($callback)) {
                $match = $singleton->{$callback}($record[$fieldName], $record);
            } else {
                user_error("CsvBulkLoader::processRecord():"
                    . " {$callback} not found on importer or object class.", E_USER_ERROR);
            }
        } else {
            user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
            continue;
        }

        if ($match) {
            return $match;
        }
    }

    return false;
}
||
392 | |||
393 | /** |
||
394 | * Determine whether any loaded files should be parsed with a |
||
395 | * header-row (otherwise we rely on {@link self::$columnMap}). |
||
396 | * |
||
397 | * @return boolean |
||
398 | */ |
||
399 | public function hasHeaderRow() |
||
402 | } |
||
403 | } |
||
404 |