| Total Complexity | 53 |
| Total Lines | 381 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like CsvBulkLoader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CsvBulkLoader, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 21 | class CsvBulkLoader extends BulkLoader |
||
| 22 | { |
||
| 23 | |||
/**
 * Delimiter character (Default: comma).
 *
 * Passed to the CSV parser for every chunk that is processed.
 *
 * @var string
 */
public $delimiter = ',';

/**
 * Enclosure character (Default: doublequote).
 *
 * Passed to the CSV parser for every chunk that is processed.
 *
 * @var string
 */
public $enclosure = '"';

/**
 * Identifies whether the CSV has a header row.
 *
 * When true, the first line of the source file is repeated at the top of
 * every chunk the file is split into.
 *
 * @var boolean
 */
public $hasHeaderRow = true;

/**
 * Number of lines to split large CSV files into.
 *
 * Used as the per-chunk line count when splitFile() is called without an
 * explicit integer $lines argument.
 *
 * @var int
 *
 * @config
 */
private static $lines = 1000;
||
| 53 | |||
/**
 * Runs the given file through processAll() with the preview flag enabled.
 *
 * @inheritDoc
 */
public function preview($filepath)
{
    $previewOnly = true;

    return $this->processAll($filepath, $previewOnly);
}
||
| 61 | |||
/**
 * Splits the given file into chunks, processes every chunk and merges the
 * per-chunk results into a single result object.
 *
 * A failure while processing a chunk stops the import; a message is printed
 * (including the exception message in dev mode) and whatever was collected
 * up to that point is returned. Temporary chunk files are removed in every
 * case, including the chunks that were never reached because of a failure.
 *
 * @param string $filepath Path to the CSV file to import
 * @param boolean $preview Forwarded to processChunk() for every chunk
 *
 * @return null|BulkLoader_Result Null when no chunk was processed successfully
 */
protected function processAll($filepath, $preview = false)
{
    $filepath = Director::getAbsFile($filepath);
    $files = $this->splitFile($filepath);

    $result = null;
    $last = null;

    try {
        foreach ($files as $file) {
            $last = $file;

            $next = $this->processChunk($file, $preview);

            if ($result instanceof BulkLoader_Result) {
                $result->merge($next);
            } else {
                $result = $next;
            }

            // free disk space as early as possible
            @unlink($file);
        }
    } catch (Exception $e) {
        $failedMessage = sprintf("Failed to parse %s", $last);
        if (Director::isDev()) {
            // append instead of re-formatting: the path may contain "%",
            // which must not be interpreted as a format specifier
            $failedMessage .= sprintf(" because %s", $e->getMessage());
        }
        print $failedMessage . PHP_EOL;
    } finally {
        // remove the failed chunk and every chunk that was never processed
        foreach ($files as $file) {
            if (is_file($file)) {
                @unlink($file);
            }
        }
    }

    return $result;
}
||
| 100 | |||
/**
 * Splits a large file up into many smaller files.
 *
 * When $this->hasHeaderRow is set, the first line of the source file is
 * repeated at the top of every chunk. At least one chunk file is always
 * created, even if the source contains no data lines. Both file handles are
 * closed and the "auto_detect_line_endings" setting is restored on every
 * exit path; chunks written before a failure are deleted again.
 *
 * @param string $path Path to large file to split
 * @param int $lines Number of lines per file (defaults to the "lines" config value)
 *
 * @return array List of file paths
 *
 * @throws \RuntimeException When the source or a chunk file cannot be opened
 */
protected function splitFile($path, $lines = null)
{
    if (!is_int($lines)) {
        $lines = $this->config()->get("lines");
    }

    // a chunk must hold at least one line, otherwise no chunk could ever fill up
    $lines = max(1, (int) $lines);

    $previous = ini_get('auto_detect_line_endings');

    ini_set('auto_detect_line_endings', true);

    $files = array();
    $from = null;
    $to = null;

    try {
        $from = fopen($path, 'r');

        if ($from === false) {
            throw new \RuntimeException(sprintf('Unable to open "%s" for reading', $path));
        }

        $header = '';

        if ($this->hasHeaderRow) {
            $header = fgets($from);

            // an empty source file has no header line to repeat
            if ($header === false) {
                $header = '';
            }
        }

        $to = $this->openSplitChunk($header, $files);

        $count = 0;

        // fgets() returns false at EOF, so no bogus "line" is written or counted
        while (($line = fgets($from)) !== false) {
            // roll over only when there is another line to write; this avoids
            // a trailing chunk that contains nothing but the header
            if ($count >= $lines) {
                fclose($to);

                $to = $this->openSplitChunk($header, $files);

                $count = 0;
            }

            fwrite($to, $line);

            $count++;
        }
    } catch (\Exception $e) {
        // the caller never receives the list, so clean up what was written
        foreach ($files as $file) {
            if (is_file($file)) {
                @unlink($file);
            }
        }

        throw $e;
    } finally {
        // is_resource() is false for handles that were already closed
        if (is_resource($to)) {
            fclose($to);
        }

        if (is_resource($from)) {
            fclose($from);
        }

        ini_set('auto_detect_line_endings', $previous);
    }

    return $files;
}

/**
 * Creates the next chunk file, writes the header line into it and records its path.
 *
 * @param string $header Header line to repeat at the top of the chunk ('' for none)
 * @param array $files List of chunk paths; the new path is appended to it
 *
 * @return resource Writable handle of the new chunk file
 *
 * @throws \RuntimeException When the chunk file cannot be opened
 */
private function openSplitChunk($header, array &$files)
{
    $new = $this->getNewSplitFileName();

    $handle = fopen($new, 'w+');

    if ($handle === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $new));
    }

    $files[] = $new;

    if ($header !== '') {
        fwrite($handle, $header);
    }

    return $handle;
}
||
| 166 | |||
| 167 | /** |
||
| 168 | * @return string |
||
| 169 | */ |
||
| 170 | protected function getNewSplitFileName() |
||
| 173 | } |
||
| 174 | |||
/**
 * Parses one chunk file and feeds every row to processRecord().
 *
 * @param string $filepath Path to the chunk file
 * @param boolean $preview Forwarded to processRecord() for every row
 *
 * @return BulkLoader_Result Result object collecting all rows of this chunk
 */
protected function processChunk($filepath, $preview = false)
{
    $results = BulkLoader_Result::create();
    $csv = new CSVParser($filepath, $this->delimiter, $this->enclosure);

    // The column map either renames existing header columns or, when the
    // file has no header row, supplies the header row itself
    if ($this->columnMap) {
        $headerMap = [];

        foreach ($this->columnMap as $column => $target) {
            // Callback targets ("->method") keep their own column name as the
            // mapped value, because several columns may share one callback
            $isCallback = strpos($target, "->") === 0;
            $headerMap[$column] = $isCallback ? $column : $target;
        }

        if (!$this->hasHeaderRow) {
            $csv->provideHeaderRow($headerMap);
        } else {
            $csv->mapColumns($headerMap);
        }
    }

    foreach ($csv as $row) {
        $this->processRecord($row, $this->columnMap, $results, $preview);
    }

    return $results;
}
||
| 218 | |||
/**
 * Imports a single CSV row into a record of $this->objectClass.
 *
 * An existing object is looked up through findExistingObject(); when none is
 * found, a new instance is created. Relations are resolved in a first pass,
 * field data is applied in a second pass, and the object is finally added to
 * $results as either "updated" or "created". In preview mode no write() call
 * is made and the second pass is skipped entirely.
 *
 * @todo Better messages for relation checks and duplicate detection
 *
 * @param array $record Row data keyed by field name
 * @param array $columnMap Only forwarded to findExistingObject(); the callback
 *                         lookups in the second pass read $this->columnMap
 * @param BulkLoader_Result $results Collector that receives the processed object
 * @param boolean $preview When true, nothing is written
 *
 * @return int Value of the object's ID property after processing
 */
protected function processRecord($record, $columnMap, &$results, $preview = false)
{
    $class = $this->objectClass;

    // find existing object, or create new one
    $existingObj = $this->findExistingObject($record, $columnMap);
    /** @var DataObject $obj */
    $obj = ($existingObj) ? $existingObj : new $class();
    $schema = DataObject::getSchema();

    // first run: find/create any relations and store them on the object
    // we can't combine runs, as other columns might rely on the relation being present
    foreach ($record as $fieldName => $val) {
        // don't bother querying if the value is not set
        if ($this->isNullValue($val)) {
            continue;
        }

        // checking for existing relations
        if (isset($this->relationCallbacks[$fieldName])) {
            // trigger custom search method for finding a relation based on the given value
            // and write it back to the relation (or create a new object)
            $relationName = $this->relationCallbacks[$fieldName]['relationname'];
            /** @var DataObject $relationObj */
            $relationObj = null;
            // a callback defined on the loader takes precedence over one on the object;
            // note the different signatures: the loader variant also receives $obj
            if ($this->hasMethod($this->relationCallbacks[$fieldName]['callback'])) {
                $relationObj = $this->{$this->relationCallbacks[$fieldName]['callback']}($obj, $val, $record);
            } elseif ($obj->hasMethod($this->relationCallbacks[$fieldName]['callback'])) {
                $relationObj = $obj->{$this->relationCallbacks[$fieldName]['callback']}($val, $record);
            }
            // no callback result (or one that does not exist yet): create an empty
            // relation object of the class the schema reports for this has-one relation
            // NOTE(review): assumes hasOneComponent() returns a class name here - confirm
            if (!$relationObj || !$relationObj->exists()) {
                $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                $relationObj = new $relationClass();
                // write if we aren't previewing
                if (!$preview) {
                    $relationObj->write();
                }
            }
            $obj->{"{$relationName}ID"} = $relationObj->ID;
            // write if we are not previewing
            if (!$preview) {
                $obj->write();
                $obj->flushCache(); // avoid relation caching confusion
            }
        } elseif (strpos($fieldName, '.') !== false) {
            // we have a relation column with dot notation
            // ($columnName is extracted but not used in this pass)
            list($relationName, $columnName) = explode('.', $fieldName);
            // always gives us a component (either empty or existing)
            $relationObj = $obj->getComponent($relationName);
            if (!$preview) {
                $relationObj->write();
            }
            $obj->{"{$relationName}ID"} = $relationObj->ID;

            // write if we are not previewing
            if (!$preview) {
                $obj->write();
                $obj->flushCache(); // avoid relation caching confusion
            }
        }
    }

    // second run: save data

    foreach ($record as $fieldName => $val) {
        // break out of the loop if we are previewing
        if ($preview) {
            break;
        }

        // look up the mapping to see if this needs to map to callback
        $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

        // precedence: "->callback" on the loader, then an import<Field>() method
        // on the object, then a plain field update
        if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
            // strip the leading "->" to get the method name
            $funcName = substr($this->columnMap[$fieldName], 2);

            $this->$funcName($obj, $val, $record);
        } elseif ($obj->hasMethod("import{$fieldName}")) {
            $obj->{"import{$fieldName}"}($val, $record);
        } else {
            $obj->update(array($fieldName => $val));
        }
    }

    // write record
    if (!$preview) {
        $obj->write();
    }

    // @todo better message support
    $message = '';

    // save to results
    if ($existingObj) {
        $results->addUpdated($obj, $message);
    } else {
        $results->addCreated($obj, $message);
    }

    // remember the ID before the object is destroyed below
    $objID = $obj->ID;

    $obj->destroy();

    // memory usage
    unset($existingObj);
    unset($obj);

    return $objID;
}
||
| 339 | |||
/**
 * Find an existing object based on one or more uniqueness columns
 * specified via {@link self::$duplicateChecks}.
 *
 * Each duplicate check is either a field name (string), which is matched
 * against the record's value for that field, or an array with a "callback"
 * key naming a method on the importer or on the object class. The checks
 * are evaluated in order and the first match is returned.
 *
 * @todo support $columnMap
 *
 * @param array $record CSV data column
 * @param array $columnMap Currently unused
 *
 * @return DataObject|false The first matching record, or false if none was found
 */
public function findExistingObject($record, $columnMap = [])
{
    $SNG_objectClass = singleton($this->objectClass);

    foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
        $existingRecord = null;

        if (is_string($duplicateCheck)) {
            // Skip current duplicate check if field value is empty
            if (empty($record[$duplicateCheck])) {
                continue;
            }

            // Check existing record with this value
            $existingRecord = DataObject::get($this->objectClass)
                ->filter($duplicateCheck, $record[$duplicateCheck])
                ->first();
        } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
            $callback = $duplicateCheck['callback'];

            // the record may not contain the checked column at all; pass null
            // in that case instead of reading an undefined array key
            $value = isset($record[$fieldName]) ? $record[$fieldName] : null;

            // a callback on the importer takes precedence over one on the object class
            if ($this->hasMethod($callback)) {
                $existingRecord = $this->{$callback}($value, $record);
            } elseif ($SNG_objectClass->hasMethod($callback)) {
                $existingRecord = $SNG_objectClass->{$callback}($value, $record);
            } else {
                user_error(
                    "CsvBulkLoader::findExistingObject():"
                    . " {$callback} not found on importer or object class.",
                    E_USER_ERROR
                );
            }
        } else {
            user_error('CsvBulkLoader::findExistingObject(): Wrong format for $duplicateChecks', E_USER_ERROR);
        }

        if ($existingRecord) {
            return $existingRecord;
        }
    }

    return false;
}
||
| 392 | |||
| 393 | /** |
||
| 394 | * Determine whether any loaded files should be parsed with a |
||
| 395 | * header-row (otherwise we rely on {@link self::$columnMap}. |
||
| 396 | * |
||
| 397 | * @return boolean |
||
| 398 | */ |
||
| 399 | public function hasHeaderRow() |
||
| 402 | } |
||
| 403 | } |
||
| 404 |