| Total Complexity | 69 | 
| Total Lines | 455 | 
| Duplicated Lines | 0 % | 
| Changes | 1 | ||
| Bugs | 0 | Features | 0 | 
Complex classes like CsvBulkLoader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use CsvBulkLoader, and based on these observations, apply Extract Interface, too.
| 1 | <?php  | 
            ||
| 22 | class CsvBulkLoader extends BulkLoader  | 
            ||
| 23 | { | 
            ||
| 24 | |||
| 25 | /**  | 
            ||
| 26 | * Delimiter character (Default: comma).  | 
            ||
| 27 | *  | 
            ||
| 28 | * @var string  | 
            ||
| 29 | */  | 
            ||
| 30 | public $delimiter = ',';  | 
            ||
| 31 | |||
| 32 | /**  | 
            ||
| 33 | * Enclosure character (Default: doublequote)  | 
            ||
| 34 | *  | 
            ||
| 35 | * @var string  | 
            ||
| 36 | */  | 
            ||
| 37 | public $enclosure = '"';  | 
            ||
| 38 | |||
| 39 | /**  | 
            ||
| 40 | * Identifies if the csv has a header row.  | 
            ||
| 41 | *  | 
            ||
| 42 | * @var boolean  | 
            ||
| 43 | */  | 
            ||
| 44 | public $hasHeaderRow = true;  | 
            ||
| 45 | |||
| 46 | /**  | 
            ||
| 47 | * Number of lines to split large CSV files into.  | 
            ||
| 48 | *  | 
            ||
| 49 | * @var int  | 
            ||
| 50 | *  | 
            ||
| 51 | * @config  | 
            ||
| 52 | */  | 
            ||
| 53 | private static $lines = 1000;  | 
            ||
| 54 | |||
| 55 | /**  | 
            ||
| 56 | * @inheritDoc  | 
            ||
| 57 | */  | 
            ||
| 58 | public function preview($filepath)  | 
            ||
| 61 | }  | 
            ||
| 62 | |||
/**
 * Parses the CSV file at the given path and passes every row to processRecord().
 *
 * The reader is configured with this loader's delimiter and (when it is a
 * single character) enclosure, and the input BOM is skipped. Each row runs
 * through a callback that strips the protective leading tab (SS-2017-007)
 * and, when a column map is set, renames or drops columns accordingly.
 * Both the league/csv 8 (fetchAssoc) and league/csv 9 (getRecords) APIs are
 * supported.
 *
 * Exceptions raised while parsing are caught and printed rather than
 * rethrown, so the (possibly partial) result is still returned.
 *
 * @param string $filepath
 * @param boolean $preview
 *
 * @return null|BulkLoader_Result
 */
protected function processAll($filepath, $preview = false)
{
    $this->extend('onBeforeProcessAll', $filepath, $preview);

    $result = BulkLoader_Result::create();

    try {
        $filepath = Director::getAbsFile($filepath);
        $csvReader = Reader::createFromPath($filepath, 'r');
        $csvReader->setDelimiter($this->delimiter);

        // Honour the configured enclosure. Values that are not exactly one
        // character are skipped so a previously ignored misconfiguration
        // keeps working with the reader's default.
        if (is_string($this->enclosure) && strlen($this->enclosure) === 1) {
            $csvReader->setEnclosure($this->enclosure);
        }

        // league/csv 9
        if (method_exists($csvReader, 'skipInputBOM')) {
            $csvReader->skipInputBOM();
        // league/csv 8
        } else {
            $csvReader->stripBom(true);
        }

        // $rowOffset is unused but is part of the callback signature.
        $tabExtractor = function ($row, $rowOffset) {
            foreach ($row as $key => $item) {
                // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import
                if (is_string($item) && preg_match("/^\t[\-@=\+]+.*/", $item)) {
                    $row[$key] = ltrim($item, "\t");
                }
            }
            return $row;
        };

        if ($this->columnMap) {
            $headerMap = $this->getNormalisedColumnMap();

            $remapper = function ($row, $rowOffset) use ($headerMap, $tabExtractor) {
                $row = $tabExtractor($row, $rowOffset);
                foreach ($headerMap as $column => $renamedColumn) {
                    // Compare as strings: a loose == treats e.g. 0 and 'Name'
                    // as equal on PHP 7 and would skip the rename.
                    if ((string) $column === (string) $renamedColumn) {
                        continue;
                    }
                    if (array_key_exists($column, $row)) {
                        // Columns mapped to "_ignore_*" are dropped, not renamed.
                        if (strpos($renamedColumn, '_ignore_') !== 0) {
                            $row[$renamedColumn] = $row[$column];
                        }
                        unset($row[$column]);
                    }
                }
                return $row;
            };
        } else {
            $remapper = $tabExtractor;
        }

        // Without a header row or a column map there is nothing to key the
        // rows by, so nothing is imported.
        $rows = [];

        if ($this->hasHeaderRow) {
            if (method_exists($csvReader, 'fetchAssoc')) {
                $rows = $csvReader->fetchAssoc(0, $remapper);
            } else {
                $csvReader->setHeaderOffset(0);
                $rows = new MapIterator($csvReader->getRecords(), $remapper);
            }
        } elseif ($this->columnMap) {
            if (method_exists($csvReader, 'fetchAssoc')) {
                $rows = $csvReader->fetchAssoc($headerMap, $remapper);
            } else {
                $rows = new MapIterator($csvReader->getRecords($headerMap), $remapper);
            }
        }

        foreach ($rows as $row) {
            $this->processRecord($row, $this->columnMap, $result, $preview);
        }
    } catch (\Exception $e) {
        $failedMessage = sprintf("Failed to parse %s", $filepath);
        if (Director::isDev()) {
            // Append instead of re-using the message as a format string: a
            // "%" in the file path would otherwise be read as a directive.
            $failedMessage .= sprintf(" because %s", $e->getMessage());
        }
        print $failedMessage . PHP_EOL;
    }

    $this->extend('onAfterProcessAll', $result, $preview);

    return $result;
}
            ||
| 150 | |||
/**
 * Builds a version of the column map whose values are all usable as
 * column names.
 *
 * - Targets starting with "->" map the column to its own name.
 * - Null targets become "_ignore_" followed by the column name. The map must
 *   consist of unique scalar values, and null is neither unique nor scalar,
 *   so it gets a recognisable name that can be removed later.
 * - Any other target is kept as given.
 *
 * @return array
 */
protected function getNormalisedColumnMap()
{
    $normalised = [];

    foreach ($this->columnMap as $source => $target) {
        if ($target === null) {
            $normalised[$source] = '_ignore_' . $source;
            continue;
        }

        $startsWithArrow = is_string($target) && strpos($target, "->") === 0;
        $normalised[$source] = $startsWithArrow ? $source : $target;
    }

    return $normalised;
}
            ||
| 168 | |||
/**
 * Splits a large file up into many smaller files.
 *
 * When the loader expects a header row, the first line of the source file
 * is repeated at the top of every chunk. A new chunk is only started once
 * there is another line to write, so no trailing header-only file is
 * produced. At least one file is always returned, even for empty input.
 *
 * @param string $path Path to large file to split
 * @param int $lines Number of lines per file (falls back to the "lines"
 *                   config value when not an integer; values below 1 are
 *                   treated as 1)
 *
 * @return array List of file paths
 *
 * @throws \RuntimeException If the source file or a chunk file cannot be opened
 */
protected function splitFile($path, $lines = null)
{
    Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream');

    if (!is_int($lines)) {
        $lines = $this->config()->get("lines");
    }
    $lines = max(1, (int) $lines);

    $from = fopen($path, 'r');
    if ($from === false) {
        throw new \RuntimeException(sprintf('Unable to open "%s" for reading', $path));
    }

    $files = [];
    $to = null;

    try {
        $header = null;
        if ($this->hasHeaderRow) {
            $header = fgets($from);
            if ($header === false) {
                // Empty source file: there is no header to repeat.
                $header = null;
            }
        }

        // Opens the next chunk file, writes the header to it and records its path.
        $openChunk = function () use (&$files, $header) {
            $name = $this->getNewSplitFileName();
            $handle = fopen($name, 'w+');
            if ($handle === false) {
                throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $name));
            }
            if ($header !== null) {
                fwrite($handle, $header);
            }
            $files[] = $name;

            return $handle;
        };

        $to = $openChunk();
        $count = 0;

        // Loop on the result of fgets() rather than feof(): feof() only
        // turns true after a read has already failed.
        while (($line = fgets($from)) !== false) {
            if ($count >= $lines) {
                // Current chunk is full and there is more data to write.
                fclose($to);
                $to = $openChunk();
                $count = 0;
            }

            fwrite($to, $line);
            $count++;
        }
    } finally {
        if (is_resource($to)) {
            fclose($to);
        }
        fclose($from);
    }

    return $files;
}
            ||
| 230 | |||
| 231 | /**  | 
            ||
| 232 | * @return string  | 
            ||
| 233 | */  | 
            ||
| 234 | protected function getNewSplitFileName()  | 
            ||
| 238 | }  | 
            ||
| 239 | |||
| 240 | /**  | 
            ||
| 241 | * @param string $filepath  | 
            ||
| 242 | * @param boolean $preview  | 
            ||
| 243 | *  | 
            ||
| 244 | * @return BulkLoader_Result  | 
            ||
| 245 | */  | 
            ||
| 246 | protected function processChunk($filepath, $preview = false)  | 
            ||
| 283 | }  | 
            ||
| 284 | |||
| 285 | /**  | 
            ||
| 286 | * @todo Better messages for relation checks and duplicate detection  | 
            ||
| 287 | * Note that columnMap isn't used.  | 
            ||
| 288 | *  | 
            ||
| 289 | * @param array $record  | 
            ||
| 290 | * @param array $columnMap  | 
            ||
| 291 | * @param BulkLoader_Result $results  | 
            ||
| 292 | * @param boolean $preview  | 
            ||
| 293 | *  | 
            ||
| 294 | * @return int  | 
            ||
| 295 | */  | 
            ||
| 296 | protected function processRecord($record, $columnMap, &$results, $preview = false)  | 
            ||
| 409 | }  | 
            ||
| 410 | |||
| 411 | /**  | 
            ||
| 412 | * Find an existing object based on one or more uniqueness columns  | 
            ||
| 413 |      * specified via {@link self::$duplicateChecks}. | 
            ||
| 414 | *  | 
            ||
| 415 | * @todo support $columnMap  | 
            ||
| 416 | *  | 
            ||
| 417 | * @param array $record CSV data column  | 
            ||
| 418 | * @param array $columnMap  | 
            ||
| 419 | * @return DataObject  | 
            ||
| 420 | */  | 
            ||
| 421 | public function findExistingObject($record, $columnMap = [])  | 
            ||
| 466 | }  | 
            ||
| 467 | |||
| 468 | /**  | 
            ||
| 469 | * Determine whether any loaded files should be parsed with a  | 
            ||
| 470 |      * header-row (otherwise we rely on {@link self::$columnMap}). | 
            ||
| 471 | *  | 
            ||
| 472 | * @return boolean  | 
            ||
| 473 | */  | 
            ||
| 474 | public function hasHeaderRow()  | 
            ||
| 479 |