1 | <?php |
||||
2 | |||||
3 | namespace SilverStripe\Dev; |
||||
4 | |||||
5 | use League\Csv\EscapeFormula; |
||||
6 | use League\Csv\Reader; |
||||
7 | use SilverStripe\Control\Director; |
||||
8 | use SilverStripe\ORM\DataObject; |
||||
9 | |||||
10 | /** |
||||
11 | * Utility class to facilitate complex CSV-imports by defining column-mappings |
||||
12 | * and custom converters. |
||||
13 | * |
||||
14 | * Uses the fgetcsv() function to process CSV input. Accepts a file-handler as |
||||
15 | * input. |
||||
16 | * |
||||
17 | * @see http://tools.ietf.org/html/rfc4180 |
||||
18 | * |
||||
19 | * @todo Support for deleting existing records not matched in the import |
||||
20 | * (through relation checks) |
||||
21 | */ |
||||
22 | class CsvBulkLoader extends BulkLoader |
||||
23 | { |
||||
24 | |||||
25 | /** |
||||
26 | * Delimiter character (Default: comma). |
||||
27 | * |
||||
28 | * @var string |
||||
29 | */ |
||||
30 | public $delimiter = ','; |
||||
31 | |||||
32 | /** |
||||
33 | * Enclosure character (Default: doublequote) |
||||
34 | * |
||||
35 | * @var string |
||||
36 | */ |
||||
37 | public $enclosure = '"'; |
||||
38 | |||||
39 | /** |
||||
40 | * Identifies if csv the has a header row. |
||||
41 | * |
||||
42 | * @var boolean |
||||
43 | */ |
||||
44 | public $hasHeaderRow = true; |
||||
45 | |||||
46 | /** |
||||
47 | * Number of lines to split large CSV files into. |
||||
48 | * |
||||
49 | * @var int |
||||
50 | * |
||||
51 | * @config |
||||
52 | */ |
||||
53 | private static $lines = 1000; |
||||
54 | |||||
55 | /** |
||||
56 | * @inheritDoc |
||||
57 | */ |
||||
58 | public function preview($filepath) |
||||
59 | { |
||||
60 | return $this->processAll($filepath, true); |
||||
61 | } |
||||
62 | |||||
63 | /** |
||||
64 | * @param string $filepath |
||||
65 | * @param boolean $preview |
||||
66 | * |
||||
67 | * @return null|BulkLoader_Result |
||||
68 | */ |
||||
69 | protected function processAll($filepath, $preview = false) |
||||
70 | { |
||||
71 | $previousDetectLE = ini_get('auto_detect_line_endings'); |
||||
72 | |||||
73 | ini_set('auto_detect_line_endings', true); |
||||
0 ignored issues
–
show
Bug
introduced
by
![]() |
|||||
74 | $result = BulkLoader_Result::create(); |
||||
75 | |||||
76 | try { |
||||
77 | $filepath = Director::getAbsFile($filepath); |
||||
78 | $csvReader = Reader::createFromPath($filepath, 'r'); |
||||
79 | $csvReader->setDelimiter($this->delimiter); |
||||
80 | $csvReader->skipInputBOM(); |
||||
81 | |||||
82 | $tabExtractor = function ($row) { |
||||
83 | foreach ($row as &$item) { |
||||
84 | // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import |
||||
85 | $specialChars = implode('', EscapeFormula::FORMULA_STARTING_CHARS); |
||||
86 | $specialChars = preg_quote($specialChars, '/'); |
||||
87 | if (preg_match("/^\t[$specialChars]+.*/", $item)) { |
||||
88 | $item = ltrim($item, "\t"); |
||||
89 | } |
||||
90 | } |
||||
91 | return $row; |
||||
92 | }; |
||||
93 | |||||
94 | if ($this->columnMap) { |
||||
0 ignored issues
–
show
The expression
$this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using ![]() |
|||||
95 | $headerMap = $this->getNormalisedColumnMap(); |
||||
96 | $remapper = function ($row) use ($headerMap, $tabExtractor) { |
||||
97 | $row = $tabExtractor($row); |
||||
98 | foreach ($headerMap as $column => $renamedColumn) { |
||||
99 | if ($column == $renamedColumn) { |
||||
100 | continue; |
||||
101 | } |
||||
102 | if (array_key_exists($column, $row)) { |
||||
103 | if (strpos($renamedColumn, '_ignore_') !== 0) { |
||||
104 | $row[$renamedColumn] = $row[$column]; |
||||
105 | } |
||||
106 | unset($row[$column]); |
||||
107 | } |
||||
108 | } |
||||
109 | return $row; |
||||
110 | }; |
||||
111 | } else { |
||||
112 | $remapper = $tabExtractor; |
||||
113 | } |
||||
114 | |||||
115 | if ($this->hasHeaderRow) { |
||||
116 | $csvReader->setHeaderOffset(0); |
||||
117 | $rows = $csvReader->getRecords(); |
||||
118 | } elseif ($this->columnMap) { |
||||
0 ignored issues
–
show
The expression
$this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using ![]() |
|||||
119 | $rows = $csvReader->getRecords($headerMap); |
||||
0 ignored issues
–
show
Comprehensibility
Best Practice
introduced
by
|
|||||
120 | } |
||||
121 | |||||
122 | foreach ($rows as $row) { |
||||
0 ignored issues
–
show
Comprehensibility
Best Practice
introduced
by
|
|||||
123 | $row = $remapper($row); |
||||
124 | $this->processRecord($row, $this->columnMap, $result, $preview); |
||||
125 | } |
||||
126 | } catch (\Exception $e) { |
||||
127 | $failedMessage = sprintf("Failed to parse %s", $filepath); |
||||
128 | if (Director::isDev()) { |
||||
129 | $failedMessage = sprintf($failedMessage . " because %s", $e->getMessage()); |
||||
130 | } |
||||
131 | print $failedMessage . PHP_EOL; |
||||
132 | } finally { |
||||
133 | ini_set('auto_detect_line_endings', $previousDetectLE); |
||||
134 | } |
||||
135 | return $result; |
||||
136 | } |
||||
137 | |||||
138 | protected function getNormalisedColumnMap() |
||||
139 | { |
||||
140 | $map = []; |
||||
141 | foreach ($this->columnMap as $column => $newColumn) { |
||||
142 | if (strpos($newColumn, "->") === 0) { |
||||
143 | $map[$column] = $column; |
||||
144 | } elseif (is_null($newColumn)) { |
||||
145 | // the column map must consist of unique scalar values |
||||
146 | // `null` can be present multiple times and is not scalar |
||||
147 | // so we name it in a standard way so we can remove it later |
||||
148 | $map[$column] = '_ignore_' . $column; |
||||
149 | } else { |
||||
150 | $map[$column] = $newColumn; |
||||
151 | } |
||||
152 | } |
||||
153 | return $map; |
||||
154 | } |
||||
155 | |||||
156 | /** |
||||
157 | * Splits a large file up into many smaller files. |
||||
158 | * |
||||
159 | * @param string $path Path to large file to split |
||||
160 | * @param int $lines Number of lines per file |
||||
161 | * |
||||
162 | * @return array List of file paths |
||||
163 | */ |
||||
164 | protected function splitFile($path, $lines = null) |
||||
165 | { |
||||
166 | Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream'); |
||||
167 | $previous = ini_get('auto_detect_line_endings'); |
||||
168 | |||||
169 | ini_set('auto_detect_line_endings', true); |
||||
0 ignored issues
–
show
true of type true is incompatible with the type string expected by parameter $newvalue of ini_set() .
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
![]() |
|||||
170 | |||||
171 | if (!is_int($lines)) { |
||||
172 | $lines = $this->config()->get("lines"); |
||||
173 | } |
||||
174 | |||||
175 | $new = $this->getNewSplitFileName(); |
||||
176 | |||||
177 | $to = fopen($new, 'w+'); |
||||
178 | $from = fopen($path, 'r'); |
||||
179 | |||||
180 | $header = null; |
||||
181 | |||||
182 | if ($this->hasHeaderRow) { |
||||
183 | $header = fgets($from); |
||||
0 ignored issues
–
show
It seems like
$from can also be of type false ; however, parameter $handle of fgets() does only seem to accept resource , maybe add an additional type check?
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
![]() |
|||||
184 | fwrite($to, $header); |
||||
0 ignored issues
–
show
It seems like
$to can also be of type false ; however, parameter $handle of fwrite() does only seem to accept resource , maybe add an additional type check?
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
![]() |
|||||
185 | } |
||||
186 | |||||
187 | $files = array(); |
||||
188 | $files[] = $new; |
||||
189 | |||||
190 | $count = 0; |
||||
191 | |||||
192 | while (!feof($from)) { |
||||
0 ignored issues
–
show
It seems like
$from can also be of type false ; however, parameter $handle of feof() does only seem to accept resource , maybe add an additional type check?
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
![]() |
|||||
193 | fwrite($to, fgets($from)); |
||||
194 | |||||
195 | $count++; |
||||
196 | |||||
197 | if ($count >= $lines) { |
||||
198 | fclose($to); |
||||
0 ignored issues
–
show
It seems like
$to can also be of type false ; however, parameter $handle of fclose() does only seem to accept resource , maybe add an additional type check?
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
![]() |
|||||
199 | |||||
200 | // get a new temporary file name, to write the next lines to |
||||
201 | $new = $this->getNewSplitFileName(); |
||||
202 | |||||
203 | $to = fopen($new, 'w+'); |
||||
204 | |||||
205 | if ($this->hasHeaderRow) { |
||||
206 | // add the headers to the new file |
||||
207 | fwrite($to, $header); |
||||
208 | } |
||||
209 | |||||
210 | $files[] = $new; |
||||
211 | |||||
212 | $count = 0; |
||||
213 | } |
||||
214 | } |
||||
215 | |||||
216 | fclose($to); |
||||
217 | |||||
218 | ini_set('auto_detect_line_endings', $previous); |
||||
219 | |||||
220 | return $files; |
||||
221 | } |
||||
222 | |||||
223 | /** |
||||
224 | * @return string |
||||
225 | */ |
||||
226 | protected function getNewSplitFileName() |
||||
227 | { |
||||
228 | Deprecation::notice('5.0', 'getNewSplitFileName is deprecated, please name your files yourself'); |
||||
229 | return TEMP_PATH . DIRECTORY_SEPARATOR . uniqid(str_replace('\\', '_', static::class), true) . '.csv'; |
||||
230 | } |
||||
231 | |||||
232 | /** |
||||
233 | * @param string $filepath |
||||
234 | * @param boolean $preview |
||||
235 | * |
||||
236 | * @return BulkLoader_Result |
||||
237 | */ |
||||
238 | protected function processChunk($filepath, $preview = false) |
||||
239 | { |
||||
240 | Deprecation::notice('5.0', 'processChunk is deprecated, please process rows individually'); |
||||
241 | $results = BulkLoader_Result::create(); |
||||
242 | |||||
243 | $csv = new CSVParser( |
||||
244 | $filepath, |
||||
245 | $this->delimiter, |
||||
246 | $this->enclosure |
||||
247 | ); |
||||
248 | |||||
249 | // ColumnMap has two uses, depending on whether hasHeaderRow is set |
||||
250 | if ($this->columnMap) { |
||||
0 ignored issues
–
show
The expression
$this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using ![]() |
|||||
251 | // if the map goes to a callback, use the same key value as the map |
||||
252 | // value, rather than function name as multiple keys may use the |
||||
253 | // same callback |
||||
254 | $map = []; |
||||
255 | foreach ($this->columnMap as $k => $v) { |
||||
256 | if (strpos($v, "->") === 0) { |
||||
257 | $map[$k] = $k; |
||||
258 | } else { |
||||
259 | $map[$k] = $v; |
||||
260 | } |
||||
261 | } |
||||
262 | |||||
263 | if ($this->hasHeaderRow) { |
||||
264 | $csv->mapColumns($map); |
||||
265 | } else { |
||||
266 | $csv->provideHeaderRow($map); |
||||
267 | } |
||||
268 | } |
||||
269 | |||||
270 | foreach ($csv as $row) { |
||||
271 | $this->processRecord($row, $this->columnMap, $results, $preview); |
||||
272 | } |
||||
273 | |||||
274 | return $results; |
||||
275 | } |
||||
276 | |||||
277 | /** |
||||
278 | * @todo Better messages for relation checks and duplicate detection |
||||
279 | * Note that columnMap isn't used. |
||||
280 | * |
||||
281 | * @param array $record |
||||
282 | * @param array $columnMap |
||||
283 | * @param BulkLoader_Result $results |
||||
284 | * @param boolean $preview |
||||
285 | * |
||||
286 | * @return int |
||||
287 | */ |
||||
288 | protected function processRecord($record, $columnMap, &$results, $preview = false) |
||||
289 | { |
||||
290 | $class = $this->objectClass; |
||||
291 | |||||
292 | // find existing object, or create new one |
||||
293 | $existingObj = $this->findExistingObject($record, $columnMap); |
||||
294 | /** @var DataObject $obj */ |
||||
295 | $obj = ($existingObj) ? $existingObj : new $class(); |
||||
0 ignored issues
–
show
|
|||||
296 | $schema = DataObject::getSchema(); |
||||
297 | |||||
298 | // first run: find/create any relations and store them on the object |
||||
299 | // we can't combine runs, as other columns might rely on the relation being present |
||||
300 | foreach ($record as $fieldName => $val) { |
||||
301 | // don't bother querying of value is not set |
||||
302 | if ($this->isNullValue($val)) { |
||||
303 | continue; |
||||
304 | } |
||||
305 | |||||
306 | // checking for existing relations |
||||
307 | if (isset($this->relationCallbacks[$fieldName])) { |
||||
308 | // trigger custom search method for finding a relation based on the given value |
||||
309 | // and write it back to the relation (or create a new object) |
||||
310 | $relationName = $this->relationCallbacks[$fieldName]['relationname']; |
||||
311 | /** @var DataObject $relationObj */ |
||||
312 | $relationObj = null; |
||||
313 | if ($this->hasMethod($this->relationCallbacks[$fieldName]['callback'])) { |
||||
314 | $relationObj = $this->{$this->relationCallbacks[$fieldName]['callback']}($obj, $val, $record); |
||||
315 | } elseif ($obj->hasMethod($this->relationCallbacks[$fieldName]['callback'])) { |
||||
316 | $relationObj = $obj->{$this->relationCallbacks[$fieldName]['callback']}($val, $record); |
||||
317 | } |
||||
318 | if (!$relationObj || !$relationObj->exists()) { |
||||
319 | $relationClass = $schema->hasOneComponent(get_class($obj), $relationName); |
||||
320 | $relationObj = new $relationClass(); |
||||
321 | //write if we aren't previewing |
||||
322 | if (!$preview) { |
||||
323 | $relationObj->write(); |
||||
324 | } |
||||
325 | } |
||||
326 | $obj->{"{$relationName}ID"} = $relationObj->ID; |
||||
327 | //write if we are not previewing |
||||
328 | if (!$preview) { |
||||
329 | $obj->write(); |
||||
330 | $obj->flushCache(); // avoid relation caching confusion |
||||
331 | } |
||||
332 | } elseif (strpos($fieldName, '.') !== false) { |
||||
333 | // we have a relation column with dot notation |
||||
334 | list($relationName, $columnName) = explode('.', $fieldName); |
||||
335 | // always gives us an component (either empty or existing) |
||||
336 | $relationObj = $obj->getComponent($relationName); |
||||
337 | if (!$preview) { |
||||
338 | $relationObj->write(); |
||||
339 | } |
||||
340 | $obj->{"{$relationName}ID"} = $relationObj->ID; |
||||
341 | |||||
342 | //write if we are not previewing |
||||
343 | if (!$preview) { |
||||
344 | $obj->write(); |
||||
345 | $obj->flushCache(); // avoid relation caching confusion |
||||
346 | } |
||||
347 | } |
||||
348 | } |
||||
349 | |||||
350 | // second run: save data |
||||
351 | |||||
352 | foreach ($record as $fieldName => $val) { |
||||
353 | // break out of the loop if we are previewing |
||||
354 | if ($preview) { |
||||
355 | break; |
||||
356 | } |
||||
357 | |||||
358 | // look up the mapping to see if this needs to map to callback |
||||
359 | $mapped = $this->columnMap && isset($this->columnMap[$fieldName]); |
||||
0 ignored issues
–
show
The expression
$this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using ![]() |
|||||
360 | |||||
361 | if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) { |
||||
362 | $funcName = substr($this->columnMap[$fieldName], 2); |
||||
363 | |||||
364 | $this->$funcName($obj, $val, $record); |
||||
365 | } elseif ($obj->hasMethod("import{$fieldName}")) { |
||||
366 | $obj->{"import{$fieldName}"}($val, $record); |
||||
367 | } else { |
||||
368 | $obj->update(array($fieldName => $val)); |
||||
369 | } |
||||
370 | } |
||||
371 | |||||
372 | // write record |
||||
373 | if (!$preview) { |
||||
374 | $obj->write(); |
||||
375 | } |
||||
376 | |||||
377 | // @todo better message support |
||||
378 | $message = ''; |
||||
379 | |||||
380 | // save to results |
||||
381 | if ($existingObj) { |
||||
0 ignored issues
–
show
|
|||||
382 | $results->addUpdated($obj, $message); |
||||
383 | } else { |
||||
384 | $results->addCreated($obj, $message); |
||||
385 | } |
||||
386 | |||||
387 | $objID = $obj->ID; |
||||
388 | |||||
389 | $obj->destroy(); |
||||
390 | |||||
391 | // memory usage |
||||
392 | unset($existingObj, $obj); |
||||
393 | |||||
394 | return $objID; |
||||
395 | } |
||||
396 | |||||
397 | /** |
||||
398 | * Find an existing objects based on one or more uniqueness columns |
||||
399 | * specified via {@link self::$duplicateChecks}. |
||||
400 | * |
||||
401 | * @todo support $columnMap |
||||
402 | * |
||||
403 | * @param array $record CSV data column |
||||
404 | * @param array $columnMap |
||||
405 | * @return DataObject |
||||
406 | */ |
||||
407 | public function findExistingObject($record, $columnMap = []) |
||||
408 | { |
||||
409 | $SNG_objectClass = singleton($this->objectClass); |
||||
410 | // checking for existing records (only if not already found) |
||||
411 | |||||
412 | foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) { |
||||
413 | $existingRecord = null; |
||||
414 | if (is_string($duplicateCheck)) { |
||||
415 | // Skip current duplicate check if field value is empty |
||||
416 | if (empty($record[$duplicateCheck])) { |
||||
417 | continue; |
||||
418 | } |
||||
419 | |||||
420 | // Check existing record with this value |
||||
421 | $dbFieldValue = $record[$duplicateCheck]; |
||||
422 | $existingRecord = DataObject::get($this->objectClass) |
||||
423 | ->filter($duplicateCheck, $dbFieldValue) |
||||
424 | ->first(); |
||||
425 | |||||
426 | if ($existingRecord) { |
||||
427 | return $existingRecord; |
||||
428 | } |
||||
429 | } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) { |
||||
430 | if ($this->hasMethod($duplicateCheck['callback'])) { |
||||
431 | $existingRecord = $this->{$duplicateCheck['callback']}($record[$fieldName], $record); |
||||
432 | } elseif ($SNG_objectClass->hasMethod($duplicateCheck['callback'])) { |
||||
433 | $existingRecord = $SNG_objectClass->{$duplicateCheck['callback']}($record[$fieldName], $record); |
||||
434 | } else { |
||||
435 | user_error("CsvBulkLoader::processRecord():" |
||||
436 | . " {$duplicateCheck['callback']} not found on importer or object class.", E_USER_ERROR); |
||||
437 | } |
||||
438 | |||||
439 | if ($existingRecord) { |
||||
440 | return $existingRecord; |
||||
441 | } |
||||
442 | } else { |
||||
443 | user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR); |
||||
444 | } |
||||
445 | } |
||||
446 | |||||
447 | return false; |
||||
448 | } |
||||
449 | |||||
450 | /** |
||||
451 | * Determine whether any loaded files should be parsed with a |
||||
452 | * header-row (otherwise we rely on {@link self::$columnMap}. |
||||
453 | * |
||||
454 | * @return boolean |
||||
455 | */ |
||||
456 | public function hasHeaderRow() |
||||
457 | { |
||||
458 | return ($this->hasHeaderRow || isset($this->columnMap)); |
||||
459 | } |
||||
460 | } |
||||
461 |