Passed
Push — 4 ( 895921...ae0ece )
by Daniel
08:28
created

CsvBulkLoader::getNewSplitFileName()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\Dev;
4
5
use League\Csv\MapIterator;
6
use League\Csv\Reader;
7
use SilverStripe\Control\Director;
8
use SilverStripe\ORM\DataObject;
9
10
/**
11
 * Utility class to facilitate complex CSV-imports by defining column-mappings
12
 * and custom converters.
13
 *
14
 * Uses the league/csv Reader to process CSV input. Accepts a file path as
15
 * input.
16
 *
17
 * @see http://tools.ietf.org/html/rfc4180
18
 *
19
 * @todo Support for deleting existing records not matched in the import
20
 * (through relation checks)
21
 */
22
class CsvBulkLoader extends BulkLoader
23
{
24
25
    /**
26
     * Delimiter character (Default: comma).
27
     *
28
     * @var string
29
     */
30
    public $delimiter = ',';
31
32
    /**
33
     * Enclosure character (Default: doublequote)
34
     *
35
     * @var string
36
     */
37
    public $enclosure = '"';
38
39
    /**
40
     * Identifies if the csv has a header row.
41
     *
42
     * @var boolean
43
     */
44
    public $hasHeaderRow = true;
45
46
    /**
47
     * Number of lines to split large CSV files into.
48
     *
49
     * @var int
50
     *
51
     * @config
52
     */
53
    private static $lines = 1000;
54
55
    /**
     * Runs the import in preview mode by delegating to processAll() with the
     * preview flag switched on.
     *
     * @inheritDoc
     */
    public function preview($filepath)
    {
        $previewOnly = true;

        return $this->processAll($filepath, $previewOnly);
    }
62
63
    /**
64
     * @param string $filepath
65
     * @param boolean $preview
66
     *
67
     * @return null|BulkLoader_Result
68
     */
69
    protected function processAll($filepath, $preview = false)
70
    {
71
        $previousDetectLE = ini_get('auto_detect_line_endings');
72
73
        ini_set('auto_detect_line_endings', true);
0 ignored issues
show
Bug introduced by
true of type true is incompatible with the type string expected by parameter $newvalue of ini_set(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

73
        ini_set('auto_detect_line_endings', /** @scrutinizer ignore-type */ true);
Loading history...
74
        $result = BulkLoader_Result::create();
75
76
        try {
77
            $filepath = Director::getAbsFile($filepath);
78
            $csvReader = Reader::createFromPath($filepath, 'r');
79
            $csvReader->setDelimiter($this->delimiter);
80
81
            // league/csv 9
82
            if (method_exists($csvReader, 'skipInputBOM')) {
83
                $csvReader->skipInputBOM();
84
            // league/csv 8
85
            } else {
86
                $csvReader->stripBom(true);
0 ignored issues
show
Bug introduced by
true of type true is incompatible with the type Iterator expected by parameter $iterator of League\Csv\Reader::stripBOM(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

86
                $csvReader->stripBom(/** @scrutinizer ignore-type */ true);
Loading history...
Bug introduced by
The call to League\Csv\Reader::stripBOM() has too few arguments starting with bom. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-call  annotation

86
                $csvReader->/** @scrutinizer ignore-call */ 
87
                            stripBom(true);

This check compares calls to functions or methods with their respective definitions. If the call has less arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is Wordpress. Please note the @ignore annotation hint above.

Loading history...
87
            }
88
89
            $tabExtractor = function ($row, $rowOffset) {
90
                foreach ($row as &$item) {
91
                    // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import
92
                    if (preg_match("/^\t[\-@=\+]+.*/", $item)) {
93
                        $item = ltrim($item, "\t");
94
                    }
95
                }
96
                return $row;
97
            };
98
99
            if ($this->columnMap) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
100
                $headerMap = $this->getNormalisedColumnMap();
101
102
                $remapper = function ($row, $rowOffset) use ($headerMap, $tabExtractor) {
103
                    $row = $tabExtractor($row, $rowOffset);
104
                    foreach ($headerMap as $column => $renamedColumn) {
105
                        if ($column == $renamedColumn) {
106
                            continue;
107
                        }
108
                        if (array_key_exists($column, $row)) {
109
                            if (strpos($renamedColumn, '_ignore_') !== 0) {
110
                                $row[$renamedColumn] = $row[$column];
111
                            }
112
                            unset($row[$column]);
113
                        }
114
                    }
115
                    return $row;
116
                };
117
            } else {
118
                $remapper = $tabExtractor;
119
            }
120
121
            if ($this->hasHeaderRow) {
122
                if (method_exists($csvReader, 'fetchAssoc')) {
123
                    $rows = $csvReader->fetchAssoc(0, $remapper);
124
                } else {
125
                    $csvReader->setHeaderOffset(0);
126
                    $rows = new MapIterator($csvReader->getRecords(), $remapper);
127
                }
128
            } elseif ($this->columnMap) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->columnMap of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
129
                if (method_exists($csvReader, 'fetchAssoc')) {
130
                    $rows = $csvReader->fetchAssoc($headerMap, $remapper);
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable $headerMap does not seem to be defined for all execution paths leading up to this point.
Loading history...
131
                } else {
132
                    $rows = new MapIterator($csvReader->getRecords($headerMap), $remapper);
133
                }
134
            }
135
136
            foreach ($rows as $row) {
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable $rows does not seem to be defined for all execution paths leading up to this point.
Loading history...
137
                $this->processRecord($row, $this->columnMap, $result, $preview);
138
            }
139
        } catch (\Exception $e) {
140
            $failedMessage = sprintf("Failed to parse %s", $filepath);
141
            if (Director::isDev()) {
142
                $failedMessage = sprintf($failedMessage . " because %s", $e->getMessage());
143
            }
144
            print $failedMessage . PHP_EOL;
145
        } finally {
146
            ini_set('auto_detect_line_endings', $previousDetectLE);
147
        }
148
        return $result;
149
    }
150
151
    /**
     * Builds a copy of $this->columnMap in which every value is a usable
     * column name.
     *
     * - a null value becomes "_ignore_" followed by the column name
     * - a value starting with "->" maps the column onto itself
     * - any other value is kept unchanged
     *
     * @return array
     */
    protected function getNormalisedColumnMap()
    {
        $map = [];
        foreach ($this->columnMap as $column => $newColumn) {
            // null is tested first so it never reaches strpos(), which does not
            // accept null without a deprecation notice on PHP 8.1+
            if (is_null($newColumn)) {
                // the column map must consist of unique scalar values
                // `null` can be present multiple times and is not scalar
                // so we name it in a standard way so we can remove it later
                $map[$column] = '_ignore_' . $column;
            } elseif (strpos($newColumn, "->") === 0) {
                $map[$column] = $column;
            } else {
                $map[$column] = $newColumn;
            }
        }
        return $map;
    }
168
169
    /**
     * Splits a large file up into many smaller files.
     *
     * Every part is written to a fresh name obtained from
     * getNewSplitFileName(). When $this->hasHeaderRow is set, the first line
     * of the source file is repeated at the top of every part.
     *
     * @param string $path Path to large file to split
     * @param int $lines Number of lines per file; any non-integer value falls
     *                   back to the "lines" config setting
     *
     * @return array List of file paths
     *
     * @throws \RuntimeException If the source file or a part file cannot be opened
     */
    protected function splitFile($path, $lines = null)
    {
        Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream');
        $previous = ini_get('auto_detect_line_endings');

        // ini_set() takes a string value; '1' is what boolean true coerces to
        ini_set('auto_detect_line_endings', '1');

        $from = false;
        $to = false;

        try {
            if (!is_int($lines)) {
                $lines = $this->config()->get("lines");
            }

            // Open the source first so an unreadable source does not leave an
            // empty part file behind
            $from = fopen($path, 'r');
            if ($from === false) {
                throw new \RuntimeException(sprintf('Unable to open "%s" for reading', $path));
            }

            $new = $this->getNewSplitFileName();
            $to = fopen($new, 'w+');
            if ($to === false) {
                throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $new));
            }

            $header = '';

            if ($this->hasHeaderRow) {
                $header = fgets($from);
                if ($header === false) {
                    // empty source file: there is no header line to repeat
                    $header = '';
                }
                fwrite($to, $header);
            }

            $files = [$new];
            $count = 0;

            while (!feof($from)) {
                $line = fgets($from);
                if ($line === false) {
                    // nothing left to read; do not count or write a non-line
                    break;
                }

                fwrite($to, $line);
                $count++;

                if ($count >= $lines) {
                    fclose($to);
                    $to = false;

                    // get a new temporary file name, to write the next lines to
                    $new = $this->getNewSplitFileName();
                    $to = fopen($new, 'w+');
                    if ($to === false) {
                        throw new \RuntimeException(sprintf('Unable to open "%s" for writing', $new));
                    }

                    if ($this->hasHeaderRow) {
                        // add the headers to the new file
                        fwrite($to, $header);
                    }

                    $files[] = $new;
                    $count = 0;
                }
            }

            return $files;
        } finally {
            // Always release both handles and restore the ini setting, even
            // when an exception interrupts the split
            if (is_resource($to)) {
                fclose($to);
            }
            if (is_resource($from)) {
                fclose($from);
            }
            ini_set('auto_detect_line_endings', $previous);
        }
    }
235
236
    /**
     * Builds a unique ".csv" path inside TEMP_PATH for one part of a split
     * file. The file name is prefixed with the late-static-bound class name,
     * with namespace separators replaced by underscores.
     *
     * @return string
     */
    protected function getNewSplitFileName()
    {
        Deprecation::notice('5.0', 'getNewSplitFileName is deprecated, please name your files yourself');

        $prefix = str_replace('\\', '_', static::class);
        $fileName = uniqid($prefix, true) . '.csv';

        return TEMP_PATH . DIRECTORY_SEPARATOR . $fileName;
    }
244
245
    /**
     * Imports every row of a single CSV file, parsed with CSVParser, by
     * passing each row to processRecord().
     *
     * @param string $filepath
     * @param boolean $preview
     *
     * @return BulkLoader_Result
     */
    protected function processChunk($filepath, $preview = false)
    {
        Deprecation::notice('5.0', 'processChunk is deprecated, please process rows individually');
        $results = BulkLoader_Result::create();

        $csv = new CSVParser(
            $filepath,
            $this->delimiter,
            $this->enclosure
        );

        // ColumnMap has two uses, depending on whether hasHeaderRow is set
        if (!empty($this->columnMap)) {
            // if the map goes to a callback, use the same key value as the map
            // value, rather than function name as multiple keys may use the
            // same callback
            $map = [];
            foreach ($this->columnMap as $k => $v) {
                // null map values are legal; keep them away from strpos(),
                // which does not accept null without a deprecation on PHP 8.1+
                $isCallback = $v !== null && strpos($v, "->") === 0;
                $map[$k] = $isCallback ? $k : $v;
            }

            if ($this->hasHeaderRow) {
                $csv->mapColumns($map);
            } else {
                $csv->provideHeaderRow($map);
            }
        }

        foreach ($csv as $row) {
            $this->processRecord($row, $this->columnMap, $results, $preview);
        }

        return $results;
    }
289
290
    /**
     * Imports a single CSV row: resolves relations first, then copies the
     * remaining values onto the object and records the outcome.
     *
     * The $columnMap argument is only forwarded to findExistingObject();
     * callback lookups in the second pass read $this->columnMap.
     *
     * @todo Better messages for relation checks and duplicate detection
     *
     * @param array $record
     * @param array $columnMap
     * @param BulkLoader_Result $results
     * @param boolean $preview When true, nothing is written and field values
     *                         are not applied
     *
     * @return int ID of the processed object
     */
    protected function processRecord($record, $columnMap, &$results, $preview = false)
    {
        $class = $this->objectClass;

        // reuse a matching object when there is one, otherwise start a new one
        $existingObj = $this->findExistingObject($record, $columnMap);
        /** @var DataObject $obj */
        $obj = $existingObj ?: new $class();
        $schema = DataObject::getSchema();

        // Pass 1: find/create relations and attach them to the object.
        // This cannot be merged with pass 2, as other columns might rely on
        // the relation being present.
        foreach ($record as $fieldName => $val) {
            // no point in looking anything up for an unset value
            if ($this->isNullValue($val)) {
                continue;
            }

            if (isset($this->relationCallbacks[$fieldName])) {
                // a custom search method locates the related object for this
                // value; a new object is created when it finds nothing
                $relationSpec = $this->relationCallbacks[$fieldName];
                $relationName = $relationSpec['relationname'];
                $callback = $relationSpec['callback'];

                /** @var DataObject $relationObj */
                $relationObj = null;
                if ($this->hasMethod($callback)) {
                    $relationObj = $this->{$callback}($obj, $val, $record);
                } elseif ($obj->hasMethod($callback)) {
                    $relationObj = $obj->{$callback}($val, $record);
                }

                if (!$relationObj || !$relationObj->exists()) {
                    $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                    $relationObj = new $relationClass();
                    // only persisted outside of preview mode
                    if (!$preview) {
                        $relationObj->write();
                    }
                }
            } elseif (strpos($fieldName, '.') !== false) {
                // relation column in dot notation: only the part before the
                // first dot (the relation name) is needed here
                list($relationName) = explode('.', $fieldName);
                // always gives us a component (either empty or existing)
                $relationObj = $obj->getComponent($relationName);
                if (!$preview) {
                    $relationObj->write();
                }
            } else {
                // plain column, handled in pass 2
                continue;
            }

            $obj->{"{$relationName}ID"} = $relationObj->ID;

            // only persisted outside of preview mode
            if (!$preview) {
                $obj->write();
                $obj->flushCache(); // avoid relation caching confusion
            }
        }

        // Pass 2: save data. Skipped entirely when previewing.
        if (!$preview) {
            foreach ($record as $fieldName => $val) {
                // does the column map send this column to a "->callback"?
                $usesCallback = $this->columnMap
                    && isset($this->columnMap[$fieldName])
                    && strpos($this->columnMap[$fieldName], '->') === 0;

                if ($usesCallback) {
                    $funcName = substr($this->columnMap[$fieldName], 2);

                    $this->$funcName($obj, $val, $record);
                } elseif ($obj->hasMethod("import{$fieldName}")) {
                    $obj->{"import{$fieldName}"}($val, $record);
                } else {
                    $obj->update([$fieldName => $val]);
                }
            }

            // write record
            $obj->write();
        }

        // @todo better message support
        $message = '';

        // save to results
        if ($existingObj) {
            $results->addUpdated($obj, $message);
        } else {
            $results->addCreated($obj, $message);
        }

        $objID = $obj->ID;

        $obj->destroy();

        // memory usage
        unset($existingObj, $obj);

        return $objID;
    }
409
410
    /**
     * Find an existing object based on one or more uniqueness columns
     * specified via $this->duplicateChecks.
     *
     * Each check is either a field name (string), matched against the row's
     * value for that field, or an array with a "callback" key naming a method
     * on this loader or on the object class. The first check that yields a
     * record wins.
     *
     * @todo support $columnMap
     *
     * @param array $record CSV data column
     * @param array $columnMap
     * @return DataObject|false The matching record, or false when none is found
     */
    public function findExistingObject($record, $columnMap = [])
    {
        $SNG_objectClass = singleton($this->objectClass);

        foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
            $existingRecord = null;

            if (is_string($duplicateCheck)) {
                // Skip current duplicate check if field value is empty
                if (empty($record[$duplicateCheck])) {
                    continue;
                }

                // Check existing record with this value
                $dbFieldValue = $record[$duplicateCheck];
                $existingRecord = DataObject::get($this->objectClass)
                    ->filter($duplicateCheck, $dbFieldValue)
                    ->first();
            } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                $callback = $duplicateCheck['callback'];
                // The row may not contain this column at all; pass null then
                // instead of reading an undefined index
                $value = isset($record[$fieldName]) ? $record[$fieldName] : null;

                if ($this->hasMethod($callback)) {
                    $existingRecord = $this->{$callback}($value, $record);
                } elseif ($SNG_objectClass->hasMethod($callback)) {
                    $existingRecord = $SNG_objectClass->{$callback}($value, $record);
                } else {
                    user_error("CsvBulkLoader::processRecord():"
                        . " {$callback} not found on importer or object class.", E_USER_ERROR);
                }
            } else {
                user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
            }

            if ($existingRecord) {
                return $existingRecord;
            }
        }

        return false;
    }
462
463
    /**
     * Determine whether any loaded files should be parsed with a
     * header-row (otherwise we rely on $this->columnMap).
     *
     * NOTE(review): isset() is true for any non-null column map, including an
     * empty array - confirm that is the intended check.
     *
     * @return boolean
     */
    public function hasHeaderRow()
    {
        if ($this->hasHeaderRow) {
            return true;
        }

        return isset($this->columnMap);
    }
473
}
474