Completed
Push — 4.0 ( b59aea...80f83b )
by Loz
52s queued 21s
created

CsvBulkLoader::getNewSplitFileName()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\Dev;
4
5
use SilverStripe\Control\Director;
6
use SilverStripe\ORM\DataObject;
7
use Exception;
8
9
/**
10
 * Utility class to facilitate complex CSV-imports by defining column-mappings
11
 * and custom converters.
12
 *
13
 * Uses the fgetcsv() function to process CSV input. Accepts a file-handler as
14
 * input.
15
 *
16
 * @see http://tools.ietf.org/html/rfc4180
17
 *
18
 * @todo Support for deleting existing records not matched in the import
19
 * (through relation checks)
20
 */
21
class CsvBulkLoader extends BulkLoader
22
{
23
24
    /**
25
     * Delimiter character (Default: comma).
26
     *
27
     * @var string
28
     */
29
    public $delimiter = ',';
30
31
    /**
32
     * Enclosure character (Default: doublequote)
33
     *
34
     * @var string
35
     */
36
    public $enclosure = '"';
37
38
    /**
39
     * Identifies whether the CSV has a header row.
40
     *
41
     * @var boolean
42
     */
43
    public $hasHeaderRow = true;
44
45
    /**
46
     * Number of lines to split large CSV files into.
47
     *
48
     * @var int
49
     *
50
     * @config
51
     */
52
    private static $lines = 1000;
53
54
    /**
     * Run the loader over a file with the preview flag switched on.
     *
     * Delegates to {@link processAll()} and hands back whatever it returns.
     *
     * @param string $filepath Path of the CSV file to preview
     *
     * @return null|BulkLoader_Result
     */
    public function preview($filepath)
    {
        $previewOnly = true;

        return $this->processAll($filepath, $previewOnly);
    }
61
62
    /**
     * Import (or preview) a CSV file by splitting it into chunks and
     * processing each chunk in turn.
     *
     * The results of all chunks are merged into a single result object. If a
     * chunk throws an Exception, a message is printed, processing stops and
     * whatever has been accumulated so far is returned. All temporary chunk
     * files are removed before returning, whether or not processing succeeded.
     *
     * @param string $filepath
     * @param boolean $preview
     *
     * @return null|BulkLoader_Result Null when no chunk was processed successfully
     */
    protected function processAll($filepath, $preview = false)
    {
        $filepath = Director::getAbsFile($filepath);
        $files = $this->splitFile($filepath);

        $result = null;
        // Chunk currently being processed; only read when reporting a failure
        $current = null;

        try {
            foreach ($files as $file) {
                $current = $file;

                $next = $this->processChunk($file, $preview);

                if ($result instanceof BulkLoader_Result) {
                    $result->merge($next);
                } else {
                    $result = $next;
                }
            }
        } catch (Exception $e) {
            // Build the message by concatenation: the chunk path must not end
            // up inside a sprintf() format string, because a "%" in the path
            // would be read as a conversion specification.
            $failedMessage = sprintf("Failed to parse %s", $current);
            if (Director::isDev()) {
                $failedMessage .= sprintf(" because %s", $e->getMessage());
            }
            print $failedMessage . PHP_EOL;
        } finally {
            // Remove every temporary chunk, including those never reached
            // because an earlier chunk failed.
            foreach ($files as $file) {
                if (is_file($file)) {
                    // Best-effort cleanup: a chunk that cannot be deleted must
                    // not abort the import, so the warning is suppressed.
                    @unlink($file);
                }
            }
        }

        return $result;
    }
100
101
    /**
     * Splits a large file up into many smaller files.
     *
     * When {@link $hasHeaderRow} is set, the first line of the source file is
     * repeated at the top of every chunk. At least one chunk is always
     * created, even when the source contains no data lines. A new chunk is
     * only started once there is another line to put into it, so no empty
     * trailing chunk is produced.
     *
     * NOTE(review): splitting is done on physical lines, so a quoted CSV field
     * containing a line break could be cut across two chunks - confirm whether
     * such input needs to be supported.
     *
     * @param string $path Path to large file to split
     * @param int $lines Number of lines per file; falls back to the "lines"
     *                   config value when not an integer
     *
     * @return array List of file paths
     *
     * @throws Exception If the source file or a chunk file cannot be opened
     */
    protected function splitFile($path, $lines = null)
    {
        $previous = ini_get('auto_detect_line_endings');

        // ini values are strings; '1' enables the setting
        ini_set('auto_detect_line_endings', '1');

        if (!is_int($lines)) {
            $lines = $this->config()->get("lines");
        }

        // A chunk always receives at least one line before it is rotated
        $lines = max(1, (int) $lines);

        $files = array();
        $from = false;
        $to = false;

        try {
            $from = fopen($path, 'r');
            if ($from === false) {
                throw new Exception(sprintf('Unable to open "%s" for reading', $path));
            }

            $header = '';
            if ($this->hasHeaderRow) {
                // fgets() yields false on an empty file; treat that as "no header text"
                $header = (string) fgets($from);
            }

            $new = $this->getNewSplitFileName();
            $to = $this->openSplitFile($new, $header);
            $files[] = $new;

            $count = 0;

            // Loop on the read itself rather than feof(): feof() only turns
            // true after a read has hit the end of the file, which would
            // otherwise add a phantom "line" to the count.
            while (($line = fgets($from)) !== false) {
                if ($count >= $lines) {
                    fclose($to);

                    // get a new temporary file name, to write the next lines to
                    $new = $this->getNewSplitFileName();
                    $to = $this->openSplitFile($new, $header);
                    $files[] = $new;

                    $count = 0;
                }

                fwrite($to, $line);
                $count++;
            }
        } catch (Exception $e) {
            // The caller never receives the list on failure, so remove any
            // chunks that were already written before re-throwing.
            if (is_resource($to)) {
                fclose($to);
            }
            foreach ($files as $file) {
                if (is_file($file)) {
                    @unlink($file);
                }
            }

            throw $e;
        } finally {
            if (is_resource($from)) {
                fclose($from);
            }
            if (is_resource($to)) {
                fclose($to);
            }
            if ($previous !== false) {
                ini_set('auto_detect_line_endings', $previous);
            }
        }

        return $files;
    }

    /**
     * Opens a new chunk file for writing and, when {@link $hasHeaderRow} is
     * set, writes the header line to it.
     *
     * @param string $path Path of the chunk file to create
     * @param string $header Header line to repeat at the top of the chunk
     *
     * @return resource Open handle positioned after the header
     *
     * @throws Exception If the chunk file cannot be opened
     */
    private function openSplitFile($path, $header)
    {
        $handle = fopen($path, 'w+');
        if ($handle === false) {
            throw new Exception(sprintf('Unable to open "%s" for writing', $path));
        }

        if ($this->hasHeaderRow) {
            // add the headers to the new file
            fwrite($handle, $header);
        }

        return $handle;
    }
166
167
    /**
     * Build a unique path for a temporary split file.
     *
     * The file name is a uniqid() value (called with more entropy) prefixed
     * with the late-static-bound class name, whose backslashes are replaced by
     * underscores, followed by ".csv". The file is placed under TEMP_PATH.
     *
     * @return string
     */
    protected function getNewSplitFileName()
    {
        $prefix = str_replace('\\', '_', static::class);
        $basename = uniqid($prefix, true) . '.csv';

        return TEMP_PATH . DIRECTORY_SEPARATOR . $basename;
    }
174
175
    /**
     * Parse one CSV file and feed every row to {@link processRecord()}.
     *
     * @param string $filepath Path of the CSV file to parse
     * @param boolean $preview Passed through to processRecord() for each row
     *
     * @return BulkLoader_Result Result object that collected every row's outcome
     */
    protected function processChunk($filepath, $preview = false)
    {
        $results = BulkLoader_Result::create();

        $csv = new CSVParser($filepath, $this->delimiter, $this->enclosure);

        // The column map is applied differently depending on hasHeaderRow
        if ($this->columnMap) {
            $map = [];

            foreach ($this->columnMap as $column => $target) {
                // Entries starting with "->" point at a callback. Several
                // columns may share one callback, so such a column maps to its
                // own key instead of the callback name.
                $isCallback = strpos($target, "->") === 0;
                $map[$column] = $isCallback ? $column : $target;
            }

            if (!$this->hasHeaderRow) {
                $csv->provideHeaderRow($map);
            } else {
                $csv->mapColumns($map);
            }
        }

        foreach ($csv as $row) {
            $this->processRecord($row, $this->columnMap, $results, $preview);
        }

        return $results;
    }
218
219
    /**
     * Import a single CSV row into a new or existing object.
     *
     * Works in two passes over the row. The first pass resolves relations,
     * either through {@link $relationCallbacks} or through "Relation.Column"
     * dot notation, and stores the relation ID on the object. The second pass
     * copies the values onto the object and is skipped entirely when
     * previewing. The object is then registered on $results as created or
     * updated.
     *
     * @todo Better messages for relation checks and duplicate detection
     *
     * @param array $record
     * @param array $columnMap Only passed on to findExistingObject(); value
     *                         mapping reads $this->columnMap instead
     * @param BulkLoader_Result $results
     * @param boolean $preview When true, nothing is written
     *
     * @return int ID of the processed object
     */
    protected function processRecord($record, $columnMap, &$results, $preview = false)
    {
        $class = $this->objectClass;

        // find existing object, or create new one
        $existingObj = $this->findExistingObject($record, $columnMap);
        /** @var DataObject $obj */
        $obj = ($existingObj) ? $existingObj : new $class();
        $schema = DataObject::getSchema();

        // first run: find/create any relations and store them on the object
        // we can't combine runs, as other columns might rely on the relation being present
        foreach ($record as $fieldName => $val) {
            // don't bother querying if value is not set
            if ($this->isNullValue($val)) {
                continue;
            }

            // checking for existing relations
            if (isset($this->relationCallbacks[$fieldName])) {
                // trigger custom search method for finding a relation based on the given value
                // and write it back to the relation (or create a new object)
                $relationName = $this->relationCallbacks[$fieldName]['relationname'];
                $callback = $this->relationCallbacks[$fieldName]['callback'];

                /** @var DataObject $relationObj */
                $relationObj = null;
                if ($this->hasMethod($callback)) {
                    $relationObj = $this->{$callback}($obj, $val, $record);
                } elseif ($obj->hasMethod($callback)) {
                    $relationObj = $obj->{$callback}($val, $record);
                }

                if (!$relationObj || !$relationObj->exists()) {
                    $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                    $relationObj = new $relationClass();
                    // write if we aren't previewing
                    if (!$preview) {
                        $relationObj->write();
                    }
                }

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            } elseif (strpos($fieldName, '.') !== false) {
                // we have a relation column with dot notation; only the
                // relation name (the part before the first dot) is needed here
                list($relationName) = explode('.', $fieldName);

                // always gives us a component (either empty or existing)
                $relationObj = $obj->getComponent($relationName);
                if (!$preview) {
                    $relationObj->write();
                }

                $obj->{"{$relationName}ID"} = $relationObj->ID;

                // write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            }
        }

        // second run: save data (nothing is changed or written when previewing)
        if (!$preview) {
            foreach ($record as $fieldName => $val) {
                // look up the mapping to see if this needs to map to callback
                $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);

                if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                    // "->name" entries call the loader method of that name
                    $funcName = substr($this->columnMap[$fieldName], 2);

                    $this->$funcName($obj, $val, $record);
                } elseif ($obj->hasMethod("import{$fieldName}")) {
                    $obj->{"import{$fieldName}"}($val, $record);
                } else {
                    $obj->update(array($fieldName => $val));
                }
            }

            // write record
            $obj->write();
        }

        // @todo better message support
        $message = '';

        // save to results
        if ($existingObj) {
            $results->addUpdated($obj, $message);
        } else {
            $results->addCreated($obj, $message);
        }

        $objID = $obj->ID;

        $obj->destroy();

        // memory usage
        unset($existingObj);
        unset($obj);

        return $objID;
    }
339
340
    /**
     * Find an existing object based on one or more uniqueness columns
     * specified via {@link self::$duplicateChecks}.
     *
     * Each check is tried in order and the first match is returned. A check
     * is either a string naming the column to filter on, or an array with a
     * 'callback' key naming a method on this loader or on the object class.
     *
     * @todo support $columnMap
     *
     * @param array $record CSV data column
     * @param array $columnMap Currently unused
     * @return DataObject|false The matching record, or false when none was found
     */
    public function findExistingObject($record, $columnMap = [])
    {
        $SNG_objectClass = singleton($this->objectClass);

        // checking for existing records (only if not already found)
        foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
            $existingRecord = null;

            if (is_string($duplicateCheck)) {
                // Skip current duplicate check if field value is empty
                if (empty($record[$duplicateCheck])) {
                    continue;
                }

                // Check existing record with this value
                $dbFieldValue = $record[$duplicateCheck];
                $existingRecord = DataObject::get($this->objectClass)
                    ->filter($duplicateCheck, $dbFieldValue)
                    ->first();

                if ($existingRecord) {
                    return $existingRecord;
                }
            } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                // The row may not contain the checked column at all; pass null
                // to the callback in that case instead of reading a missing index.
                $fieldValue = isset($record[$fieldName]) ? $record[$fieldName] : null;

                if ($this->hasMethod($duplicateCheck['callback'])) {
                    $existingRecord = $this->{$duplicateCheck['callback']}($fieldValue, $record);
                } elseif ($SNG_objectClass->hasMethod($duplicateCheck['callback'])) {
                    $existingRecord = $SNG_objectClass->{$duplicateCheck['callback']}($fieldValue, $record);
                } else {
                    user_error("CsvBulkLoader::processRecord():"
                        . " {$duplicateCheck['callback']} not found on importer or object class.", E_USER_ERROR);
                }

                if ($existingRecord) {
                    return $existingRecord;
                }
            } else {
                user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
            }
        }

        return false;
    }
392
393
    /**
     * Determine whether any loaded files should be parsed with a
     * header-row (otherwise we rely on {@link self::$columnMap}).
     *
     * NOTE(review): the second check uses isset(), which is true for any
     * non-null value, including an empty array. Confirm how the parent class
     * initialises $columnMap before relying on a false return.
     *
     * @return boolean True when $hasHeaderRow is truthy or $columnMap is set
     */
    public function hasHeaderRow()
    {
        if ($this->hasHeaderRow) {
            return true;
        }

        return isset($this->columnMap);
    }
403
}
404