CsvBulkLoader - Code Metrics - silverstripe/silverstripe-framework - Measure and Improve Code Quality continuously with Scrutinizer

CsvBulkLoader F
last analyzed 2020-06-18 12:14 UTC

↳ Parent: Project
Complexity

Total Complexity
Size/Duplication

Total Lines	437
Duplicated Lines	0 %
Importance

Changes	3
Bugs	0	Features	0
Metric	Value
eloc	191
c	3
b	0
f	0
dl	0
loc	437
rs	3.2
wmc	65
9 Methods

Rating	Name	Size	Complexity
A	preview()	3	1
A	getNewSplitFileName()	4	1
B	processChunk()	37	6
B	splitFile()	57	6
F	processRecord()	107	22
B	findExistingObject()	41	10
A	getNormalisedColumnMap()	16	4
A	hasHeaderRow()	3	2
C	processAll()	67	13
How to fix Complexity

<?php

namespace SilverStripe\Dev;

use League\Csv\EscapeFormula;
use League\Csv\Reader;
use SilverStripe\Control\Director;
use SilverStripe\ORM\DataObject;

/**
 * Utility class to facilitate complex CSV-imports by defining column-mappings
 * and custom converters.
 *
 * Uses the League\Csv reader to process CSV input. Accepts the path of a CSV
 * file as input.
 *
 * @see http://tools.ietf.org/html/rfc4180
 *
 * @todo Support for deleting existing records not matched in the import
 * (through relation checks)
 */
class CsvBulkLoader extends BulkLoader
{

    /**
     * Delimiter character (Default: comma).
     *
     * Passed to the CSV reader in processAll() and to the CSVParser in
     * processChunk().
     *
     * @var string
     */
    public $delimiter = ',';

    /**
     * Enclosure character (Default: doublequote).
     *
     * Within this class it is only passed to the CSVParser in processChunk().
     *
     * @var string
     */
    public $enclosure = '"';

    /**
     * Identifies whether the CSV has a header row.
     *
     * @var boolean
     */
    public $hasHeaderRow = true;

    /**
     * Number of lines to split large CSV files into.
     *
     * Read by splitFile() when it is not given an integer line count.
     *
     * @var int
     *
     * @config
     */
    private static $lines = 1000;

    /**
     * Run the import over the given file in preview mode by delegating to
     * processAll() with the preview flag switched on.
     *
     * {@inheritDoc}
     */
    public function preview($filepath)
    {
        $previewOnly = true;

        return $this->processAll($filepath, $previewOnly);
    }

    /**
     * Read the CSV file at the given path and pass every row to
     * processRecord().
     *
     * Each row first has leading tabs stripped from cells that look like
     * tab-escaped spreadsheet formulas, and is then re-keyed according to the
     * normalised column map (if a column map is set). Columns mapped to
     * `null` are dropped from the row.
     *
     * Any exception raised while reading or importing is caught and reported
     * with `print`; the result collected so far is still returned. The
     * `auto_detect_line_endings` ini setting is restored on the way out.
     *
     * @param string $filepath Path to the CSV file; resolved through Director::getAbsFile()
     * @param boolean $preview Forwarded to processRecord()
     *
     * @return null|BulkLoader_Result
     */
    protected function processAll($filepath, $preview = false)
    {
        $previousDetectLE = ini_get('auto_detect_line_endings');

        ini_set('auto_detect_line_endings', /** @scrutinizer ignore-type */ true);
        $result = BulkLoader_Result::create();

        try {
            $filepath = Director::getAbsFile($filepath);
            $csvReader = Reader::createFromPath($filepath, 'r');
            $csvReader->setDelimiter($this->delimiter);
            $csvReader->skipInputBOM();

            // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import.
            // The pattern does not depend on the row, so build it once instead of once per cell.
            $specialChars = preg_quote(implode('', EscapeFormula::FORMULA_STARTING_CHARS), '/');
            $tabbedFormulaPattern = "/^\t[$specialChars]+.*/";

            $tabExtractor = function ($row) use ($tabbedFormulaPattern) {
                foreach ($row as &$item) {
                    // Cells can be null when a row is shorter than the header; only strings can match
                    if (is_string($item) && preg_match($tabbedFormulaPattern, $item)) {
                        $item = ltrim($item, "\t");
                    }
                }
                // Break the reference so later writes to $item cannot touch the last cell
                unset($item);
                return $row;
            };

            if ($this->columnMap) {
                $headerMap = $this->getNormalisedColumnMap();
                $remapper = function ($row) use ($headerMap, $tabExtractor) {
                    $row = $tabExtractor($row);
                    foreach ($headerMap as $column => $renamedColumn) {
                        if ($column == $renamedColumn) {
                            continue;
                        }
                        if (array_key_exists($column, $row)) {
                            // '_ignore_' targets mark columns that must be dropped, not renamed
                            if (strpos($renamedColumn, '_ignore_') !== 0) {
                                $row[$renamedColumn] = $row[$column];
                            }
                            unset($row[$column]);
                        }
                    }
                    return $row;
                };
            } else {
                $remapper = $tabExtractor;
            }

            // Without a header row or a column map there are no column names to
            // key the records by, so there is nothing to import.
            $rows = [];
            if ($this->hasHeaderRow) {
                $csvReader->setHeaderOffset(0);
                $rows = $csvReader->getRecords();
            } elseif ($this->columnMap) {
                $rows = $csvReader->getRecords($headerMap);
            }

            foreach ($rows as $row) {
                $row = $remapper($row);
                $this->processRecord($row, $this->columnMap, $result, $preview);
            }
        } catch (\Exception $e) {
            $failedMessage = sprintf("Failed to parse %s", $filepath);
            if (Director::isDev()) {
                // Append instead of reusing the message as a format string:
                // a '%' in the file path must not be read as a conversion.
                $failedMessage .= sprintf(" because %s", $e->getMessage());
            }
            print $failedMessage . PHP_EOL;
        } finally {
            ini_set('auto_detect_line_endings', $previousDetectLE);
        }
        return $result;
    }

    /**
     * Build a copy of the column map in which every target is a usable
     * column name.
     *
     * - `null` targets become `'_ignore_' . $column`
     * - targets starting with `->` (callbacks) are replaced by the source
     *   column name itself
     * - every other target is kept as is
     *
     * @return array Source column => normalised target column
     */
    protected function getNormalisedColumnMap()
    {
        $map = [];
        foreach ($this->columnMap as $column => $newColumn) {
            // Test for null first so that null is never handed to strpos()
            if (is_null($newColumn)) {
                // the column map must consist of unique scalar values
                // `null` can be present multiple times and is not scalar
                // so we name it in a standard way so we can remove it later
                $map[$column] = '_ignore_' . $column;
            } elseif (strpos($newColumn, "->") === 0) {
                $map[$column] = $column;
            } else {
                $map[$column] = $newColumn;
            }
        }
        return $map;
    }

    /**
     * Splits a large file up into many smaller files.
     *
     * When the loader expects a header row, the first line of the source
     * file is copied to the top of every generated file. Each generated file
     * is named by getNewSplitFileName().
     *
     * @param string $path Path to large file to split
     * @param int $lines Number of lines per file; falls back to the `lines`
     *                   config value when not an integer
     *
     * @return array List of file paths
     */
    protected function splitFile($path, $lines = null)
    {
        Deprecation::notice('5.0', 'splitFile is deprecated, please process files using a stream');
        $previous = ini_get('auto_detect_line_endings');

        ini_set('auto_detect_line_endings', /** @scrutinizer ignore-type */ true);

        if (!is_int($lines)) {
            $lines = $this->config()->get("lines");
        }

        $new = $this->getNewSplitFileName();

        $to = fopen($new, 'w+');
        $from = fopen($path, 'r');

        $header = null;

        if ($this->hasHeaderRow) {
            // Read the header exactly once; it is re-used for every chunk below
            $header = fgets(/** @scrutinizer ignore-type */ $from);
            fwrite(/** @scrutinizer ignore-type */ $to, $header);
        }

        $files = [];
        $files[] = $new;

        $count = 0;

        while (!feof(/** @scrutinizer ignore-type */ $from)) {
            fwrite($to, fgets($from));

            $count++;

            if ($count >= $lines) {
                fclose(/** @scrutinizer ignore-type */ $to);

                // get a new temporary file name, to write the next lines to
                $new = $this->getNewSplitFileName();

                $to = fopen($new, 'w+');

                if ($this->hasHeaderRow) {
                    // add the headers to the new file
                    fwrite($to, $header);
                }

                $files[] = $new;

                $count = 0;
            }
        }

        fclose($to);
        // Release the source handle as well; it is not needed past this point
        fclose($from);

        ini_set('auto_detect_line_endings', $previous);

        return $files;
    }

    /**
     * Produce a fresh `.csv` file path inside TEMP_PATH.
     *
     * The file name is a uniqid() value prefixed with the called class name,
     * with namespace separators replaced by underscores.
     *
     * @return string
     */
    protected function getNewSplitFileName()
    {
        Deprecation::notice('5.0', 'getNewSplitFileName is deprecated, please name your files yourself');

        $classPrefix = str_replace('\\', '_', static::class);
        $fileName = uniqid($classPrefix, true) . '.csv';

        return implode(DIRECTORY_SEPARATOR, [TEMP_PATH, $fileName]);
    }

    /**
     * Import every row of one CSV file through a CSVParser.
     *
     * When a column map is set it is handed to the parser either as a column
     * mapping (header row expected) or as the header row itself (no header
     * row expected).
     *
     * @param string $filepath
     * @param boolean $preview Forwarded to processRecord()
     *
     * @return BulkLoader_Result
     */
    protected function processChunk($filepath, $preview = false)
    {
        Deprecation::notice('5.0', 'processChunk is deprecated, please process rows individually');
        $results = BulkLoader_Result::create();

        $parser = new CSVParser($filepath, $this->delimiter, $this->enclosure);

        // The column map serves two purposes, selected by hasHeaderRow below
        if ($this->columnMap) {
            // Callback targets ("->method") are swapped for their own source
            // key: several keys may share one callback, so the callback name
            // cannot serve as a column name.
            $parserMap = [];
            foreach ($this->columnMap as $sourceColumn => $target) {
                $isCallback = strpos($target, "->") === 0;
                $parserMap[$sourceColumn] = $isCallback ? $sourceColumn : $target;
            }

            if (!$this->hasHeaderRow) {
                $parser->provideHeaderRow($parserMap);
            } else {
                $parser->mapColumns($parserMap);
            }
        }

        foreach ($parser as $csvRow) {
            $this->processRecord($csvRow, $this->columnMap, $results, $preview);
        }

        return $results;
    }

    /**
     * Import a single row: find or create the target object, attach its
     * relations, copy the row's values onto it and register the outcome on
     * the result object.
     *
     * The row is walked twice. The first pass only deals with relation
     * columns (those listed in $this->relationCallbacks, or named with dot
     * notation); the second pass copies values onto the object.
     *
     * When $preview is true this method makes no write() calls itself and the
     * second pass is skipped, but relation callbacks are still invoked and
     * the object is still added to $results.
     *
     * @todo Better messages for relation checks and duplicate detection
     * Note that the $columnMap argument is only forwarded to
     * findExistingObject(); callback lookups read $this->columnMap instead.
     *
     * @param array $record Row data keyed by column name
     * @param array $columnMap
     * @param BulkLoader_Result $results Receives the created or updated object
     * @param boolean $preview
     *
     * @return int The ID property of the processed object
     */
    protected function processRecord($record, $columnMap, &$results, $preview = false)
    {
        $class = $this->objectClass;

        // find existing object, or create new one
        $existingObj = $this->findExistingObject($record, $columnMap);
        /** @var DataObject $obj */
        $obj = ($existingObj) ? $existingObj : new $class();

        $schema = DataObject::getSchema();

        // first run: find/create any relations and store them on the object
        // we can't combine runs, as other columns might rely on the relation being present
        foreach ($record as $fieldName => $val) {
            // don't bother querying if the value is not set
            if ($this->isNullValue($val)) {
                continue;
            }

            // checking for existing relations
            if (isset($this->relationCallbacks[$fieldName])) {
                // trigger custom search method for finding a relation based on the given value
                // and write it back to the relation (or create a new object)
                $relationName = $this->relationCallbacks[$fieldName]['relationname'];
                /** @var DataObject $relationObj */
                $relationObj = null;
                // a callback on the loader takes precedence over one on the object;
                // note the loader callback additionally receives $obj as first argument
                if ($this->hasMethod($this->relationCallbacks[$fieldName]['callback'])) {
                    $relationObj = $this->{$this->relationCallbacks[$fieldName]['callback']}($obj, $val, $record);
                } elseif ($obj->hasMethod($this->relationCallbacks[$fieldName]['callback'])) {
                    $relationObj = $obj->{$this->relationCallbacks[$fieldName]['callback']}($val, $record);
                }
                // no usable relation found: create an empty one of the has_one component class
                if (!$relationObj || !$relationObj->exists()) {
                    $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                    $relationObj = new $relationClass();
                    //write if we aren't previewing
                    if (!$preview) {
                        $relationObj->write();
                    }
                }
                $obj->{"{$relationName}ID"} = $relationObj->ID;
                //write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            } elseif (strpos($fieldName, '.') !== false) {
                // we have a relation column with dot notation
                // ($columnName is not used in this pass; only the relation is attached here)
                list($relationName, $columnName) = explode('.', $fieldName);
                // always gives us a component (either empty or existing)
                $relationObj = $obj->getComponent($relationName);
                if (!$preview) {
                    $relationObj->write();
                }
                $obj->{"{$relationName}ID"} = $relationObj->ID;

                //write if we are not previewing
                if (!$preview) {
                    $obj->write();
                    $obj->flushCache(); // avoid relation caching confusion
                }
            }
        }

        // second run: save data

        foreach ($record as $fieldName => $val) {
            // break out of the loop if we are previewing
            if ($preview) {
                break;
            }

            // look up the mapping to see if this needs to map to callback
            $mapped = $this->columnMap && isset($this->columnMap[$fieldName]);


            // precedence: "->callback" on the loader, then import<Field>() on the object,
            // then a plain update of the field
            if ($mapped && strpos($this->columnMap[$fieldName], '->') === 0) {
                $funcName = substr($this->columnMap[$fieldName], 2);

                $this->$funcName($obj, $val, $record);
            } elseif ($obj->hasMethod("import{$fieldName}")) {
                $obj->{"import{$fieldName}"}($val, $record);
            } else {
                $obj->update(array($fieldName => $val));
            }
        }

        // write record
        if (!$preview) {
            $obj->write();
        }

        // @todo better message support
        $message = '';

        // save to results
        if ($existingObj) {

            $results->addUpdated($obj, $message);
        } else {
            $results->addCreated($obj, $message);
        }

        // capture the ID before the object is destroyed below
        $objID = $obj->ID;

        $obj->destroy();

        // memory usage
        unset($existingObj, $obj);

        return $objID;
    }

    /**
     * Look up an already stored object matching the given row, using the
     * checks configured in {@link self::$duplicateChecks}.
     *
     * A check is either a field name (string), matched against the row value
     * of the same name, or an array with a `callback` entry naming a method
     * on the loader or on the object class. The first check that yields a
     * record wins.
     *
     * @todo support $columnMap
     *
     * @param array $record CSV data column
     * @param array $columnMap Currently not used
     * @return DataObject|false The matching record, or false when no check matched
     */
    public function findExistingObject($record, $columnMap = [])
    {
        $singleton = singleton($this->objectClass);

        foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
            if (is_string($duplicateCheck)) {
                // Nothing to compare against when the row has no value for this field
                if (empty($record[$duplicateCheck])) {
                    continue;
                }

                $candidates = DataObject::get($this->objectClass);
                $match = $candidates->filter($duplicateCheck, $record[$duplicateCheck])->first();
            } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
                $callback = $duplicateCheck['callback'];
                $match = null;

                // Prefer a callback defined on the loader over one on the object class
                if ($this->hasMethod($callback)) {
                    $match = $this->{$callback}($record[$fieldName], $record);
                } elseif ($singleton->hasMethod($callback)) {
                    $match = $singleton->{$callback}($record[$fieldName], $record);
                } else {
                    user_error("CsvBulkLoader::processRecord():"
                        . " {$callback} not found on importer or object class.", E_USER_ERROR);
                }
            } else {
                user_error('CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks', E_USER_ERROR);
                continue;
            }

            if ($match) {
                return $match;
            }
        }

        return false;
    }

    /**
     * Tell whether loaded files are treated as having column names: either
     * the header-row flag is on, or {@link self::$columnMap} is set and can
     * provide them instead.
     *
     * @return boolean
     */
    public function hasHeaderRow()
    {
        if ($this->hasHeaderRow) {
            return true;
        }

        return isset($this->columnMap);
    }
}

silverstripe / silverstripe-framework

CsvBulkLoader F last analyzed 2020-06-18 12:14 UTC

Complexity

Size/Duplication

Importance

9 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like

CsvBulkLoader F
last analyzed 2020-06-18 12:14 UTC