Completed
Push — master ( a1f222...7ceccc )
by Emmanuel
03:50
created

DB::loadDataInBatch()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 17
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 8
dl 0
loc 17
rs 10
c 0
b 0
f 0
ccs 7
cts 7
cp 1
cc 3
nc 4
nop 1
crap 3
1
<?php
2
/**
3
 * neuralyzer : Data Anonymization Library and CLI Tool
4
 *
5
 * PHP Version 7.1
6
 *
7
 * @author    Emmanuel Dyan
8
 * @author    Rémi Sauvat
9
 * @copyright 2018 Emmanuel Dyan
10
 *
11
 * @package   edyan/neuralyzer
12
 *
13
 * @license   GNU General Public License v2.0
14
 *
15
 * @link      https://github.com/edyan/neuralyzer
16
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Edyan\Neuralyzer\Exception\NeuralizerConfigurationException;
21
use Edyan\Neuralyzer\Exception\NeuralizerException;
22
use Edyan\Neuralyzer\Utils\CSVWriter;
23
use Edyan\Neuralyzer\Utils\Expression;
24
use Edyan\Neuralyzer\Utils\DBUtils;
25
26
/**
27
 * Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL
28
 */
29
class DB extends AbstractAnonymizer
30
{
31
    /**
32
     * Various generic utils
33
     *
34
     * @var Expression
35
     */
36
    private $expression;
37
38
    /**
39
     * Various generic utils
40
     *
41
     * @var DBUtils
42
     */
43
    private $dbUtils;
44
45
    /**
46
     * Various generic utils
47
     *
48
     * @var DBHelper
0 ignored issues
show
Bug introduced by
The type Edyan\Neuralyzer\Anonymizer\DBHelper was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
49
     */
50
    private $dbHelper;
51
52
    /**
53
     * Primary Key
54
     *
55
     * @var string
56
     */
57
    private $priKey;
58
59
    /**
60
     * Define the way we update / insert data
61
     *
62
     * @var string
63
     */
64
    private $mode = 'queries';
65
66
    /**
67
     * Contains queries if returnRes is true
68
     *
69
     * @var array
70
     */
71
    private $queries = [];
72
73
    /**
74
     * File resource for the csv (batch mode)
75
     *
76
     * @var CSVWriter
77
     */
78
    private $csv;
79
80
    /**
81
     * Define available update modes
82
     *
83
     * @var array
84
     */
85
    private $updateMode = [
86
        'queries' => 'doUpdateByQueries',
87
        'batch' => 'doBatchUpdate',
88
    ];
89
90
    /**
91
     * Define available insert modes
92
     *
93
     * @var array
94
     */
95
    private $insertMode = [
96
        'queries' => 'doInsertByQueries',
97
        'batch' => 'doBatchInsert',
98
    ];
99 47
100
101 47
    public function __construct(Expression $expression, DBUtils $dbUtils)
102
    {
103
        $this->expression = $expression;
104 46
        $this->dbUtils = $dbUtils;
105 46
        $this->dbHelper = $this->dbUtils->getDBHelper();
0 ignored issues
show
Documentation Bug introduced by
It seems like $this->dbUtils->getDBHelper() of type Edyan\Neuralyzer\Helper\DB\AbstractDBHelper is incompatible with the declared type Edyan\Neuralyzer\Anonymizer\DBHelper of property $dbHelper.

Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.

Either this assignment is in error or the assigned type should be added to the documentation/type hint for that property..

Loading history...
106 46
        $this->dbHelper->registerCustomTypes();
107
    }
108
109 46
    /**
110 46
     * Returns the dependency
111 46
     * @return DBUtils
112 46
     */
113
    public function getDbUtils(): DBUtils
114
    {
115
        return $this->dbUtils;
116
    }
117
118
    /**
119
     * Set the mode for update / insert
120 26
     *
121
     * @param string $mode
122 26
     * @throws NeuralizerException
123
     * @return DB
124
     */
125
    public function setMode(string $mode): DB
126
    {
127
        if (!in_array($mode, ['queries', 'batch'])) {
128
            throw new NeuralizerException('Mode could be only queries or batch');
129
        }
130
131 17
        if ($mode === 'batch') {
132
            $this->csv = new CSVWriter();
133 17
            $this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV());
134 1
        }
135
136
        $this->mode = $mode;
137 16
138 9
        return $this;
139 9
    }
140
141
142 16
    /**
143
     * Process an entity by reading / writing to the DB
144 16
     *
145
     * @param string        $entity
146
     * @param callable|null $callback
147
     * @throws \Exception
148
     * @return void|array
149
     */
150
    public function processEntity(string $entity, callable $callback = null): array
151
    {
152
        $this->dbUtils->assertTableExists($entity);
153
154
        $this->priKey = $this->dbUtils->getPrimaryKey($entity);
155
        $this->entityCols = $this->dbUtils->getTableCols($entity);
156 29
        $this->entity = $entity;
157
158 29
        $actionsOnThatEntity = $this->whatToDoWithEntity();
159
        $this->queries = [];
160 28
161 27
        // Wrap everything in a transaction
162 27
        $conn = $this->dbUtils->getConn();
163
        try {
164 27
            $conn->beginTransaction();
165 25
166
            if ($actionsOnThatEntity & self::UPDATE_TABLE) {
167
                $this->updateData($callback);
168
            }
169 25
170
            if ($actionsOnThatEntity & self::INSERT_TABLE) {
171 25
                $this->insertData($callback);
172 5
            }
173 5
174 4
            $conn->commit();
175
        } catch (\Exception $e) {
176
            $conn->rollBack();
177 24
            $conn->close(); // To avoid locks
178 18
179
            throw $e;
180
        }
181 21
182 6
        return $this->queries;
183
    }
184
185 20
186 5
    /**
187 5
     * Update data of db table.
188 5
     *
189
     * @param  callable $callback
190 5
     *
191
     * @throws NeuralizerException
192
     */
193 20
    private function updateData($callback = null): void
194
    {
195
        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
196
        if ($this->limit === 0) {
197
            $this->setLimit($this->dbUtils->countResults($this->entity));
198
        }
199
200
        $this->expression->evaluateExpressions($this->configuration->getPreActions());
201
202
        $startAt = 0; // The first part of the limit (offset)
203
        $num = 0; // The number of rows updated
204 5
        while ($num < $this->limit) {
205
            $rows = $queryBuilder
206 5
                ->select('*')->from($this->entity)
207 5
                ->setFirstResult($startAt)->setMaxResults($this->batchSize)
208 5
                ->orderBy($this->priKey)
209 3
                ->execute();
210
211 5
            // I need to read line by line if I have to update the table
212
            // to make sure I do update by update (slower but no other choice for now)
213 5
            foreach ($rows as $row) {
214 1
                // Call the right method according to the mode
215
                $this->{$this->updateMode[$this->mode]}($row);
216
217 4
                if ($callback !== null) {
218
                    $callback(++$num);
219 3
                }
220
                // Have to exit now as we have reached the max
221
                if ($num >= $this->limit) {
222
                    break 2;
223
                }
224
            }
225
            // Move the offset
226
            // Make sure the loop ends if we have nothing to process
227
            $num = $startAt += $this->batchSize;
228 18
        }
229
        // Run a final method if defined
230 18
        if ($this->mode === 'batch') {
231 18
            $this->loadDataInBatch('update');
232 9
        }
233
234
        $this->expression->evaluateExpressions($this->configuration->getPostActions());
235 18
    }
236 18
237 18
238
    /**
239 17
     * Execute the Update with Doctrine QueryBuilder
240 17
     * @SuppressWarnings("unused") - Used dynamically
241 17
     *
242 17
     * @param  array $row Full row
243
     */
244
    private function doUpdateByQueries(array $row): void
245
    {
246 17
        $data = $this->generateFakeData();
247
248 17
        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
249
        $queryBuilder = $queryBuilder->update($this->entity);
250 14
        foreach ($data as $field => $value) {
251 9
            $value = empty($row[$field]) ?
252
                $this->dbUtils->getEmptyValue($this->entityCols[$field]['type']) :
253
                $value;
254 14
255 14
            $condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]);
256
            $queryBuilder = $queryBuilder->set($field, $condition);
257
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
258
        }
259
        $queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}");
260 5
        $queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]);
261
262
        $this->returnRes === true ?
263 15
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
264 4
            '';
265
266 15
        if ($this->pretend === false) {
267
            $queryBuilder->execute();
268
        }
269
    }
270
271
272
    /**
273
     * Write the line required for a later LOAD DATA (or \copy)
274 13
     * @SuppressWarnings("unused") - Used dynamically
275
     *
276 13
     * @param  array $row Full row
277
     */
278 11
    private function doBatchUpdate(array $row): void
279 11
    {
280 11
        $fakeData = $this->generateFakeData();
281 11
        $data = [];
282 8
        // Go trough all fields, and take a value by priority
283 11
        foreach (array_keys($this->entityCols) as $field) {
284
            // First take the fake data
285 11
            $data[$field] = $row[$field];
286 11
            if (!empty($row[$field]) && array_key_exists($field, $fakeData)) {
287 11
                $data[$field] = $fakeData[$field];
288
            }
289 11
        }
290 11
291
        $this->csv->write($data);
292 11
    }
293 7
294 4
295
    /**
296 11
     * Insert data into table
297 8
     *
298
     * @param  callable $callback
299 10
     */
300
    private function insertData($callback = null): void
301
    {
302
        $this->expression->evaluateExpressions($this->configuration->getPreActions());
303
304
        for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) {
305
            // Call the right method according to the mode
306
            $this->{$this->insertMode[$this->mode]}($rowNum);
307 4
308
            if (!is_null($callback)) {
309 4
                $callback($rowNum);
310 4
            }
311
        }
312 4
313
        // Run a final method if defined
314 4
        if ($this->mode === 'batch') {
315 4
            $this->loadDataInBatch('insert');
316 4
        }
317
318
        $this->expression->evaluateExpressions($this->configuration->getPostActions());
319
    }
320 4
321 4
322
    /**
323
     * Execute an INSERT with Doctrine QueryBuilder
324
     * @SuppressWarnings("unused") - Used dynamically
325
     */
326
    private function doInsertByQueries(): void
327
    {
328 6
        $data = $this->generateFakeData();
329
330 6
        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
331
        $queryBuilder = $queryBuilder->insert($this->entity);
332 6
        foreach ($data as $field => $value) {
333
            $queryBuilder = $queryBuilder->setValue($field, ":$field");
334 6
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
335 4
        }
336
337
        $this->returnRes === true ?
338
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
339
            '';
340 6
341 2
        if ($this->pretend === false) {
342
            $queryBuilder->execute();
343 5
        }
344
    }
345
346
347
    /**
348
     * Write the line required for a later LOAD DATA (or \copy)
349
     * @SuppressWarnings("unused") - Used dynamically
350 4
     */
351
    private function doBatchInsert(): void
352 4
    {
353
        $data = $this->generateFakeData();
354 4
        $this->csv->write($data);
355 4
    }
356 4
357 4
358 4
    /**
359
     * If a file has been created for the batch mode, destroy it
360
     * @SuppressWarnings("unused") - Used dynamically
361 4
     *
362 2
     * @param string $mode "update" or "insert"
363 2
     */
364
    private function loadDataInBatch(string $mode): void
365 4
    {
366 3
        $fields = array_keys($this->configEntites[$this->entity]['cols']);
367
        // Replace by all fields if update as we have to load everything
368 4
        if ($mode === 'update') {
369
            $fields = array_keys($this->entityCols);
370
        }
371
372
        // Load the data from the helper, only if pretend is false
373
        $filename = $this->csv->getRealPath();
374
        $this->dbHelper->setPretend($this->pretend);
375 2
        $sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode);
376
377 2
        $this->returnRes === true ? array_push($this->queries, $sql) : '';
378 2
379 2
        // Destroy the file
380
        unlink($this->csv->getRealPath());
381
    }
382
383
    /**
384
     * Generate fake data for an entity and return it as an Array
385
     *
386
     * @return array
387 6
     * @throws NeuralizerConfigurationException
388
     */
389 6
    protected function generateFakeData(): array
390
    {
391 6
        $this->checkEntityIsInConfig();
392 4
393
        $colsInConfig = $this->configEntites[$this->entity]['cols'];
394
        $row = [];
395
        foreach ($colsInConfig as $colName => $colProps) {
396 6
            $this->checkColIsInEntity($colName);
397 6
398 6
            $data = \call_user_func_array(
399
                [$this->faker, $colProps['method']],
400 5
                $colProps['params']
401
            );
402
403 5
            if (!is_scalar($data)) {
404 5
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
405
                throw new NeuralizerConfigurationException($msg);
406
            }
407
408
            $row[$colName] = trim($data);
409
410
            $colLength = $this->entityCols[$colName]['length'];
411
            // Cut the value if too long ...
412
            if (!empty($colLength) && \strlen($row[$colName]) > $colLength) {
413
                $row[$colName] = substr($row[$colName], 0, $colLength - 1);
414
            }
415
        }
416
417
        return $row;
418
    }
419
}
420