Completed
Pull Request — master (#9)
by Sander
04:40
created

DB::runDelete()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 3

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 16
c 0
b 0
f 0
ccs 10
cts 10
cp 1
rs 9.9666
cc 3
nc 4
nop 1
crap 3
1
<?php
2
/**
3
 * neuralyzer : Data Anonymization Library and CLI Tool
4
 *
5
 * PHP Version 7.1
6
 *
7
 * @author    Emmanuel Dyan
8
 * @author    Rémi Sauvat
9
 * @copyright 2018 Emmanuel Dyan
10
 *
11
 * @package   edyan/neuralyzer
12
 *
13
 * @license   GNU General Public License v2.0
14
 *
15
 * @link      https://github.com/edyan/neuralyzer
16
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Doctrine\DBAL\Configuration as DbalConfiguration;
21
use Doctrine\DBAL\Connection;
22
use Doctrine\DBAL\DriverManager as DbalDriverManager;
23
use Edyan\Neuralyzer\Exception\NeuralizerConfigurationException;
24
use Edyan\Neuralyzer\Exception\NeuralizerException;
25
use Edyan\Neuralyzer\Helper\DB as DBHelper;
26
use Edyan\Neuralyzer\Utils\CSVWriter;
27
use Edyan\Neuralyzer\Utils\DBUtils;
28
29
/**
30
 * Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL
31
 */
32
class DB extends AbstractAnonymizer
33
{
34
    /**
35
     * Doctrine DB Adapter
36
     *
37
     * @var Connection
38
     */
39
    private $conn;
40
41
    /**
42
     * A helper for the current driver
43
     *
44
     * @var DBHelper\AbstractDBHelper
45
     */
46
    private $dbHelper;
47
48
    /**
49
     * Various generic utils
50
     *
51
     * @var DBUtils
52
     */
53
    private $dbUtils;
54
55
    /**
56
     * Primary Key
57
     *
58
     * @var string
59
     */
60
    private $priKey;
61
62
    /**
63
     * Define the way we update / insert data
64
     *
65
     * @var string
66
     */
67
    private $mode = 'queries';
68
69
    /**
70
     * Contains queries if returnRes is true
71
     *
72
     * @var array
73
     */
74
    private $queries = [];
75
76
    /**
77
     * File resource for the csv (batch mode)
78
     *
79
     * @var CSVWriter
80
     */
81
    private $csv;
82
83
    /**
84
     * Define available update modes
85
     *
86
     * @var array
87
     */
88
    private $updateMode = [
89
        'queries' => 'doUpdateByQueries',
90
        'batch' => 'doBatchUpdate',
91
    ];
92
93
    /**
94
     * Define available insert modes
95
     *
96
     * @var array
97
     */
98
    private $insertMode = [
99
        'queries' => 'doInsertByQueries',
100
        'batch' => 'doBatchInsert',
101
    ];
102
103
    /**
104
     * Init connection
105
     *
106
     * @param array $params Parameters to send to Doctrine DB
107
     */
108 46
    public function __construct(array $params)
109
    {
110 46
        $dbHelperClass = DBHelper\DriverGuesser::getDBHelper($params['driver']);
111
112
        // Set specific options
113 45
        $params['driverOptions'] = $dbHelperClass::getDriverOptions();
114 45
        $this->conn = DbalDriverManager::getConnection($params, new DbalConfiguration());
115 45
        $this->conn->setFetchMode(\Doctrine\DBAL\FetchMode::ASSOCIATIVE);
116
117 45
        $this->dbUtils = new DBUtils($this->conn);
118 45
        $this->dbHelper = new $dbHelperClass($this->conn);
119 45
    }
120
121
122
    /**
123
     * Get Doctrine Connection
124
     *
125
     * @return Connection
126
     */
127 25
    public function getConn(): Connection
128
    {
129 25
        return $this->conn;
130
    }
131
132
133
    /**
134
     * Set the mode for update / insert
135
     *
136
     * @param string $mode
137
     *
138
     * @return DB
139
     */
140 16
    public function setMode(string $mode): DB
141
    {
142 16
        if (!in_array($mode, ['queries', 'batch'])) {
143 1
            throw new NeuralizerException('Mode could be only queries or batch');
144
        }
145
146 15
        if ($mode === 'batch') {
147 9
            $this->csv = new CSVWriter();
148 9
            $this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV());
149
        }
150
151 15
        $this->mode = $mode;
152
153 15
        return $this;
154
    }
155
156
157
    /**
158
     * Process an entity by reading / writing to the DB
159
     *
160
     * @param string        $entity
161
     * @param callable|null $callback
162
     *
163
     * @return void|array
164
     */
165 28
    public function processEntity(string $entity, callable $callback = null): array
166
    {
167 28
        $this->dbUtils->assertTableExists($entity);
168
169 27
        $this->priKey = $this->dbUtils->getPrimaryKey($entity);
170 26
        $this->entityCols = $this->dbUtils->getTableCols($entity);
171 26
        $this->entity = $entity;
172
173 26
        $actionsOnThatEntity = $this->whatToDoWithEntity();
174 24
        $this->queries = [];
175
176
        // Wrap everything in a transaction
177
        try {
178 24
            $this->conn->beginTransaction();
179
180 24
            if ($actionsOnThatEntity & self::TRUNCATE_TABLE) {
181 5
                $where = $this->getWhereConditionInConfig();
182 5
                $query = $this->runDelete($where);
183 4
                ($this->returnRes === true ? array_push($this->queries, $query) : '');
184
            }
185
186 23
            if ($actionsOnThatEntity & self::UPDATE_TABLE) {
187 17
                $this->updateData($callback);
188
            }
189
190 21
            if ($actionsOnThatEntity & self::INSERT_TABLE) {
191 6
                $this->insertData($callback);
192
            }
193
194 20
            $this->conn->commit();
195 4
        } catch (\Exception $e) {
196 4
            $this->conn->rollback();
197 4
            $this->conn->close(); // To avoid locks
198
199 4
            throw $e;
200
        }
201
202 20
        return $this->queries;
203
    }
204
205
206
    /**
207
     * Execute the Delete with Doctrine Query Builder
208
     *
209
     * @param string $where
210
     *
211
     * @return string
212
     */
213 5
    private function runDelete(string $where): string
214
    {
215 5
        $queryBuilder = $this->conn->createQueryBuilder();
216 5
        $queryBuilder = $queryBuilder->delete($this->entity);
217 5
        if (!empty($where)) {
218 3
            $queryBuilder = $queryBuilder->where($where);
219
        }
220 5
        $sql = $queryBuilder->getSQL();
221
222 5
        if ($this->pretend === true) {
223 1
            return $sql;
224
        }
225
226 4
        $queryBuilder->execute();
227
228 3
        return $sql;
229
    }
230
231
232
    /**
233
     * Update data of db table.
234
     *
235
     * @param  callable $callback
236
     *
237
     * @throws NeuralizerException
238
     */
239 17
    private function updateData($callback = null): void
240
    {
241 17
        $queryBuilder = $this->conn->createQueryBuilder();
242 17
        if ($this->limit === 0) {
243 9
            $this->setLimit($this->dbUtils->countResults($this->entity));
244
        }
245
246 17
        foreach ($this->configuration->getPreQueries() as $preQuery) {
247
            try {
248
                $this->conn->query($preQuery);
249
            } catch (\Exception $e) {
250
                throw new NeuralizerException($e->getMessage());
251
            }
252
        }
253
254 17
        $startAt = 0; // The first part of the limit (offset)
255 17
        $num = 0; // The number of rows updated
256 17
        while ($num < $this->limit) {
257
            $rows = $queryBuilder
258 16
                ->select('*')->from($this->entity)
259 16
                ->setFirstResult($startAt)->setMaxResults($this->batchSize)
260 16
                ->orderBy($this->priKey)
261 16
                ->execute();
262
263
            // I need to read line by line if I have to update the table
264
            // to make sure I do update by update (slower but no other choice for now)
265 16
            foreach ($rows as $row) {
266
                // Call the right method according to the mode
267 16
                $this->{$this->updateMode[$this->mode]}($row);
268
269 14
                if (!is_null($callback)) {
270 9
                    $callback(++$num);
271
                }
272
                // Have to exit now as we have reached the max
273 14
                if ($num >= $this->limit) {
274 14
                    break 2;
275
                }
276
            }
277
            // Move the offset
278
            // Make sure the loop ends if we have nothing to process
279 5
            $num = $startAt += $this->batchSize;
280
        }
281
        // Run a final method if defined
282 15
        if ($this->mode === 'batch') {
283 4
            $this->loadDataInBatch('update');
284
        }
285
286 15
        foreach ($this->configuration->getPostQueries() as $postQuery) {
287
            try {
288
                $this->conn->query($postQuery);
289
            } catch (\Exception $e) {
290
                throw new NeuralizerException($e->getMessage());
291
            }
292
        }
293 15
    }
294
295
296
    /**
297
     * Execute the Update with Doctrine QueryBuilder
298
     * @SuppressWarnings("unused") - Used dynamically
299
     *
300
     * @param  array $row Full row
301
     */
302 12
    private function doUpdateByQueries(array $row): void
303
    {
304 12
        $data = $this->generateFakeData();
305
306 10
        $queryBuilder = $this->conn->createQueryBuilder();
307 10
        $queryBuilder = $queryBuilder->update($this->entity);
308 10
        foreach ($data as $field => $value) {
309 10
            $value = empty($row[$field]) ?
310 8
                $this->dbUtils->getEmptyValue($this->entityCols[$field]['type']) :
311 10
                $value;
312
313 10
            $condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]);
314 10
            $queryBuilder = $queryBuilder->set($field, $condition);
315 10
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
316
        }
317 10
        $queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}");
318 10
        $queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]);
319
320 10
        $this->returnRes === true ?
321 7
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
322 3
            '';
323
324 10
        if ($this->pretend === false) {
325 7
            $queryBuilder->execute();
326
        }
327 10
    }
328
329
330
    /**
331
     * Write the line required for a later LOAD DATA (or \copy)
332
     * @SuppressWarnings("unused") - Used dynamically
333
     *
334
     * @param  array $row Full row
335
     */
336 4
    private function doBatchUpdate(array $row): void
337
    {
338 4
        $fakeData = $this->generateFakeData();
339 4
        $data = [];
340
        // Go trough all fields, and take a value by priority
341 4
        foreach (array_keys($this->entityCols) as $field) {
342
            // First take the fake data
343 4
            $data[$field] = $row[$field];
344 4
            if (!empty($row[$field]) && array_key_exists($field, $fakeData)) {
345 4
                $data[$field] = $fakeData[$field];
346
            }
347
        }
348
349 4
        $this->csv->write($data);
350 4
    }
351
352
353
    /**
354
     * Insert data into table
355
     *
356
     * @param  callable $callback
357
     */
358 6
    private function insertData($callback = null): void
359
    {
360 6
        for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) {
361
            // Call the right method according to the mode
362 6
            $this->{$this->insertMode[$this->mode]}($rowNum);
363
364 6
            if (!is_null($callback)) {
365 4
                $callback($rowNum);
366
            }
367
        }
368
369
        // Run a final method if defined
370 6
        if ($this->mode === 'batch') {
371 2
            $this->loadDataInBatch('insert');
372
        }
373 5
    }
374
375
376
    /**
377
     * Execute an INSERT with Doctrine QueryBuilder
378
     * @SuppressWarnings("unused") - Used dynamically
379
     */
380 4
    private function doInsertByQueries(): void
381
    {
382 4
        $data = $this->generateFakeData();
383
384 4
        $queryBuilder = $this->conn->createQueryBuilder();
385 4
        $queryBuilder = $queryBuilder->insert($this->entity);
386 4
        foreach ($data as $field => $value) {
387 4
            $queryBuilder = $queryBuilder->setValue($field, ":$field");
388 4
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
389
        }
390
391 4
        $this->returnRes === true ?
392 2
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
393 2
            '';
394
395 4
        if ($this->pretend === false) {
396 3
            $queryBuilder->execute();
397
        }
398 4
    }
399
400
401
    /**
402
     * Write the line required for a later LOAD DATA (or \copy)
403
     * @SuppressWarnings("unused") - Used dynamically
404
     */
405 2
    private function doBatchInsert(): void
406
    {
407 2
        $data = $this->generateFakeData();
408 2
        $this->csv->write($data);
409 2
    }
410
411
412
    /**
413
     * If a file has been created for the batch mode, destroy it
414
     * @SuppressWarnings("unused") - Used dynamically
415
     *
416
     * @param string $mode "update" or "insert"
417
     */
418 6
    private function loadDataInBatch(string $mode): void
419
    {
420 6
        $fields = array_keys($this->configEntites[$this->entity]['cols']);
421
        // Replace by all fields if update as we have to load everything
422 6
        if ($mode === 'update') {
423 4
            $fields = array_keys($this->entityCols);
424
        }
425
426
        // Load the data from the helper, only if pretend is false
427 6
        $filename = $this->csv->getRealPath();
428 6
        $this->dbHelper->setPretend($this->pretend);
429 6
        $sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode);
430
431 5
        $this->returnRes === true ? array_push($this->queries, $sql) : '';
432
433
        // Destroy the file
434 5
        unlink($this->csv->getRealPath());
435 5
    }
436
437
    /**
438
     * Generate fake data for an entity and return it as an Array
439
     *
440
     * @return array
441
     * @throws NeuralizerConfigurationException
442
     */
443 22
    protected function generateFakeData(): array
444
    {
445 22
        $this->checkEntityIsInConfig();
446
447 22
        $colsInConfig = $this->configEntites[$this->entity]['cols'];
448 22
        $row = [];
449 22
        foreach ($colsInConfig as $colName => $colProps) {
450 22
            $this->checkColIsInEntity($colName);
451
452
            // Check if faker is already used for this column name, so we can use the same faker.
453 21
            if (!isset($this->fakers[$colName][$colProps['method']])) {
454 21
                $language = $this->configuration->getConfigValues()['language'];
455 21
                $faker = \Faker\Factory::create($language);
456 21
                $faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($faker));
457
458
                // Check if column should be unique.
459 21
                if ($colProps['method'] === 'uniqueWord') {
460
                    // Count number of unique words to be taken from the dictionary.
461
                    $count = $this->dbUtils->countResults($this->entity);
462
                    $faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($faker, $count, $language));
463
                    $faker = $faker->unique(true);
464
                }
465 21
                $this->fakers[$colName][$colProps['method']] = $faker;
466
            }
467
468 21
            $faker = $this->fakers[$colName][$colProps['method']];
469
470
            $data = \call_user_func_array(
471 21
                [$faker, $colProps['method']],
472 21
                $colProps['params']
473
            );
474
475 21
            if (!is_scalar($data)) {
476 1
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
477 1
                throw new NeuralizerConfigurationException($msg);
478
            }
479
480 21
            $row[$colName] = trim($data);
481
482 21
            $colLength = $this->entityCols[$colName]['length'];
483
            // Cut the value if too long ...
484 21
            if (!empty($colLength) && \strlen($row[$colName]) > $colLength) {
485 21
                $row[$colName] = substr($row[$colName], 0, $colLength - 1);
486
            }
487
        }
488
489 20
        return $row;
490
    }
491
492
}
493