Completed
Pull Request — master (#9)
by Sander
04:49
created

DB::getConn()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 3
ccs 0
cts 2
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 0
crap 2
1
<?php
2
/**
3
 * neuralyzer : Data Anonymization Library and CLI Tool
4
 *
5
 * PHP Version 7.1
6
 *
7
 * @author    Emmanuel Dyan
8
 * @author    Rémi Sauvat
9
 * @copyright 2018 Emmanuel Dyan
10
 *
11
 * @package   edyan/neuralyzer
12
 *
13
 * @license   GNU General Public License v2.0
14
 *
15
 * @link      https://github.com/edyan/neuralyzer
16
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Doctrine\DBAL\Configuration as DbalConfiguration;
21
use Doctrine\DBAL\Connection;
22
use Doctrine\DBAL\DriverManager as DbalDriverManager;
23
use Edyan\Neuralyzer\Exception\NeuralizerConfigurationException;
24
use Edyan\Neuralyzer\Exception\NeuralizerException;
25
use Edyan\Neuralyzer\Helper\DB as DBHelper;
26
use Edyan\Neuralyzer\Utils\CSVWriter;
27
use Edyan\Neuralyzer\Utils\DBUtils;
28
29
/**
30
 * Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL
31
 */
32
class DB extends AbstractAnonymizer
33
{
34
    /**
35
     * Doctrine DB Adapter
36
     *
37
     * @var Connection
38
     */
39
    private $conn;
40
41
    /**
42
     * A helper for the current driver
43
     *
44
     * @var DBHelper\AbstractDBHelper
45
     */
46
    private $dbHelper;
47
48
    /**
49
     * Various generic utils
50
     *
51
     * @var DBUtils
52
     */
53
    private $dbUtils;
54
55
    /**
56
     * Primary Key
57
     *
58
     * @var string
59
     */
60
    private $priKey;
61
62
    /**
63
     * Define the way we update / insert data
64
     *
65
     * @var string
66
     */
67
    private $mode = 'queries';
68
69
    /**
70
     * Contains queries if returnRes is true
71
     *
72
     * @var array
73
     */
74
    private $queries = [];
75
76
    /**
77
     * File resource for the csv (batch mode)
78
     *
79
     * @var CSVWriter
80
     */
81
    private $csv;
82
83
    /**
84
     * Define available update modes
85
     *
86
     * @var array
87
     */
88
    private $updateMode = [
89
        'queries' => 'doUpdateByQueries',
90
        'batch' => 'doBatchUpdate',
91
    ];
92
93
    /**
94
     * Define available insert modes
95
     *
96
     * @var array
97
     */
98
    private $insertMode = [
99
        'queries' => 'doInsertByQueries',
100
        'batch' => 'doBatchInsert',
101
    ];
102
103
    /**
104
     * @param array $params Parameters to send to Doctrine DB
105
     *
106
     * @throws \Doctrine\DBAL\DBALException
107
     */
108
    public function initDatabaseConnection(array $params)
109
    {
110
        $dbHelperClass = DBHelper\DriverGuesser::getDBHelper($params['driver']);
111
112
        // Set specific options
113
        $params['driverOptions'] = $dbHelperClass::getDriverOptions();
114
        $this->conn = DbalDriverManager::getConnection($params, new DbalConfiguration());
115
        $this->conn->setFetchMode(\Doctrine\DBAL\FetchMode::ASSOCIATIVE);
116
117
        $this->dbUtils = new DBUtils($this->conn);
118
        $this->dbHelper = new $dbHelperClass($this->conn);
119
    }
120
121
    /**
122
     * Get Doctrine Connection
123
     *
124
     * @return Connection
125
     */
126
    public function getConn(): Connection
127
    {
128
        return $this->conn;
129
    }
130
131
132
    /**
133
     * Set the mode for update / insert
134
     *
135
     * @param string $mode
136
     *
137
     * @return DB
138
     */
139
    public function setMode(string $mode): DB
140
    {
141
        if (!in_array($mode, ['queries', 'batch'])) {
142
            throw new NeuralizerException('Mode could be only queries or batch');
143
        }
144
145
        if ($mode === 'batch') {
146
            $this->csv = new CSVWriter();
147
            $this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV());
148
        }
149
150
        $this->mode = $mode;
151
152
        return $this;
153
    }
154
155
156
    /**
157
     * Process an entity by reading / writing to the DB
158
     *
159
     * @param string        $entity
160
     * @param callable|null $callback
161
     *
162
     * @return void|array
163
     */
164 1
    public function processEntity(string $entity, callable $callback = null): array
165
    {
166 1
        $this->dbUtils->assertTableExists($entity);
167
168
        $this->priKey = $this->dbUtils->getPrimaryKey($entity);
169
        $this->entityCols = $this->dbUtils->getTableCols($entity);
170
        $this->entity = $entity;
171
172
        $actionsOnThatEntity = $this->whatToDoWithEntity();
173
        $this->queries = [];
174
175
        // Wrap everything in a transaction
176
        try {
177
            $this->conn->beginTransaction();
178
179
            if ($actionsOnThatEntity & self::TRUNCATE_TABLE) {
180
                $where = $this->getWhereConditionInConfig();
181
                $query = $this->runDelete($where);
182
                ($this->returnRes === true ? array_push($this->queries, $query) : '');
183
            }
184
185
            if ($actionsOnThatEntity & self::UPDATE_TABLE) {
186
                $this->updateData($callback);
187
            }
188
189
            if ($actionsOnThatEntity & self::INSERT_TABLE) {
190
                $this->insertData($callback);
191
            }
192
193
            $this->conn->commit();
194
        } catch (\Exception $e) {
195
            $this->conn->rollback();
196
            $this->conn->close(); // To avoid locks
197
198
            throw $e;
199
        }
200
201
        return $this->queries;
202
    }
203
204
205
    /**
206
     * Execute the Delete with Doctrine Query Builder
207
     *
208
     * @param string $where
209
     *
210
     * @return string
211
     */
212
    private function runDelete(string $where): string
213
    {
214
        $queryBuilder = $this->conn->createQueryBuilder();
215
        $queryBuilder = $queryBuilder->delete($this->entity);
216
        if (!empty($where)) {
217
            $queryBuilder = $queryBuilder->where($where);
218
        }
219
        $sql = $queryBuilder->getSQL();
220
221
        if ($this->pretend === true) {
222
            return $sql;
223
        }
224
225
        $queryBuilder->execute();
226
227
        return $sql;
228
    }
229
230
231
    /**
232
     * Update data of db table.
233
     *
234
     * @param  callable $callback
235
     *
236
     * @throws NeuralizerException
237
     */
238
    private function updateData($callback = null): void
239
    {
240
        $queryBuilder = $this->conn->createQueryBuilder();
241
        if ($this->limit === 0) {
242
            $this->setLimit($this->dbUtils->countResults($this->entity));
243
        }
244
245
        $this->evaluateExpressionUtils($this->configuration->getPreActions());
246
247
        $startAt = 0; // The first part of the limit (offset)
248
        $num = 0; // The number of rows updated
249
        while ($num < $this->limit) {
250
            $rows = $queryBuilder
251
                ->select('*')->from($this->entity)
252
                ->setFirstResult($startAt)->setMaxResults($this->batchSize)
253
                ->orderBy($this->priKey)
254
                ->execute();
255
256
            // I need to read line by line if I have to update the table
257
            // to make sure I do update by update (slower but no other choice for now)
258
            foreach ($rows as $row) {
259
                // Call the right method according to the mode
260
                $this->{$this->updateMode[$this->mode]}($row);
261
262
                if (!is_null($callback)) {
263
                    $callback(++$num);
264
                }
265
                // Have to exit now as we have reached the max
266
                if ($num >= $this->limit) {
267
                    break 2;
268
                }
269
            }
270
            // Move the offset
271
            // Make sure the loop ends if we have nothing to process
272
            $num = $startAt += $this->batchSize;
273
        }
274
        // Run a final method if defined
275
        if ($this->mode === 'batch') {
276
            $this->loadDataInBatch('update');
277
        }
278
279
        $this->evaluateExpressionUtils($this->configuration->getPostActions());
280
    }
281
282
283
    /**
284
     * Execute the Update with Doctrine QueryBuilder
285
     * @SuppressWarnings("unused") - Used dynamically
286
     *
287
     * @param  array $row Full row
288
     */
289
    private function doUpdateByQueries(array $row): void
290
    {
291
        $data = $this->generateFakeData();
292
293
        $queryBuilder = $this->conn->createQueryBuilder();
294
        $queryBuilder = $queryBuilder->update($this->entity);
295
        foreach ($data as $field => $value) {
296
            $value = empty($row[$field]) ?
297
                $this->dbUtils->getEmptyValue($this->entityCols[$field]['type']) :
298
                $value;
299
300
            $condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]);
301
            $queryBuilder = $queryBuilder->set($field, $condition);
302
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
303
        }
304
        $queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}");
305
        $queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]);
306
307
        $this->returnRes === true ?
308
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
309
            '';
310
311
        if ($this->pretend === false) {
312
            $queryBuilder->execute();
313
        }
314
    }
315
316
317
    /**
318
     * Write the line required for a later LOAD DATA (or \copy)
319
     * @SuppressWarnings("unused") - Used dynamically
320
     *
321
     * @param  array $row Full row
322
     */
323
    private function doBatchUpdate(array $row): void
324
    {
325
        $fakeData = $this->generateFakeData();
326
        $data = [];
327
        // Go trough all fields, and take a value by priority
328
        foreach (array_keys($this->entityCols) as $field) {
329
            // First take the fake data
330
            $data[$field] = $row[$field];
331
            if (!empty($row[$field]) && array_key_exists($field, $fakeData)) {
332
                $data[$field] = $fakeData[$field];
333
            }
334
        }
335
336
        $this->csv->write($data);
337
    }
338
339
340
    /**
341
     * Insert data into table
342
     *
343
     * @param  callable $callback
344
     */
345
    private function insertData($callback = null): void
346
    {
347
        for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) {
348
            // Call the right method according to the mode
349
            $this->{$this->insertMode[$this->mode]}($rowNum);
350
351
            if (!is_null($callback)) {
352
                $callback($rowNum);
353
            }
354
        }
355
356
        // Run a final method if defined
357
        if ($this->mode === 'batch') {
358
            $this->loadDataInBatch('insert');
359
        }
360
    }
361
362
363
    /**
364
     * Execute an INSERT with Doctrine QueryBuilder
365
     * @SuppressWarnings("unused") - Used dynamically
366
     */
367
    private function doInsertByQueries(): void
368
    {
369
        $data = $this->generateFakeData();
370
371
        $queryBuilder = $this->conn->createQueryBuilder();
372
        $queryBuilder = $queryBuilder->insert($this->entity);
373
        foreach ($data as $field => $value) {
374
            $queryBuilder = $queryBuilder->setValue($field, ":$field");
375
            $queryBuilder = $queryBuilder->setParameter(":$field", $value);
376
        }
377
378
        $this->returnRes === true ?
379
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) :
380
            '';
381
382
        if ($this->pretend === false) {
383
            $queryBuilder->execute();
384
        }
385
    }
386
387
388
    /**
389
     * Write the line required for a later LOAD DATA (or \copy)
390
     * @SuppressWarnings("unused") - Used dynamically
391
     */
392
    private function doBatchInsert(): void
393
    {
394
        $data = $this->generateFakeData();
395
        $this->csv->write($data);
396
    }
397
398
399
    /**
400
     * If a file has been created for the batch mode, destroy it
401
     * @SuppressWarnings("unused") - Used dynamically
402
     *
403
     * @param string $mode "update" or "insert"
404
     */
405
    private function loadDataInBatch(string $mode): void
406
    {
407
        $fields = array_keys($this->configEntites[$this->entity]['cols']);
408
        // Replace by all fields if update as we have to load everything
409
        if ($mode === 'update') {
410
            $fields = array_keys($this->entityCols);
411
        }
412
413
        // Load the data from the helper, only if pretend is false
414
        $filename = $this->csv->getRealPath();
415
        $this->dbHelper->setPretend($this->pretend);
416
        $sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode);
417
418
        $this->returnRes === true ? array_push($this->queries, $sql) : '';
419
420
        // Destroy the file
421
        unlink($this->csv->getRealPath());
422
    }
423
424
    /**
425
     * Generate fake data for an entity and return it as an Array
426
     *
427
     * @return array
428
     * @throws NeuralizerConfigurationException
429
     */
430
    protected function generateFakeData(): array
431
    {
432
        $this->checkEntityIsInConfig();
433
434
        $colsInConfig = $this->configEntites[$this->entity]['cols'];
435
        $row = [];
436
        foreach ($colsInConfig as $colName => $colProps) {
437
            $this->checkColIsInEntity($colName);
438
439
            // Check if faker is already used for this column name, so we can use the same faker.
440
            if (!isset($this->fakers[$colName][$colProps['method']])) {
441
                $language = $this->configuration->getConfigValues()['language'];
442
                $faker = \Faker\Factory::create($language);
443
                $faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($faker));
444
445
                // Check if column should be unique.
446
                if ($colProps['method'] === 'uniqueWord') {
447
                    // Count number of unique words to be taken from the dictionary.
448
                    $count = $this->dbUtils->countResults($this->entity);
449
                    $faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($faker, $count, $language));
450
                    $faker = $faker->unique(true);
451
                }
452
                $this->fakers[$colName][$colProps['method']] = $faker;
453
            }
454
455
            $faker = $this->fakers[$colName][$colProps['method']];
456
457
            $data = \call_user_func_array(
458
                [$faker, $colProps['method']],
459
                $colProps['params']
460
            );
461
462
            if (!is_scalar($data)) {
463
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
464
                throw new NeuralizerConfigurationException($msg);
465
            }
466
467
            $row[$colName] = trim($data);
468
469
            $colLength = $this->entityCols[$colName]['length'];
470
            // Cut the value if too long ...
471
            if (!empty($colLength) && \strlen($row[$colName]) > $colLength) {
472
                $row[$colName] = substr($row[$colName], 0, $colLength - 1);
473
            }
474
        }
475
476
        return $row;
477
    }
478
}
479