1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* neuralyzer : Data Anonymization Library and CLI Tool |
4
|
|
|
* |
5
|
|
|
* PHP Version 7.1 |
6
|
|
|
* |
7
|
|
|
* @author Emmanuel Dyan |
8
|
|
|
* @author Rémi Sauvat |
9
|
|
|
* @copyright 2018 Emmanuel Dyan |
10
|
|
|
* |
11
|
|
|
* @package edyan/neuralyzer |
12
|
|
|
* |
13
|
|
|
* @license GNU General Public License v2.0 |
14
|
|
|
* |
15
|
|
|
* @link https://github.com/edyan/neuralyzer |
16
|
|
|
*/ |
17
|
|
|
|
18
|
|
|
namespace Edyan\Neuralyzer\Anonymizer; |
19
|
|
|
|
20
|
|
|
use Doctrine\DBAL\Configuration as DbalConfiguration; |
21
|
|
|
use Doctrine\DBAL\Connection; |
22
|
|
|
use Doctrine\DBAL\DriverManager as DbalDriverManager; |
23
|
|
|
use Edyan\Neuralyzer\Exception\NeuralizerConfigurationException; |
24
|
|
|
use Edyan\Neuralyzer\Exception\NeuralizerException; |
25
|
|
|
use Edyan\Neuralyzer\Helper\DB as DBHelper; |
26
|
|
|
use Edyan\Neuralyzer\Utils\CSVWriter; |
27
|
|
|
use Edyan\Neuralyzer\Utils\DBUtils; |
28
|
|
|
|
29
|
|
|
/** |
30
|
|
|
* Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL |
31
|
|
|
*/ |
32
|
|
|
class DB extends AbstractAnonymizer |
33
|
|
|
{ |
34
|
|
|
/** |
35
|
|
|
* Doctrine DB Adapter |
36
|
|
|
* |
37
|
|
|
* @var Connection |
38
|
|
|
*/ |
39
|
|
|
private $conn; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* A helper for the current driver |
43
|
|
|
* |
44
|
|
|
* @var DBHelper\AbstractDBHelper |
45
|
|
|
*/ |
46
|
|
|
private $dbHelper; |
47
|
|
|
|
48
|
|
|
/** |
49
|
|
|
* Various generic utils |
50
|
|
|
* |
51
|
|
|
* @var DBUtils |
52
|
|
|
*/ |
53
|
|
|
private $dbUtils; |
54
|
|
|
|
55
|
|
|
/** |
56
|
|
|
* Primary Key |
57
|
|
|
* |
58
|
|
|
* @var string |
59
|
|
|
*/ |
60
|
|
|
private $priKey; |
61
|
|
|
|
62
|
|
|
/** |
63
|
|
|
* Define the way we update / insert data |
64
|
|
|
* |
65
|
|
|
* @var string |
66
|
|
|
*/ |
67
|
|
|
private $mode = 'queries'; |
68
|
|
|
|
69
|
|
|
/** |
70
|
|
|
* Contains queries if returnRes is true |
71
|
|
|
* |
72
|
|
|
* @var array |
73
|
|
|
*/ |
74
|
|
|
private $queries = []; |
75
|
|
|
|
76
|
|
|
/** |
77
|
|
|
* File resource for the csv (batch mode) |
78
|
|
|
* |
79
|
|
|
* @var CSVWriter |
80
|
|
|
*/ |
81
|
|
|
private $csv; |
82
|
|
|
|
83
|
|
|
/** |
84
|
|
|
* Define available update modes |
85
|
|
|
* |
86
|
|
|
* @var array |
87
|
|
|
*/ |
88
|
|
|
private $updateMode = [ |
89
|
|
|
'queries' => 'doUpdateByQueries', |
90
|
|
|
'batch' => 'doBatchUpdate', |
91
|
|
|
]; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* Define available insert modes |
95
|
|
|
* |
96
|
|
|
* @var array |
97
|
|
|
*/ |
98
|
|
|
private $insertMode = [ |
99
|
|
|
'queries' => 'doInsertByQueries', |
100
|
|
|
'batch' => 'doBatchInsert', |
101
|
|
|
]; |
102
|
|
|
|
103
|
|
|
/** |
104
|
|
|
* Init connection |
105
|
|
|
* |
106
|
|
|
* @param array $params Parameters to send to Doctrine DB |
107
|
|
|
*/ |
108
|
46 |
|
public function __construct(array $params) |
109
|
|
|
{ |
110
|
46 |
|
$dbHelperClass = DBHelper\DriverGuesser::getDBHelper($params['driver']); |
111
|
|
|
|
112
|
|
|
// Set specific options |
113
|
45 |
|
$params['driverOptions'] = $dbHelperClass::getDriverOptions(); |
114
|
45 |
|
$this->conn = DbalDriverManager::getConnection($params, new DbalConfiguration()); |
115
|
45 |
|
$this->conn->setFetchMode(\Doctrine\DBAL\FetchMode::ASSOCIATIVE); |
116
|
|
|
|
117
|
45 |
|
$this->dbUtils = new DBUtils($this->conn); |
118
|
45 |
|
$this->dbHelper = new $dbHelperClass($this->conn); |
119
|
45 |
|
} |
120
|
|
|
|
121
|
|
|
|
122
|
|
|
/** |
123
|
|
|
* Get Doctrine Connection |
124
|
|
|
* |
125
|
|
|
* @return Connection |
126
|
|
|
*/ |
127
|
25 |
|
public function getConn(): Connection |
128
|
|
|
{ |
129
|
25 |
|
return $this->conn; |
130
|
|
|
} |
131
|
|
|
|
132
|
|
|
|
133
|
|
|
/** |
134
|
|
|
* Set the mode for update / insert |
135
|
|
|
* |
136
|
|
|
* @param string $mode |
137
|
|
|
* |
138
|
|
|
* @return DB |
139
|
|
|
*/ |
140
|
16 |
|
public function setMode(string $mode): DB |
141
|
|
|
{ |
142
|
16 |
|
if (!in_array($mode, ['queries', 'batch'])) { |
143
|
1 |
|
throw new NeuralizerException('Mode could be only queries or batch'); |
144
|
|
|
} |
145
|
|
|
|
146
|
15 |
|
if ($mode === 'batch') { |
147
|
9 |
|
$this->csv = new CSVWriter(); |
148
|
9 |
|
$this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV()); |
149
|
|
|
} |
150
|
|
|
|
151
|
15 |
|
$this->mode = $mode; |
152
|
|
|
|
153
|
15 |
|
return $this; |
154
|
|
|
} |
155
|
|
|
|
156
|
|
|
|
157
|
|
|
/** |
158
|
|
|
* Process an entity by reading / writing to the DB |
159
|
|
|
* |
160
|
|
|
* @param string $entity |
161
|
|
|
* @param callable|null $callback |
162
|
|
|
* |
163
|
|
|
* @return void|array |
164
|
|
|
*/ |
165
|
28 |
|
public function processEntity(string $entity, callable $callback = null): array |
166
|
|
|
{ |
167
|
28 |
|
$this->dbUtils->assertTableExists($entity); |
168
|
|
|
|
169
|
27 |
|
$this->priKey = $this->dbUtils->getPrimaryKey($entity); |
170
|
26 |
|
$this->entityCols = $this->dbUtils->getTableCols($entity); |
171
|
26 |
|
$this->entity = $entity; |
172
|
|
|
|
173
|
26 |
|
$actionsOnThatEntity = $this->whatToDoWithEntity(); |
174
|
24 |
|
$this->queries = []; |
175
|
|
|
|
176
|
|
|
// Wrap everything in a transaction |
177
|
|
|
try { |
178
|
24 |
|
$this->conn->beginTransaction(); |
179
|
|
|
|
180
|
24 |
|
if ($actionsOnThatEntity & self::TRUNCATE_TABLE) { |
181
|
5 |
|
$where = $this->getWhereConditionInConfig(); |
182
|
5 |
|
$query = $this->runDelete($where); |
183
|
4 |
|
($this->returnRes === true ? array_push($this->queries, $query) : ''); |
184
|
|
|
} |
185
|
|
|
|
186
|
23 |
|
if ($actionsOnThatEntity & self::UPDATE_TABLE) { |
187
|
17 |
|
$this->updateData($callback); |
188
|
|
|
} |
189
|
|
|
|
190
|
21 |
|
if ($actionsOnThatEntity & self::INSERT_TABLE) { |
191
|
6 |
|
$this->insertData($callback); |
192
|
|
|
} |
193
|
|
|
|
194
|
20 |
|
$this->conn->commit(); |
195
|
4 |
|
} catch (\Exception $e) { |
196
|
4 |
|
$this->conn->rollback(); |
197
|
4 |
|
$this->conn->close(); // To avoid locks |
198
|
|
|
|
199
|
4 |
|
throw $e; |
200
|
|
|
} |
201
|
|
|
|
202
|
20 |
|
return $this->queries; |
203
|
|
|
} |
204
|
|
|
|
205
|
|
|
|
206
|
|
|
/** |
207
|
|
|
* Execute the Delete with Doctrine Query Builder |
208
|
|
|
* |
209
|
|
|
* @param string $where |
210
|
|
|
* |
211
|
|
|
* @return string |
212
|
|
|
*/ |
213
|
5 |
|
private function runDelete(string $where): string |
214
|
|
|
{ |
215
|
5 |
|
$queryBuilder = $this->conn->createQueryBuilder(); |
216
|
5 |
|
$queryBuilder = $queryBuilder->delete($this->entity); |
217
|
5 |
|
if (!empty($where)) { |
218
|
3 |
|
$queryBuilder = $queryBuilder->where($where); |
219
|
|
|
} |
220
|
5 |
|
$sql = $queryBuilder->getSQL(); |
221
|
|
|
|
222
|
5 |
|
if ($this->pretend === true) { |
223
|
1 |
|
return $sql; |
224
|
|
|
} |
225
|
|
|
|
226
|
4 |
|
$queryBuilder->execute(); |
227
|
|
|
|
228
|
3 |
|
return $sql; |
229
|
|
|
} |
230
|
|
|
|
231
|
|
|
|
232
|
|
|
/** |
233
|
|
|
* Update data of db table. |
234
|
|
|
* |
235
|
|
|
* @param callable $callback |
236
|
|
|
* |
237
|
|
|
* @throws NeuralizerException |
238
|
|
|
*/ |
239
|
17 |
|
private function updateData($callback = null): void |
240
|
|
|
{ |
241
|
17 |
|
$queryBuilder = $this->conn->createQueryBuilder(); |
242
|
17 |
|
if ($this->limit === 0) { |
243
|
9 |
|
$this->setLimit($this->dbUtils->countResults($this->entity)); |
244
|
|
|
} |
245
|
|
|
|
246
|
17 |
|
foreach ($this->configuration->getPreQueries() as $preQuery) { |
247
|
|
|
try { |
248
|
|
|
$this->conn->query($preQuery); |
249
|
|
|
} catch (\Exception $e) { |
250
|
|
|
throw new NeuralizerException($e->getMessage()); |
251
|
|
|
} |
252
|
|
|
} |
253
|
|
|
|
254
|
17 |
|
$startAt = 0; // The first part of the limit (offset) |
255
|
17 |
|
$num = 0; // The number of rows updated |
256
|
17 |
|
while ($num < $this->limit) { |
257
|
|
|
$rows = $queryBuilder |
258
|
16 |
|
->select('*')->from($this->entity) |
259
|
16 |
|
->setFirstResult($startAt)->setMaxResults($this->batchSize) |
260
|
16 |
|
->orderBy($this->priKey) |
261
|
16 |
|
->execute(); |
262
|
|
|
|
263
|
|
|
// I need to read line by line if I have to update the table |
264
|
|
|
// to make sure I do update by update (slower but no other choice for now) |
265
|
16 |
|
foreach ($rows as $row) { |
266
|
|
|
// Call the right method according to the mode |
267
|
16 |
|
$this->{$this->updateMode[$this->mode]}($row); |
268
|
|
|
|
269
|
14 |
|
if (!is_null($callback)) { |
270
|
9 |
|
$callback(++$num); |
271
|
|
|
} |
272
|
|
|
// Have to exit now as we have reached the max |
273
|
14 |
|
if ($num >= $this->limit) { |
274
|
14 |
|
break 2; |
275
|
|
|
} |
276
|
|
|
} |
277
|
|
|
// Move the offset |
278
|
|
|
// Make sure the loop ends if we have nothing to process |
279
|
5 |
|
$num = $startAt += $this->batchSize; |
280
|
|
|
} |
281
|
|
|
// Run a final method if defined |
282
|
15 |
|
if ($this->mode === 'batch') { |
283
|
4 |
|
$this->loadDataInBatch('update'); |
284
|
|
|
} |
285
|
|
|
|
286
|
15 |
|
foreach ($this->configuration->getPostQueries() as $postQuery) { |
287
|
|
|
try { |
288
|
|
|
$this->conn->query($postQuery); |
289
|
|
|
} catch (\Exception $e) { |
290
|
|
|
throw new NeuralizerException($e->getMessage()); |
291
|
|
|
} |
292
|
|
|
} |
293
|
15 |
|
} |
294
|
|
|
|
295
|
|
|
|
296
|
|
|
/** |
297
|
|
|
* Execute the Update with Doctrine QueryBuilder |
298
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
299
|
|
|
* |
300
|
|
|
* @param array $row Full row |
301
|
|
|
*/ |
302
|
12 |
|
private function doUpdateByQueries(array $row): void |
303
|
|
|
{ |
304
|
12 |
|
$data = $this->generateFakeData(); |
305
|
|
|
|
306
|
10 |
|
$queryBuilder = $this->conn->createQueryBuilder(); |
307
|
10 |
|
$queryBuilder = $queryBuilder->update($this->entity); |
308
|
10 |
|
foreach ($data as $field => $value) { |
309
|
10 |
|
$value = empty($row[$field]) ? |
310
|
8 |
|
$this->dbUtils->getEmptyValue($this->entityCols[$field]['type']) : |
311
|
10 |
|
$value; |
312
|
|
|
|
313
|
10 |
|
$condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]); |
314
|
10 |
|
$queryBuilder = $queryBuilder->set($field, $condition); |
315
|
10 |
|
$queryBuilder = $queryBuilder->setParameter(":$field", $value); |
316
|
|
|
} |
317
|
10 |
|
$queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}"); |
318
|
10 |
|
$queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]); |
319
|
|
|
|
320
|
10 |
|
$this->returnRes === true ? |
321
|
7 |
|
array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) : |
322
|
3 |
|
''; |
323
|
|
|
|
324
|
10 |
|
if ($this->pretend === false) { |
325
|
7 |
|
$queryBuilder->execute(); |
326
|
|
|
} |
327
|
10 |
|
} |
328
|
|
|
|
329
|
|
|
|
330
|
|
|
/** |
331
|
|
|
* Write the line required for a later LOAD DATA (or \copy) |
332
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
333
|
|
|
* |
334
|
|
|
* @param array $row Full row |
335
|
|
|
*/ |
336
|
4 |
|
private function doBatchUpdate(array $row): void |
337
|
|
|
{ |
338
|
4 |
|
$fakeData = $this->generateFakeData(); |
339
|
4 |
|
$data = []; |
340
|
|
|
// Go trough all fields, and take a value by priority |
341
|
4 |
|
foreach (array_keys($this->entityCols) as $field) { |
342
|
|
|
// First take the fake data |
343
|
4 |
|
$data[$field] = $row[$field]; |
344
|
4 |
|
if (!empty($row[$field]) && array_key_exists($field, $fakeData)) { |
345
|
4 |
|
$data[$field] = $fakeData[$field]; |
346
|
|
|
} |
347
|
|
|
} |
348
|
|
|
|
349
|
4 |
|
$this->csv->write($data); |
350
|
4 |
|
} |
351
|
|
|
|
352
|
|
|
|
353
|
|
|
/** |
354
|
|
|
* Insert data into table |
355
|
|
|
* |
356
|
|
|
* @param callable $callback |
357
|
|
|
*/ |
358
|
6 |
|
private function insertData($callback = null): void |
359
|
|
|
{ |
360
|
6 |
|
for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) { |
361
|
|
|
// Call the right method according to the mode |
362
|
6 |
|
$this->{$this->insertMode[$this->mode]}($rowNum); |
363
|
|
|
|
364
|
6 |
|
if (!is_null($callback)) { |
365
|
4 |
|
$callback($rowNum); |
366
|
|
|
} |
367
|
|
|
} |
368
|
|
|
|
369
|
|
|
// Run a final method if defined |
370
|
6 |
|
if ($this->mode === 'batch') { |
371
|
2 |
|
$this->loadDataInBatch('insert'); |
372
|
|
|
} |
373
|
5 |
|
} |
374
|
|
|
|
375
|
|
|
|
376
|
|
|
/** |
377
|
|
|
* Execute an INSERT with Doctrine QueryBuilder |
378
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
379
|
|
|
*/ |
380
|
4 |
|
private function doInsertByQueries(): void |
381
|
|
|
{ |
382
|
4 |
|
$data = $this->generateFakeData(); |
383
|
|
|
|
384
|
4 |
|
$queryBuilder = $this->conn->createQueryBuilder(); |
385
|
4 |
|
$queryBuilder = $queryBuilder->insert($this->entity); |
386
|
4 |
|
foreach ($data as $field => $value) { |
387
|
4 |
|
$queryBuilder = $queryBuilder->setValue($field, ":$field"); |
388
|
4 |
|
$queryBuilder = $queryBuilder->setParameter(":$field", $value); |
389
|
|
|
} |
390
|
|
|
|
391
|
4 |
|
$this->returnRes === true ? |
392
|
2 |
|
array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) : |
393
|
2 |
|
''; |
394
|
|
|
|
395
|
4 |
|
if ($this->pretend === false) { |
396
|
3 |
|
$queryBuilder->execute(); |
397
|
|
|
} |
398
|
4 |
|
} |
399
|
|
|
|
400
|
|
|
|
401
|
|
|
/** |
402
|
|
|
* Write the line required for a later LOAD DATA (or \copy) |
403
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
404
|
|
|
*/ |
405
|
2 |
|
private function doBatchInsert(): void |
406
|
|
|
{ |
407
|
2 |
|
$data = $this->generateFakeData(); |
408
|
2 |
|
$this->csv->write($data); |
409
|
2 |
|
} |
410
|
|
|
|
411
|
|
|
|
412
|
|
|
/** |
413
|
|
|
* If a file has been created for the batch mode, destroy it |
414
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
415
|
|
|
* |
416
|
|
|
* @param string $mode "update" or "insert" |
417
|
|
|
*/ |
418
|
6 |
|
private function loadDataInBatch(string $mode): void |
419
|
|
|
{ |
420
|
6 |
|
$fields = array_keys($this->configEntites[$this->entity]['cols']); |
421
|
|
|
// Replace by all fields if update as we have to load everything |
422
|
6 |
|
if ($mode === 'update') { |
423
|
4 |
|
$fields = array_keys($this->entityCols); |
424
|
|
|
} |
425
|
|
|
|
426
|
|
|
// Load the data from the helper, only if pretend is false |
427
|
6 |
|
$filename = $this->csv->getRealPath(); |
428
|
6 |
|
$this->dbHelper->setPretend($this->pretend); |
429
|
6 |
|
$sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode); |
430
|
|
|
|
431
|
5 |
|
$this->returnRes === true ? array_push($this->queries, $sql) : ''; |
432
|
|
|
|
433
|
|
|
// Destroy the file |
434
|
5 |
|
unlink($this->csv->getRealPath()); |
435
|
5 |
|
} |
436
|
|
|
|
437
|
|
|
/** |
438
|
|
|
* Generate fake data for an entity and return it as an Array |
439
|
|
|
* |
440
|
|
|
* @return array |
441
|
|
|
* @throws NeuralizerConfigurationException |
442
|
|
|
*/ |
443
|
22 |
|
protected function generateFakeData(): array |
444
|
|
|
{ |
445
|
22 |
|
$this->checkEntityIsInConfig(); |
446
|
|
|
|
447
|
22 |
|
$colsInConfig = $this->configEntites[$this->entity]['cols']; |
448
|
22 |
|
$row = []; |
449
|
22 |
|
foreach ($colsInConfig as $colName => $colProps) { |
450
|
22 |
|
$this->checkColIsInEntity($colName); |
451
|
|
|
|
452
|
|
|
// Check if faker is already used for this column name, so we can use the same faker. |
453
|
21 |
|
if (!isset($this->fakers[$colName][$colProps['method']])) { |
454
|
21 |
|
$language = $this->configuration->getConfigValues()['language']; |
455
|
21 |
|
$faker = \Faker\Factory::create($language); |
456
|
21 |
|
$faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($faker)); |
457
|
|
|
|
458
|
|
|
// Check if column should be unique. |
459
|
21 |
|
if ($colProps['method'] === 'uniqueWord') { |
460
|
|
|
// Count number of unique words to be taken from the dictionary. |
461
|
|
|
$count = $this->dbUtils->countResults($this->entity); |
462
|
|
|
$faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($faker, $count, $language)); |
463
|
|
|
$faker = $faker->unique(true); |
464
|
|
|
} |
465
|
21 |
|
$this->fakers[$colName][$colProps['method']] = $faker; |
466
|
|
|
} |
467
|
|
|
|
468
|
21 |
|
$faker = $this->fakers[$colName][$colProps['method']]; |
469
|
|
|
|
470
|
|
|
$data = \call_user_func_array( |
471
|
21 |
|
[$faker, $colProps['method']], |
472
|
21 |
|
$colProps['params'] |
473
|
|
|
); |
474
|
|
|
|
475
|
21 |
|
if (!is_scalar($data)) { |
476
|
1 |
|
$msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden"; |
477
|
1 |
|
throw new NeuralizerConfigurationException($msg); |
478
|
|
|
} |
479
|
|
|
|
480
|
21 |
|
$row[$colName] = trim($data); |
481
|
|
|
|
482
|
21 |
|
$colLength = $this->entityCols[$colName]['length']; |
483
|
|
|
// Cut the value if too long ... |
484
|
21 |
|
if (!empty($colLength) && \strlen($row[$colName]) > $colLength) { |
485
|
21 |
|
$row[$colName] = substr($row[$colName], 0, $colLength - 1); |
486
|
|
|
} |
487
|
|
|
} |
488
|
|
|
|
489
|
20 |
|
return $row; |
490
|
|
|
} |
491
|
|
|
|
492
|
|
|
} |
493
|
|
|
|