1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* neuralyzer : Data Anonymization Library and CLI Tool |
4
|
|
|
* |
5
|
|
|
* PHP Version 7.1 |
6
|
|
|
* |
7
|
|
|
* @author Emmanuel Dyan |
8
|
|
|
* @author Rémi Sauvat |
9
|
|
|
* @copyright 2018 Emmanuel Dyan |
10
|
|
|
* |
11
|
|
|
* @package edyan/neuralyzer |
12
|
|
|
* |
13
|
|
|
* @license GNU General Public License v2.0 |
14
|
|
|
* |
15
|
|
|
* @link https://github.com/edyan/neuralyzer |
16
|
|
|
*/ |
17
|
|
|
|
18
|
|
|
namespace Edyan\Neuralyzer\Anonymizer; |
19
|
|
|
|
20
|
|
|
use Doctrine\DBAL\Configuration as DbalConfiguration; |
21
|
|
|
use Doctrine\DBAL\Connection; |
22
|
|
|
use Doctrine\DBAL\DriverManager as DbalDriverManager; |
23
|
|
|
use Edyan\Neuralyzer\Exception\NeuralizerConfigurationException; |
24
|
|
|
use Edyan\Neuralyzer\Exception\NeuralizerException; |
25
|
|
|
use Edyan\Neuralyzer\Helper\DB as DBHelper; |
26
|
|
|
use Edyan\Neuralyzer\Utils\CSVWriter; |
27
|
|
|
use Edyan\Neuralyzer\Utils\DBUtils; |
28
|
|
|
|
29
|
|
|
/** |
30
|
|
|
* Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL |
31
|
|
|
*/ |
32
|
|
|
class DB extends AbstractAnonymizer |
33
|
|
|
{ |
34
|
|
|
/** |
35
|
|
|
* Doctrine DB Adapter |
36
|
|
|
* |
37
|
|
|
* @var Connection |
38
|
|
|
*/ |
39
|
|
|
private $conn; |
40
|
|
|
|
41
|
|
|
/** |
42
|
|
|
* A helper for the current driver |
43
|
|
|
* |
44
|
|
|
* @var DBHelper\AbstractDBHelper |
45
|
|
|
*/ |
46
|
|
|
private $dbHelper; |
47
|
|
|
|
48
|
|
|
/** |
49
|
|
|
* Various generic utils |
50
|
|
|
* |
51
|
|
|
* @var DBUtils |
52
|
|
|
*/ |
53
|
|
|
private $dbUtils; |
54
|
|
|
|
55
|
|
|
/** |
56
|
|
|
* Primary Key |
57
|
|
|
* |
58
|
|
|
* @var string |
59
|
|
|
*/ |
60
|
|
|
private $priKey; |
61
|
|
|
|
62
|
|
|
/** |
63
|
|
|
* Define the way we update / insert data |
64
|
|
|
* |
65
|
|
|
* @var string |
66
|
|
|
*/ |
67
|
|
|
private $mode = 'queries'; |
68
|
|
|
|
69
|
|
|
/** |
70
|
|
|
* Contains queries if returnRes is true |
71
|
|
|
* |
72
|
|
|
* @var array |
73
|
|
|
*/ |
74
|
|
|
private $queries = []; |
75
|
|
|
|
76
|
|
|
/** |
77
|
|
|
* File resource for the csv (batch mode) |
78
|
|
|
* |
79
|
|
|
* @var CSVWriter |
80
|
|
|
*/ |
81
|
|
|
private $csv; |
82
|
|
|
|
83
|
|
|
/** |
84
|
|
|
* Define available update modes |
85
|
|
|
* |
86
|
|
|
* @var array |
87
|
|
|
*/ |
88
|
|
|
private $updateMode = [ |
89
|
|
|
'queries' => 'doUpdateByQueries', |
90
|
|
|
'batch' => 'doBatchUpdate', |
91
|
|
|
]; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* Define available insert modes |
95
|
|
|
* |
96
|
|
|
* @var array |
97
|
|
|
*/ |
98
|
|
|
private $insertMode = [ |
99
|
|
|
'queries' => 'doInsertByQueries', |
100
|
|
|
'batch' => 'doBatchInsert', |
101
|
|
|
]; |
102
|
|
|
|
103
|
|
|
/** |
104
|
|
|
* @param array $params Parameters to send to Doctrine DB |
105
|
|
|
* |
106
|
|
|
* @throws \Doctrine\DBAL\DBALException |
107
|
|
|
*/ |
108
|
|
|
public function initDatabaseConnection(array $params) |
109
|
|
|
{ |
110
|
|
|
$dbHelperClass = DBHelper\DriverGuesser::getDBHelper($params['driver']); |
111
|
|
|
|
112
|
|
|
// Set specific options |
113
|
|
|
$params['driverOptions'] = $dbHelperClass::getDriverOptions(); |
114
|
|
|
$this->conn = DbalDriverManager::getConnection($params, new DbalConfiguration()); |
115
|
|
|
$this->conn->setFetchMode(\Doctrine\DBAL\FetchMode::ASSOCIATIVE); |
116
|
|
|
|
117
|
|
|
$this->dbUtils = new DBUtils($this->conn); |
118
|
|
|
$this->dbHelper = new $dbHelperClass($this->conn); |
119
|
|
|
} |
120
|
|
|
|
121
|
|
|
/** |
122
|
|
|
* Get Doctrine Connection |
123
|
|
|
* |
124
|
|
|
* @return Connection |
125
|
|
|
*/ |
126
|
|
|
public function getConn(): Connection |
127
|
|
|
{ |
128
|
|
|
return $this->conn; |
129
|
|
|
} |
130
|
|
|
|
131
|
|
|
|
132
|
|
|
/** |
133
|
|
|
* Set the mode for update / insert |
134
|
|
|
* |
135
|
|
|
* @param string $mode |
136
|
|
|
* |
137
|
|
|
* @return DB |
138
|
|
|
*/ |
139
|
|
|
public function setMode(string $mode): DB |
140
|
|
|
{ |
141
|
|
|
if (!in_array($mode, ['queries', 'batch'])) { |
142
|
|
|
throw new NeuralizerException('Mode could be only queries or batch'); |
143
|
|
|
} |
144
|
|
|
|
145
|
|
|
if ($mode === 'batch') { |
146
|
|
|
$this->csv = new CSVWriter(); |
147
|
|
|
$this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV()); |
148
|
|
|
} |
149
|
|
|
|
150
|
|
|
$this->mode = $mode; |
151
|
|
|
|
152
|
|
|
return $this; |
153
|
|
|
} |
154
|
|
|
|
155
|
|
|
|
156
|
|
|
/** |
157
|
|
|
* Process an entity by reading / writing to the DB |
158
|
|
|
* |
159
|
|
|
* @param string $entity |
160
|
|
|
* @param callable|null $callback |
161
|
|
|
* |
162
|
|
|
* @return void|array |
163
|
|
|
*/ |
164
|
1 |
|
public function processEntity(string $entity, callable $callback = null): array |
165
|
|
|
{ |
166
|
1 |
|
$this->dbUtils->assertTableExists($entity); |
167
|
|
|
|
168
|
|
|
$this->priKey = $this->dbUtils->getPrimaryKey($entity); |
169
|
|
|
$this->entityCols = $this->dbUtils->getTableCols($entity); |
170
|
|
|
$this->entity = $entity; |
171
|
|
|
|
172
|
|
|
$actionsOnThatEntity = $this->whatToDoWithEntity(); |
173
|
|
|
$this->queries = []; |
174
|
|
|
|
175
|
|
|
// Wrap everything in a transaction |
176
|
|
|
try { |
177
|
|
|
$this->conn->beginTransaction(); |
178
|
|
|
|
179
|
|
|
if ($actionsOnThatEntity & self::TRUNCATE_TABLE) { |
180
|
|
|
$where = $this->getWhereConditionInConfig(); |
181
|
|
|
$query = $this->runDelete($where); |
182
|
|
|
($this->returnRes === true ? array_push($this->queries, $query) : ''); |
183
|
|
|
} |
184
|
|
|
|
185
|
|
|
if ($actionsOnThatEntity & self::UPDATE_TABLE) { |
186
|
|
|
$this->updateData($callback); |
187
|
|
|
} |
188
|
|
|
|
189
|
|
|
if ($actionsOnThatEntity & self::INSERT_TABLE) { |
190
|
|
|
$this->insertData($callback); |
191
|
|
|
} |
192
|
|
|
|
193
|
|
|
$this->conn->commit(); |
194
|
|
|
} catch (\Exception $e) { |
195
|
|
|
$this->conn->rollback(); |
196
|
|
|
$this->conn->close(); // To avoid locks |
197
|
|
|
|
198
|
|
|
throw $e; |
199
|
|
|
} |
200
|
|
|
|
201
|
|
|
return $this->queries; |
202
|
|
|
} |
203
|
|
|
|
204
|
|
|
|
205
|
|
|
/** |
206
|
|
|
* Execute the Delete with Doctrine Query Builder |
207
|
|
|
* |
208
|
|
|
* @param string $where |
209
|
|
|
* |
210
|
|
|
* @return string |
211
|
|
|
*/ |
212
|
|
|
private function runDelete(string $where): string |
213
|
|
|
{ |
214
|
|
|
$queryBuilder = $this->conn->createQueryBuilder(); |
215
|
|
|
$queryBuilder = $queryBuilder->delete($this->entity); |
216
|
|
|
if (!empty($where)) { |
217
|
|
|
$queryBuilder = $queryBuilder->where($where); |
218
|
|
|
} |
219
|
|
|
$sql = $queryBuilder->getSQL(); |
220
|
|
|
|
221
|
|
|
if ($this->pretend === true) { |
222
|
|
|
return $sql; |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
$queryBuilder->execute(); |
226
|
|
|
|
227
|
|
|
return $sql; |
228
|
|
|
} |
229
|
|
|
|
230
|
|
|
|
231
|
|
|
/** |
232
|
|
|
* Update data of db table. |
233
|
|
|
* |
234
|
|
|
* @param callable $callback |
235
|
|
|
* |
236
|
|
|
* @throws NeuralizerException |
237
|
|
|
*/ |
238
|
|
|
private function updateData($callback = null): void |
239
|
|
|
{ |
240
|
|
|
$queryBuilder = $this->conn->createQueryBuilder(); |
241
|
|
|
if ($this->limit === 0) { |
242
|
|
|
$this->setLimit($this->dbUtils->countResults($this->entity)); |
243
|
|
|
} |
244
|
|
|
|
245
|
|
|
$this->evaluateExpressionUtils($this->configuration->getPreActions()); |
246
|
|
|
|
247
|
|
|
$startAt = 0; // The first part of the limit (offset) |
248
|
|
|
$num = 0; // The number of rows updated |
249
|
|
|
while ($num < $this->limit) { |
250
|
|
|
$rows = $queryBuilder |
251
|
|
|
->select('*')->from($this->entity) |
252
|
|
|
->setFirstResult($startAt)->setMaxResults($this->batchSize) |
253
|
|
|
->orderBy($this->priKey) |
254
|
|
|
->execute(); |
255
|
|
|
|
256
|
|
|
// I need to read line by line if I have to update the table |
257
|
|
|
// to make sure I do update by update (slower but no other choice for now) |
258
|
|
|
foreach ($rows as $row) { |
259
|
|
|
// Call the right method according to the mode |
260
|
|
|
$this->{$this->updateMode[$this->mode]}($row); |
261
|
|
|
|
262
|
|
|
if (!is_null($callback)) { |
263
|
|
|
$callback(++$num); |
264
|
|
|
} |
265
|
|
|
// Have to exit now as we have reached the max |
266
|
|
|
if ($num >= $this->limit) { |
267
|
|
|
break 2; |
268
|
|
|
} |
269
|
|
|
} |
270
|
|
|
// Move the offset |
271
|
|
|
// Make sure the loop ends if we have nothing to process |
272
|
|
|
$num = $startAt += $this->batchSize; |
273
|
|
|
} |
274
|
|
|
// Run a final method if defined |
275
|
|
|
if ($this->mode === 'batch') { |
276
|
|
|
$this->loadDataInBatch('update'); |
277
|
|
|
} |
278
|
|
|
|
279
|
|
|
$this->evaluateExpressionUtils($this->configuration->getPostActions()); |
280
|
|
|
} |
281
|
|
|
|
282
|
|
|
|
283
|
|
|
/** |
284
|
|
|
* Execute the Update with Doctrine QueryBuilder |
285
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
286
|
|
|
* |
287
|
|
|
* @param array $row Full row |
288
|
|
|
*/ |
289
|
|
|
private function doUpdateByQueries(array $row): void |
290
|
|
|
{ |
291
|
|
|
$data = $this->generateFakeData(); |
292
|
|
|
|
293
|
|
|
$queryBuilder = $this->conn->createQueryBuilder(); |
294
|
|
|
$queryBuilder = $queryBuilder->update($this->entity); |
295
|
|
|
foreach ($data as $field => $value) { |
296
|
|
|
$value = empty($row[$field]) ? |
297
|
|
|
$this->dbUtils->getEmptyValue($this->entityCols[$field]['type']) : |
298
|
|
|
$value; |
299
|
|
|
|
300
|
|
|
$condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]); |
301
|
|
|
$queryBuilder = $queryBuilder->set($field, $condition); |
302
|
|
|
$queryBuilder = $queryBuilder->setParameter(":$field", $value); |
303
|
|
|
} |
304
|
|
|
$queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}"); |
305
|
|
|
$queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]); |
306
|
|
|
|
307
|
|
|
$this->returnRes === true ? |
308
|
|
|
array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) : |
309
|
|
|
''; |
310
|
|
|
|
311
|
|
|
if ($this->pretend === false) { |
312
|
|
|
$queryBuilder->execute(); |
313
|
|
|
} |
314
|
|
|
} |
315
|
|
|
|
316
|
|
|
|
317
|
|
|
/** |
318
|
|
|
* Write the line required for a later LOAD DATA (or \copy) |
319
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
320
|
|
|
* |
321
|
|
|
* @param array $row Full row |
322
|
|
|
*/ |
323
|
|
|
private function doBatchUpdate(array $row): void |
324
|
|
|
{ |
325
|
|
|
$fakeData = $this->generateFakeData(); |
326
|
|
|
$data = []; |
327
|
|
|
// Go trough all fields, and take a value by priority |
328
|
|
|
foreach (array_keys($this->entityCols) as $field) { |
329
|
|
|
// First take the fake data |
330
|
|
|
$data[$field] = $row[$field]; |
331
|
|
|
if (!empty($row[$field]) && array_key_exists($field, $fakeData)) { |
332
|
|
|
$data[$field] = $fakeData[$field]; |
333
|
|
|
} |
334
|
|
|
} |
335
|
|
|
|
336
|
|
|
$this->csv->write($data); |
337
|
|
|
} |
338
|
|
|
|
339
|
|
|
|
340
|
|
|
/** |
341
|
|
|
* Insert data into table |
342
|
|
|
* |
343
|
|
|
* @param callable $callback |
344
|
|
|
*/ |
345
|
|
|
private function insertData($callback = null): void |
346
|
|
|
{ |
347
|
|
|
for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) { |
348
|
|
|
// Call the right method according to the mode |
349
|
|
|
$this->{$this->insertMode[$this->mode]}($rowNum); |
350
|
|
|
|
351
|
|
|
if (!is_null($callback)) { |
352
|
|
|
$callback($rowNum); |
353
|
|
|
} |
354
|
|
|
} |
355
|
|
|
|
356
|
|
|
// Run a final method if defined |
357
|
|
|
if ($this->mode === 'batch') { |
358
|
|
|
$this->loadDataInBatch('insert'); |
359
|
|
|
} |
360
|
|
|
} |
361
|
|
|
|
362
|
|
|
|
363
|
|
|
/** |
364
|
|
|
* Execute an INSERT with Doctrine QueryBuilder |
365
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
366
|
|
|
*/ |
367
|
|
|
private function doInsertByQueries(): void |
368
|
|
|
{ |
369
|
|
|
$data = $this->generateFakeData(); |
370
|
|
|
|
371
|
|
|
$queryBuilder = $this->conn->createQueryBuilder(); |
372
|
|
|
$queryBuilder = $queryBuilder->insert($this->entity); |
373
|
|
|
foreach ($data as $field => $value) { |
374
|
|
|
$queryBuilder = $queryBuilder->setValue($field, ":$field"); |
375
|
|
|
$queryBuilder = $queryBuilder->setParameter(":$field", $value); |
376
|
|
|
} |
377
|
|
|
|
378
|
|
|
$this->returnRes === true ? |
379
|
|
|
array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder)) : |
380
|
|
|
''; |
381
|
|
|
|
382
|
|
|
if ($this->pretend === false) { |
383
|
|
|
$queryBuilder->execute(); |
384
|
|
|
} |
385
|
|
|
} |
386
|
|
|
|
387
|
|
|
|
388
|
|
|
/** |
389
|
|
|
* Write the line required for a later LOAD DATA (or \copy) |
390
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
391
|
|
|
*/ |
392
|
|
|
private function doBatchInsert(): void |
393
|
|
|
{ |
394
|
|
|
$data = $this->generateFakeData(); |
395
|
|
|
$this->csv->write($data); |
396
|
|
|
} |
397
|
|
|
|
398
|
|
|
|
399
|
|
|
/** |
400
|
|
|
* If a file has been created for the batch mode, destroy it |
401
|
|
|
* @SuppressWarnings("unused") - Used dynamically |
402
|
|
|
* |
403
|
|
|
* @param string $mode "update" or "insert" |
404
|
|
|
*/ |
405
|
|
|
private function loadDataInBatch(string $mode): void |
406
|
|
|
{ |
407
|
|
|
$fields = array_keys($this->configEntites[$this->entity]['cols']); |
408
|
|
|
// Replace by all fields if update as we have to load everything |
409
|
|
|
if ($mode === 'update') { |
410
|
|
|
$fields = array_keys($this->entityCols); |
411
|
|
|
} |
412
|
|
|
|
413
|
|
|
// Load the data from the helper, only if pretend is false |
414
|
|
|
$filename = $this->csv->getRealPath(); |
415
|
|
|
$this->dbHelper->setPretend($this->pretend); |
416
|
|
|
$sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode); |
417
|
|
|
|
418
|
|
|
$this->returnRes === true ? array_push($this->queries, $sql) : ''; |
419
|
|
|
|
420
|
|
|
// Destroy the file |
421
|
|
|
unlink($this->csv->getRealPath()); |
422
|
|
|
} |
423
|
|
|
|
424
|
|
|
/** |
425
|
|
|
* Generate fake data for an entity and return it as an Array |
426
|
|
|
* |
427
|
|
|
* @return array |
428
|
|
|
* @throws NeuralizerConfigurationException |
429
|
|
|
*/ |
430
|
|
|
protected function generateFakeData(): array |
431
|
|
|
{ |
432
|
|
|
$this->checkEntityIsInConfig(); |
433
|
|
|
|
434
|
|
|
$colsInConfig = $this->configEntites[$this->entity]['cols']; |
435
|
|
|
$row = []; |
436
|
|
|
foreach ($colsInConfig as $colName => $colProps) { |
437
|
|
|
$this->checkColIsInEntity($colName); |
438
|
|
|
|
439
|
|
|
// Check if faker is already used for this column name, so we can use the same faker. |
440
|
|
|
if (!isset($this->fakers[$colName][$colProps['method']])) { |
441
|
|
|
$language = $this->configuration->getConfigValues()['language']; |
442
|
|
|
$faker = \Faker\Factory::create($language); |
443
|
|
|
$faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($faker)); |
444
|
|
|
|
445
|
|
|
// Check if column should be unique. |
446
|
|
|
if ($colProps['method'] === 'uniqueWord') { |
447
|
|
|
// Count number of unique words to be taken from the dictionary. |
448
|
|
|
$count = $this->dbUtils->countResults($this->entity); |
449
|
|
|
$faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($faker, $count, $language)); |
450
|
|
|
$faker = $faker->unique(true); |
451
|
|
|
} |
452
|
|
|
$this->fakers[$colName][$colProps['method']] = $faker; |
453
|
|
|
} |
454
|
|
|
|
455
|
|
|
$faker = $this->fakers[$colName][$colProps['method']]; |
456
|
|
|
|
457
|
|
|
$data = \call_user_func_array( |
458
|
|
|
[$faker, $colProps['method']], |
459
|
|
|
$colProps['params'] |
460
|
|
|
); |
461
|
|
|
|
462
|
|
|
if (!is_scalar($data)) { |
463
|
|
|
$msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden"; |
464
|
|
|
throw new NeuralizerConfigurationException($msg); |
465
|
|
|
} |
466
|
|
|
|
467
|
|
|
$row[$colName] = trim($data); |
468
|
|
|
|
469
|
|
|
$colLength = $this->entityCols[$colName]['length']; |
470
|
|
|
// Cut the value if too long ... |
471
|
|
|
if (!empty($colLength) && \strlen($row[$colName]) > $colLength) { |
472
|
|
|
$row[$colName] = substr($row[$colName], 0, $colLength - 1); |
473
|
|
|
} |
474
|
|
|
} |
475
|
|
|
|
476
|
|
|
return $row; |
477
|
|
|
} |
478
|
|
|
} |
479
|
|
|
|