1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
/** |
6
|
|
|
* neuralyzer : Data Anonymization Library and CLI Tool |
7
|
|
|
* |
8
|
|
|
* PHP Version 7.2 |
9
|
|
|
* |
10
|
|
|
* @author Emmanuel Dyan |
11
|
|
|
* @author Rémi Sauvat |
12
|
|
|
* |
13
|
|
|
* @copyright 2020 Emmanuel Dyan |
14
|
|
|
* |
15
|
|
|
* @package edyan/neuralyzer |
16
|
|
|
* |
17
|
|
|
* @license GNU General Public License v2.0 |
18
|
|
|
* |
19
|
|
|
* @link https://github.com/edyan/neuralyzer |
20
|
|
|
*/ |
21
|
|
|
|
22
|
|
|
namespace Edyan\Neuralyzer\Anonymizer; |
23
|
|
|
|
24
|
|
|
use Edyan\Neuralyzer\Exception\NeuralyzerConfigurationException; |
25
|
|
|
use Edyan\Neuralyzer\Exception\NeuralyzerException; |
26
|
|
|
use Edyan\Neuralyzer\Helper\DB\AbstractDBHelper; |
27
|
|
|
use Edyan\Neuralyzer\Utils\CSVWriter; |
28
|
|
|
use Edyan\Neuralyzer\Utils\DBUtils; |
29
|
|
|
use Edyan\Neuralyzer\Utils\Expression; |
30
|
|
|
|
31
|
|
|
/** |
32
|
|
|
* Implement AbstractAnonymizer for DB, to read and write data via Doctrine DBAL |
33
|
|
|
*/ |
34
|
|
|
class DB extends AbstractAnonymizer
{
    /**
     * Expression evaluator, used to run the pre / post actions of an entity
     *
     * @var Expression
     */
    private $expression;

    /**
     * Database utilities (connection, table metadata, raw SQL rendering)
     *
     * @var DBUtils
     */
    private $dbUtils;

    /**
     * Helper obtained from DBUtils, used for the CSV settings and to load data in batch mode
     *
     * @var AbstractDBHelper
     */
    private $dbHelper;

    /**
     * Primary key of the entity being processed
     *
     * @var string
     */
    private $priKey;

    /**
     * Define the way we update / insert data ("queries" or "batch")
     *
     * @var string
     */
    private $mode = 'queries';

    /**
     * Contains queries if returnRes is true
     *
     * @var array
     */
    private $queries = [];

    /**
     * CSV writer that receives the rows to load (batch mode)
     *
     * @var CSVWriter
     */
    private $csv;

    /**
     * Define available update modes (mode name => method called for each row)
     *
     * @var array
     */
    private $updateMode = [
        'queries' => 'doUpdateByQueries',
        'batch' => 'doBatchUpdate',
    ];

    /**
     * Define available insert modes (mode name => method called for each row)
     *
     * @var array
     */
    private $insertMode = [
        'queries' => 'doInsertByQueries',
        'batch' => 'doBatchInsert',
    ];

    /**
     * DB constructor.
     *
     * Stores the dependencies, fetches the DB helper from DBUtils
     * and asks it to register its custom types.
     */
    public function __construct(Expression $expression, DBUtils $dbUtils)
    {
        $this->expression = $expression;
        $this->dbUtils = $dbUtils;
        $this->dbHelper = $this->dbUtils->getDBHelper();
        $this->dbHelper->registerCustomTypes();
    }

    /**
     * Returns the dependency
     */
    public function getDbUtils(): DBUtils
    {
        return $this->dbUtils;
    }

    /**
     * Set the mode for update / insert
     *
     * @param string $mode Either "queries" or "batch"
     *
     * @throws NeuralyzerException If the mode is not one of the two supported values
     */
    public function setMode(string $mode): DB
    {
        if (! in_array($mode, ['queries', 'batch'], true)) {
            throw new NeuralyzerException('Mode could be only queries or batch');
        }

        $this->mode = $mode;

        return $this;
    }

    /**
     * Process an entity by reading / writing to the DB
     *
     * Updates and / or inserts are wrapped in a single transaction. On any
     * failure the transaction is rolled back, the connection is closed and
     * the original error is rethrown.
     *
     * @param string        $entity   Name of the table to process
     * @param callable|null $callback Called with the current row number after each processed row
     *
     * @throws \Throwable
     *
     * @return array The queries collected during the run (filled only if returnRes is true)
     */
    public function processEntity(string $entity, ?callable $callback = null): array
    {
        $this->dbUtils->assertTableExists($entity);

        $this->priKey = $this->dbUtils->getPrimaryKey($entity);
        $this->entityCols = $this->dbUtils->getTableCols($entity);
        $this->entity = $entity;

        $actionsOnThatEntity = $this->whatToDoWithEntity();
        $this->queries = [];

        // Prepare CSV
        if ($this->mode === 'batch') {
            $this->csv = new CSVWriter();
            $this->csv->setCsvControl('|', $this->dbHelper->getEnclosureForCSV());
        }

        // Wrap everything in a transaction
        $conn = $this->dbUtils->getConn();
        try {
            $conn->beginTransaction();

            if ($actionsOnThatEntity & self::UPDATE_TABLE) {
                $this->updateData($callback);
            }

            if ($actionsOnThatEntity & self::INSERT_TABLE) {
                $this->insertData($callback);
            }

            $conn->commit();
        } catch (\Throwable $e) {
            // \Throwable (not only \Exception) so that an \Error such as a
            // TypeError does not leave the transaction open.
            $conn->rollBack();
            $conn->close(); // To avoid locks

            throw $e;
        }

        return $this->queries;
    }

    /**
     * Generate fake data for an entity and return it as an Array
     *
     * For each column declared in the configuration, calls the configured
     * faker method, trims string results and shortens values that exceed
     * the column length.
     *
     * @return array Column name => generated value
     *
     * @throws NeuralyzerConfigurationException If a faker method returns a non scalar value
     */
    protected function generateFakeData(): array
    {
        $this->checkEntityIsInConfig();

        $colsInConfig = $this->configEntities[$this->entity]['cols'];
        $row = [];
        foreach ($colsInConfig as $colName => $colProps) {
            $this->checkColIsInEntity($colName);

            $data = \call_user_func_array(
                [$this->getFakerObject($this->entity, $colName, $colProps), $colProps['method']],
                $colProps['params']
            );

            if (! is_scalar($data)) {
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
                throw new NeuralyzerConfigurationException($msg);
            }

            $row[$colName] = is_string($data) ? trim($data) : $data;

            $colLength = $this->entityCols[$colName]['length'];
            if (empty($colLength)) {
                continue;
            }

            // Cut the value if too long ...
            // The value may be an int / float / bool: with strict_types enabled,
            // strlen() and substr() only accept strings, so measure and cut
            // its string representation.
            $asString = (string) $row[$colName];
            if (\strlen($asString) > $colLength) {
                // NOTE(review): keeps $colLength - 1 characters, one less than
                // the column allows - confirm whether that margin is intended.
                $row[$colName] = substr($asString, 0, $colLength - 1);
            }
        }

        return $row;
    }

    /**
     * Update data of db table.
     *
     * Reads the table by pages of batchSize rows ordered by primary key and
     * hands each row to the method matching the current mode, until "limit"
     * rows have been processed.
     *
     * @param callable|null $callback Called with the number of rows processed so far
     */
    private function updateData(?callable $callback = null): void
    {
        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
        if ($this->limit === 0) {
            $this->setLimit($this->dbUtils->countResults($this->entity));
        }

        $this->expression->evaluateExpressions(
            $this->configEntities[$this->entity]['pre_actions']
        );

        $startAt = 0; // The first part of the limit (offset)
        $num = 0; // The number of rows updated
        while ($num < $this->limit) {
            $rows = $queryBuilder
                ->select('*')->from($this->entity)
                ->setFirstResult($startAt)->setMaxResults($this->batchSize)
                ->orderBy($this->priKey)
                ->execute();

            // I need to read line by line if I have to update the table
            // to make sure I do update by update (slower but no other choice for now)
            foreach ($rows as $row) {
                // Call the right method according to the mode
                $this->{$this->updateMode[$this->mode]}($row);

                // Count every row, even without a callback, otherwise the
                // limit would never be reached inside a batch.
                ++$num;
                if ($callback !== null) {
                    $callback($num);
                }

                // Have to exit now as we have reached the max
                if ($num >= $this->limit) {
                    break 2;
                }
            }

            // Move the offset
            // Make sure the loop ends if we have nothing to process
            $num = $startAt += $this->batchSize;
        }

        // Run a final method if defined
        if ($this->mode === 'batch') {
            $this->loadDataInBatch('update');
        }

        $this->expression->evaluateExpressions(
            $this->configEntities[$this->entity]['post_actions']
        );
    }

    /**
     * Execute the Update with Doctrine QueryBuilder
     *
     * @SuppressWarnings("unused") - Used dynamically
     *
     * @param array $row Full row
     *
     * @throws NeuralyzerConfigurationException
     */
    private function doUpdateByQueries(array $row): void
    {
        $data = $this->generateFakeData();

        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
        $queryBuilder = $queryBuilder->update($this->entity);
        foreach ($data as $field => $value) {
            // A field that is empty in the original row gets the "empty value"
            // of its column type instead of the generated fake value
            if (empty($row[$field])) {
                $value = $this->dbUtils->getEmptyValue($this->entityCols[$field]['type']->getName());
            }

            $condition = $this->dbUtils->getCondition($field, $this->entityCols[$field]);
            $queryBuilder = $queryBuilder->set($field, $condition);
            $queryBuilder = $queryBuilder->setParameter(":{$field}", $value);
        }
        $queryBuilder = $queryBuilder->where("{$this->priKey} = :{$this->priKey}");
        $queryBuilder = $queryBuilder->setParameter(":{$this->priKey}", $row[$this->priKey]);

        if ($this->returnRes === true) {
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder));
        }

        if ($this->pretend === false) {
            $queryBuilder->execute();
        }
    }

    /**
     * Write the line required for a later LOAD DATA (or \copy)
     *
     * @SuppressWarnings("unused") - Used dynamically
     *
     * @param array $row Full row
     *
     * @throws NeuralyzerConfigurationException
     */
    private function doBatchUpdate(array $row): void
    {
        $fakeData = $this->generateFakeData();
        $data = [];
        // Go through all fields, and take a value by priority
        foreach (array_keys($this->entityCols) as $field) {
            // Start from the original value ...
            $data[$field] = $row[$field];
            // ... and replace it by the fake one only if the original
            // is not empty and a fake value has been generated for it
            if (! empty($row[$field]) && array_key_exists($field, $fakeData)) {
                $data[$field] = $fakeData[$field];
            }
        }

        $this->csv->write($data);
    }

    /**
     * Insert data into table
     *
     * Generates "limit" rows through the method matching the current mode.
     *
     * @param callable|null $callback Called with the current row number after each row
     */
    private function insertData(?callable $callback = null): void
    {
        $this->expression->evaluateExpressions(
            $this->configEntities[$this->entity]['pre_actions']
        );

        for ($rowNum = 1; $rowNum <= $this->limit; $rowNum++) {
            // Call the right method according to the mode
            $this->{$this->insertMode[$this->mode]}($rowNum);

            if ($callback !== null) {
                $callback($rowNum);
            }
        }

        // Run a final method if defined
        if ($this->mode === 'batch') {
            $this->loadDataInBatch('insert');
        }

        $this->expression->evaluateExpressions(
            $this->configEntities[$this->entity]['post_actions']
        );
    }

    /**
     * Execute an INSERT with Doctrine QueryBuilder
     *
     * @SuppressWarnings("unused") - Used dynamically
     *
     * @throws NeuralyzerConfigurationException
     */
    private function doInsertByQueries(): void
    {
        $data = $this->generateFakeData();

        $queryBuilder = $this->dbUtils->getConn()->createQueryBuilder();
        $queryBuilder = $queryBuilder->insert($this->entity);
        foreach ($data as $field => $value) {
            $queryBuilder = $queryBuilder->setValue($field, ":{$field}");
            $queryBuilder = $queryBuilder->setParameter(":{$field}", $value);
        }

        if ($this->returnRes === true) {
            array_push($this->queries, $this->dbUtils->getRawSQL($queryBuilder));
        }

        if ($this->pretend === false) {
            $queryBuilder->execute();
        }
    }

    /**
     * Write the line required for a later LOAD DATA (or \copy)
     *
     * @SuppressWarnings("unused") - Used dynamically
     *
     * @throws NeuralyzerConfigurationException
     */
    private function doBatchInsert(): void
    {
        $data = $this->generateFakeData();
        $this->csv->write($data);
    }

    /**
     * Load the CSV file written in batch mode through the DB helper,
     * then destroy the file
     *
     * @SuppressWarnings("unused") - Used dynamically
     *
     * @param string $mode "update" or "insert"
     */
    private function loadDataInBatch(string $mode): void
    {
        $fields = array_keys($this->configEntities[$this->entity]['cols']);
        // Replace by all fields if update as we have to load everything
        if ($mode === 'update') {
            $fields = array_keys($this->entityCols);
        }

        // Load the data from the helper, only if pretend is false
        $filename = $this->csv->getPathName();
        $this->dbHelper->setPretend($this->pretend);
        $sql = $this->dbHelper->loadData($this->entity, $filename, $fields, $mode);

        if ($this->returnRes === true) {
            array_push($this->queries, $sql);
        }

        // Destroy the file
        unlink($this->csv->getRealPath());
    }
}
428
|
|
|
|