webnet-fr /
database-anonymizer
| 1 | <?php |
||
| 2 | |||
| 3 | namespace WebnetFr\DatabaseAnonymizer\ConfigGuesser; |
||
| 4 | |||
| 5 | use Doctrine\DBAL\Connection; |
||
| 6 | use WebnetFr\DatabaseAnonymizer\Exception\GuesserMissingHintException; |
||
| 7 | |||
/**
 * Guesses which database columns hold personal data by comparing the words
 * of each column name with a built-in list of hints.
 *
 * The whole public API is static, so the hint list is built lazily on first
 * use; creating an instance is not required.
 *
 * @author Vlad Riabchenko
 */
class ConfigGuesser
{
    /**
     * Hint list shared by every static call; null until it is first built.
     *
     * @var ConfigGuesserHint[]|null
     */
    private static $hints;

    /**
     * Rebuilds the shared hint list with fresh hint objects.
     *
     * Kept for backward compatibility with callers that instantiate the
     * guesser before using the static methods.
     */
    public function __construct()
    {
        self::$hints = self::buildHints();
    }

    /**
     * Guesses a hint for every column of every table reachable through the
     * given connection.
     *
     * Columns for which no hint matches are skipped, so a table without any
     * guessable column does not appear in the result.
     *
     * @param Connection $connection
     *
     * @return array hints indexed by table name, then by column name
     */
    public static function guess(Connection $connection)
    {
        $guessed = [];
        $schemaManager = $connection->getSchemaManager();

        foreach ($schemaManager->listTables() as $table) {
            $tableName = $table->getName();

            foreach ($table->getColumns() as $column) {
                $columnName = $column->getName();

                try {
                    $guessed[$tableName][$columnName] = self::guessColumn($columnName);
                } catch (GuesserMissingHintException $e) {
                    // Column cannot be guessed and it does not seem to be
                    // personal information. Skip it.
                }
            }
        }

        return $guessed;
    }

    /**
     * Returns the first hint whose words match the given column name.
     *
     * Hints are tried in declaration order and the first match wins. The
     * returned object is the shared hint instance, not a copy.
     *
     * @param string $name column name in snake_case, camelCase or PascalCase
     *
     * @throws GuesserMissingHintException when no hint matches the column name
     *
     * @return ConfigGuesserHint
     */
    public static function guessColumn(string $name)
    {
        $columnWords = self::toWords($name);

        foreach (self::getHints() as $hint) {
            foreach ($hint->words as $word) {
                if (self::matches($word, $columnWords)) {
                    return $hint;
                }
            }
        }

        throw new GuesserMissingHintException();
    }

    /**
     * Returns the shared hint list, building it on first access.
     *
     * Without this lazy initialisation the static methods would iterate over
     * null whenever no instance had been constructed beforehand.
     *
     * @return ConfigGuesserHint[]
     */
    private static function getHints(): array
    {
        if (null === self::$hints) {
            self::$hints = self::buildHints();
        }

        return self::$hints;
    }

    /**
     * Creates the built-in hint list.
     *
     * Each entry of words() is either a single word or a list of words that
     * must all be present in the column name. The constructor argument is
     * presumably the name of the generator used to anonymize the column
     * (confirm in ConfigGuesserHint).
     *
     * @return ConfigGuesserHint[]
     */
    private static function buildHints(): array
    {
        return [
            (new ConfigGuesserHint('firstName'))->words([['first', 'name'], 'firstname'])->arguments([null]),
            (new ConfigGuesserHint('firstName'))->words(['prenom'])->locale('fr_FR')->arguments([null]),
            (new ConfigGuesserHint('lastName'))->words([['last', 'name'], 'lastname']),
            (new ConfigGuesserHint('lastName'))->words(['nom'])->locale('fr_FR'),
            (new ConfigGuesserHint('city'))->words(['city', 'town', 'ville']),
            (new ConfigGuesserHint('streetAddress'))->words(['address', 'adresse']),
            (new ConfigGuesserHint('postcode'))->words([['post', 'code'], 'zip']),
            (new ConfigGuesserHint('postcode'))->words([['code', 'postal'], 'cp'])->locale('fr_FR'),
            (new ConfigGuesserHint('country'))->words(['country', 'pays']),
            (new ConfigGuesserHint('phoneNumber'))->words(['phone']),
            (new ConfigGuesserHint('realText'))->words(['comment'])->arguments([200, 2]),
            (new ConfigGuesserHint('realText'))->words(['commentaire'])->arguments([200, 2])->locale('fr_FR'),
            (new ConfigGuesserHint('dateTimeBetween'))->words(['birthdate', 'birthday'])->arguments(['-30 years', 'now', null])->date(true),
            (new ConfigGuesserHint('safeEmail'))->words(['email', 'mail']),
            (new ConfigGuesserHint('userName'))->words([['user', 'name'], 'username'])->unique(true),
            (new ConfigGuesserHint('password'))->words(['password']),
            (new ConfigGuesserHint('creditCardNumber'))->words([['credit', 'card'], ['credit', 'carte'], 'cb']),
            (new ConfigGuesserHint('siren'))->words(['siren'])->locale('fr_FR')->unique(true),
            (new ConfigGuesserHint('siret'))->words(['siret'])->locale('fr_FR')->unique(true),
            (new ConfigGuesserHint('vat'))->words(['vat'])->locale('fr_FR')->unique(true),
            (new ConfigGuesserHint('nir'))->words(['nir', ['securite', 'sociale']])->locale('fr_FR')->unique(true),
        ];
    }

    /**
     * Tells whether one hint entry matches the words of a column name.
     *
     * @param string|string[] $word        a single word, or words that must all be present
     * @param string[]        $columnWords lower-cased words of the column name
     *
     * @return bool
     */
    private static function matches($word, array $columnWords): bool
    {
        if (\is_string($word)) {
            return \in_array($word, $columnWords, true);
        }

        if (\is_array($word)) {
            // Every word of the group has to occur in the column name, i.e.
            // nothing of the group is left once the column words are removed.
            return [] === array_diff($word, $columnWords);
        }

        return false;
    }

    /**
     * Splits a column name into lower-cased words.
     *
     * Digits are removed first, then the name is converted to snake_case and
     * cut at every underscore.
     *
     * @param string $str
     *
     * @return string[]
     *
     * @author Vlad Riabchenko
     */
    private static function toWords(string $str): array
    {
        $withoutDigits = preg_replace('/\d/', '', $str);

        if (null === $withoutDigits) {
            // preg_replace() reports a PCRE failure with null; fall back to
            // the raw name rather than passing null on.
            $withoutDigits = $str;
        }

        return explode('_', self::toSnakeCase($withoutDigits));
    }

    /**
     * Converts a camelCase or PascalCase string to lower-cased snake_case.
     *
     * @param string $str
     *
     * @return string
     */
    private static function toSnakeCase(string $str): string
    {
        $pieces = preg_split('/((?<=.)(?=[[:upper:]][[:lower:]])|(?<=[[:lower:]])(?=[[:upper:]]))/', $str);

        if (false === $pieces) {
            // preg_split() reports a PCRE failure with false, which must not
            // reach implode(); keep the input as a single piece instead.
            $pieces = [$str];
        }

        return strtolower(implode('_', $pieces));
    }
}
||
| 125 |