Completed
Push — master ( fb988b...6625be )
by Emmanuel
07:10 queued 05:31
created

AbstractAnonymizer   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 263
Duplicated Lines 0 %

Test Coverage

Coverage 69.69%

Importance

Changes 0
Metric Value
eloc 68
dl 0
loc 263
ccs 46
cts 66
cp 0.6969
rs 10
c 0
b 0
f 0
wmc 20

9 Methods

Rating   Name   Duplication   Size   Complexity  
A checkColIsInEntity() 0 4 2
A setPretend() 0 5 1
A setReturnRes() 0 5 1
A initFaker() 0 6 1
A setConfiguration() 0 5 1
A whatToDoWithEntity() 0 19 4
A setLimit() 0 8 2
A generateFakeData() 0 28 5
A checkEntityIsInConfig() 0 10 3
1
<?php
2
/**
 * neuralyzer : Data Anonymization Library and CLI Tool
 *
 * PHP Version 7.1
 *
 * @author    Emmanuel Dyan
 * @author    Rémi Sauvat
 * @copyright 2018 Emmanuel Dyan
 *
 * @package edyan/neuralyzer
 *
 * @license GNU General Public License v2.0
 *
 * @link https://github.com/edyan/neuralyzer
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Edyan\Neuralyzer\Configuration\Reader;
21
use Edyan\Neuralyzer\Exception\NeuralyzerConfigurationException;
22
23
/**
 * Abstract Anonymizer, that can be implemented as a DB Anonymizer for example.
 *
 * Its only goal is to anonymize data held in a simple array; reading that data
 * from, or writing it to, any storage is left to the concrete implementation.
 */
abstract class AbstractAnonymizer
{
    /**
     * Update data into table
     */
    public const UPDATE_TABLE = 1;

    /**
     * Insert data into table
     */
    public const INSERT_TABLE = 2;

    /**
     * Set the batch size for updates
     *
     * @var int
     */
    protected $batchSize = 1000;

    /**
     * Contains the configuration object
     *
     * @var Reader
     */
    protected $configuration;

    /**
     * Configuration of entities
     *
     * @var array
     */
    protected $configEntities = [];

    /**
     * List of used fakers
     *
     * @var array
     */
    protected $fakers = [];

    /**
     * Current table (entity) to process
     *
     * @var string
     */
    protected $entity;

    /**
     * Current table (entity) Columns
     *
     * @var array
     */
    protected $entityCols;

    /**
     * Limit the number of updates or create
     *
     * @var int
     */
    protected $limit = 0;

    /**
     * Pretend we do the update, but do nothing
     *
     * @var bool
     */
    protected $pretend = true;

    /**
     * Return the generated SQL
     *
     * @var bool
     */
    protected $returnRes = false;

    /**
     * Faker generator shared by every generated row (built by initFaker())
     *
     * @var \Faker\Generator
     */
    protected $faker;


    /**
     * Process the entity according to the anonymizer type
     *
     * @param string        $entity   Entity's name
     * @param callable|null $callback Callback function with current row num as parameter
     *
     * @return array
     */
    abstract public function processEntity(
        string $entity,
        ?callable $callback = null
    ): array;


    /**
     * Set the configuration, cache its "entities" section and (re)build the
     * shared Faker generator for the configured language.
     *
     * @param Reader $configuration
     */
    public function setConfiguration(Reader $configuration): void
    {
        $this->configuration = $configuration;
        $this->configEntities = $configuration->getConfigValues()['entities'];
        $this->initFaker();
    }


    /**
     * Limit of fake generated records for updates and creates
     *
     * The batch size is lowered to the limit whenever the limit is smaller
     * than the current batch size.
     * NOTE(review): a limit of 0 therefore also sets the batch size to 0 -
     * confirm that callers never rely on 0 meaning "no limit" here.
     *
     * @param int $limit
     *
     * @return $this
     */
    public function setLimit(int $limit)
    {
        $this->limit = $limit;
        if ($this->limit < $this->batchSize) {
            $this->batchSize = $this->limit;
        }

        return $this;
    }


    /**
     * Activate or deactivate the pretending mode (dry run)
     *
     * @param bool $pretend
     *
     * @return $this
     */
    public function setPretend(bool $pretend)
    {
        $this->pretend = $pretend;

        return $this;
    }


    /**
     * Return or not a result (like an SQL Query that has
     * been generated with fake data)
     *
     * @param bool $returnRes
     *
     * @return $this
     */
    public function setReturnRes(bool $returnRes)
    {
        $this->returnRes = $returnRes;

        return $this;
    }


    /**
     * Evaluate, from the configuration, whether the current entity has to be
     * updated or inserted into.
     *
     * @return int self::UPDATE_TABLE, self::INSERT_TABLE, or 0 when the entity
     *             has no 'cols' section or its action is neither 'update' nor 'insert'
     *
     * @throws NeuralyzerConfigurationException
     */
    protected function whatToDoWithEntity(): int
    {
        $this->checkEntityIsInConfig();

        $entityConfig = $this->configEntities[$this->entity];

        // Without columns to anonymize there is nothing to do for this entity.
        if (!array_key_exists('cols', $entityConfig)) {
            return 0;
        }

        switch ($entityConfig['action'] ?? null) {
            case 'update':
                return self::UPDATE_TABLE;
            case 'insert':
                return self::INSERT_TABLE;
            default:
                return 0;
        }
    }


    /**
     * Generate fake data for the current entity and return it as an array
     * indexed by column name.
     *
     * The generator prepared by initFaker() is reused for every row instead of
     * building a new generator (and re-registering its providers) per call.
     *
     * @return array
     *
     * @throws NeuralyzerConfigurationException When the entity or one of its columns is
     *                                          unknown, or a faker method returns a
     *                                          non-scalar value
     */
    protected function generateFakeData(): array
    {
        $this->checkEntityIsInConfig();

        // Normally set by setConfiguration(); build it on demand otherwise.
        if ($this->faker === null) {
            $this->initFaker();
        }

        $colsInConfig = $this->configEntities[$this->entity]['cols'];
        $row = [];
        foreach ($colsInConfig as $colName => $colProps) {
            $this->checkColIsInEntity($colName);

            $data = \call_user_func_array(
                [$this->faker, $colProps['method']],
                $colProps['params'] ?? []
            );
            if (!is_scalar($data)) {
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
                throw new NeuralyzerConfigurationException($msg);
            }

            $value = trim((string) $data);

            // Cut the value if too long: keep exactly $colLength bytes so the
            // truncated value uses the whole column (the cut used to be one short).
            // NOTE(review): strlen/substr are byte-wise; a multi-byte character
            // may be split at the boundary - confirm the unit of 'length'.
            $colLength = $this->entityCols[$colName]['length'] ?? null;
            if (!empty($colLength) && \strlen($value) > $colLength) {
                $value = substr($value, 0, (int) $colLength);
            }

            $row[$colName] = $value;
        }

        return $row;
    }


    /**
     * Make sure that entity is defined in the configuration
     *
     * @throws NeuralyzerConfigurationException When no entities are loaded at all, or the
     *                                          current entity is not one of them
     */
    protected function checkEntityIsInConfig(): void
    {
        if (empty($this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                'No entities found. Have you loaded a configuration file ?'
            );
        }
        if (!array_key_exists($this->entity, $this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                "No configuration for that entity ({$this->entity})"
            );
        }
    }

    /**
     * Verify a column is defined in the real entityCols
     *
     * @param string $colName
     *
     * @throws NeuralyzerConfigurationException
     */
    protected function checkColIsInEntity(string $colName): void
    {
        if (!array_key_exists($colName, $this->entityCols)) {
            throw new NeuralyzerConfigurationException("Col $colName does not exist");
        }
    }

    /**
     * Init Faker for the configured language and add additional methods
     * (the Base and UniqueWord providers)
     */
    protected function initFaker(): void
    {
        $language = $this->configuration->getConfigValues()['language'];
        $this->faker = \Faker\Factory::create($language);
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($this->faker));
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($this->faker, $language));
    }
}
294