Passed
Pull Request — master (#15)
by Jeroen
02:53
created

AbstractAnonymizer::generateFakeData()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 30
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 22
nc 4
nop 0
dl 0
loc 30
ccs 17
cts 17
cp 1
crap 7
rs 8.6346
c 0
b 0
f 0
1
<?php
2
/**
3
 * neuralyzer : Data Anonymization Library and CLI Tool
4
 *
5
 * PHP Version 7.1
6
 *
7
 * @author    Emmanuel Dyan
8
 * @author    Rémi Sauvat
9
 * @copyright 2018 Emmanuel Dyan
10
 *
11
 * @package edyan/neuralyzer
12
 *
13
 * @license GNU General Public License v2.0
14
 *
15
 * @link https://github.com/edyan/neuralyzer
16
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Edyan\Neuralyzer\Configuration\Reader;
21
use Edyan\Neuralyzer\Exception\NeuralyzerConfigurationException;
22
23
/**
 * Abstract Anonymizer, that can be implemented as a DB Anonymizer for example.
 * Its only goal is to anonymize data held in a simple array,
 * not to read it from or write it to anywhere.
 */
abstract class AbstractAnonymizer
{
    /**
     * Update data into table
     */
    public const UPDATE_TABLE = 1;

    /**
     * Insert data into table
     */
    public const INSERT_TABLE = 2;

    /**
     * Set the batch size for updates
     *
     * @var int
     */
    protected $batchSize = 1000;

    /**
     * Contains the configuration object
     *
     * @var Reader
     */
    protected $configuration;

    /**
     * Configuration of entities (the 'entities' section of the configuration values)
     *
     * @var array
     */
    protected $configEntities = [];

    /**
     * List of used fakers
     *
     * @var array
     */
    protected $fakers = [];

    /**
     * Current table (entity) to process
     *
     * @var string
     */
    protected $entity;

    /**
     * Current table (entity) Columns
     *
     * @var array
     */
    protected $entityCols;

    /**
     * Limit the number of updates or creates
     *
     * @var int
     */
    protected $limit = 0;

    /**
     * Pretend we do the update, but do nothing
     *
     * @var bool
     */
    protected $pretend = true;

    /**
     * Return the generated SQL
     *
     * @var bool
     */
    protected $returnRes = false;

    /**
     * Faker generator built by initFaker(); null until it has been initialized
     *
     * @var \Faker\Generator|null
     */
    protected $faker;

    /**
     * Process the entity according to the anonymizer type
     *
     * @param string        $entity   Entity's name
     * @param callable|null $callback Callback function with current row num as parameter
     *
     * @return array
     */
    abstract public function processEntity(
        string $entity,
        ?callable $callback = null
    ): array;

    /**
     * Set the configuration, cache its 'entities' section and (re)build the faker generator
     *
     * @param Reader $configuration
     */
    public function setConfiguration(Reader $configuration): void
    {
        $this->configuration = $configuration;
        $this->configEntities = $configuration->getConfigValues()['entities'];
        $this->initFaker();
    }

    /**
     * Limit of fake generated records for updates and creates
     *
     * When the limit is lower than the current batch size, the batch size
     * is lowered to the limit as well.
     *
     * @param int $limit
     *
     * @return $this
     */
    public function setLimit(int $limit)
    {
        $this->limit = $limit;
        if ($this->limit < $this->batchSize) {
            $this->batchSize = $this->limit;
        }

        return $this;
    }

    /**
     * Activate or deactivate the pretending mode (dry run)
     *
     * @param bool $pretend
     *
     * @return $this
     */
    public function setPretend(bool $pretend)
    {
        $this->pretend = $pretend;

        return $this;
    }

    /**
     * Return or not a result (like an SQL Query that has
     * been generated with fake data)
     *
     * @param bool $returnRes
     *
     * @return $this
     */
    public function setReturnRes(bool $returnRes)
    {
        $this->returnRes = $returnRes;

        return $this;
    }

    /**
     * Evaluate, from the configuration, whether the current entity has to be
     * updated or inserted into
     *
     * @return int self::UPDATE_TABLE or self::INSERT_TABLE; 0 when the entity
     *             has no 'cols' key or its action is neither 'update' nor 'insert'
     *
     * @throws NeuralyzerConfigurationException
     */
    protected function whatToDoWithEntity(): int
    {
        $this->checkEntityIsInConfig();

        $entityConfig = $this->configEntities[$this->entity];

        $actions = 0;
        if (array_key_exists('cols', $entityConfig)) {
            switch ($entityConfig['action']) {
                case 'update':
                    $actions |= self::UPDATE_TABLE;
                    break;
                case 'insert':
                    $actions |= self::INSERT_TABLE;
                    break;
            }
        }

        return $actions;
    }

    /**
     * Generate fake data for the current entity and return it as an array
     * indexed by column name
     *
     * @return array
     *
     * @throws NeuralyzerConfigurationException When the entity or one of its columns is unknown,
     *                                          or when a faker method returns a non-scalar value
     */
    protected function generateFakeData(): array
    {
        $this->checkEntityIsInConfig();

        // Reuse the generator prepared by initFaker() instead of building a new
        // one (and re-registering the providers) for every generated row. Sharing
        // the instance also keeps the "unique" tracking state between rows, so a
        // column flagged as unique does not get the same value twice.
        if ($this->faker === null) {
            $this->initFaker();
        }

        $colsInConfig = $this->configEntities[$this->entity]['cols'];
        $row = [];
        foreach ($colsInConfig as $colName => $colProps) {
            $this->checkColIsInEntity($colName);

            $generator = $this->faker;
            if (($colProps['unique'] ?? false) === true) {
                $generator = $this->faker->unique();
            }

            $data = \call_user_func_array(
                [$generator, $colProps['method']],
                $colProps['params']
            );
            if (!is_scalar($data)) {
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
                throw new NeuralyzerConfigurationException($msg);
            }

            // Explicit cast: scalars other than strings (int, float, bool) are allowed
            $row[$colName] = trim((string) $data);

            $colLength = $this->entityCols[$colName]['length'];
            // Cut the value if too long ...
            // NOTE(review): the value is cut to ($colLength - 1) bytes although a value
            // of exactly $colLength bytes is accepted above - confirm this is intended.
            if (!empty($colLength) && \strlen($row[$colName]) > $colLength) {
                $row[$colName] = substr($row[$colName], 0, $colLength - 1);
            }
        }

        return $row;
    }

    /**
     * Make sure that entity is defined in the configuration
     *
     * @throws NeuralyzerConfigurationException
     */
    protected function checkEntityIsInConfig(): void
    {
        if (empty($this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                'No entities found. Have you loaded a configuration file ?'
            );
        }

        if (!array_key_exists($this->entity, $this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                "No configuration for that entity ({$this->entity})"
            );
        }
    }

    /**
     * Verify a column is defined in the real entityCols
     *
     * @param string $colName
     *
     * @throws NeuralyzerConfigurationException
     */
    protected function checkColIsInEntity(string $colName): void
    {
        if (!array_key_exists($colName, $this->entityCols)) {
            throw new NeuralyzerConfigurationException("Col $colName does not exist");
        }
    }

    /**
     * Init Faker for the configured language and add the additional providers
     */
    protected function initFaker(): void
    {
        $language = $this->configuration->getConfigValues()['language'];
        $this->faker = \Faker\Factory::create($language);
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($this->faker));
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($this->faker, $language));
    }
}
296