Passed
Pull Request — master (#15)
by Jeroen
04:36
created

AbstractAnonymizer::generateFakeData()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 28
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 20
nc 4
nop 0
dl 0
loc 28
ccs 17
cts 17
cp 1
crap 7
rs 8.6666
c 0
b 0
f 0
1
<?php
2
/**
3
 * neuralyzer : Data Anonymization Library and CLI Tool
4
 *
5
 * PHP Version 7.1
6
 *
7
 * @author    Emmanuel Dyan
8
 * @author    Rémi Sauvat
9
 * @copyright 2018 Emmanuel Dyan
10
 *
11
 * @package edyan/neuralyzer
12
 *
13
 * @license GNU General Public License v2.0
14
 *
15
 * @link https://github.com/edyan/neuralyzer
16
 */
17
18
namespace Edyan\Neuralyzer\Anonymizer;
19
20
use Edyan\Neuralyzer\Configuration\Reader;
21
use Edyan\Neuralyzer\Exception\NeuralyzerConfigurationException;
22
23
/**
 * Abstract Anonymizer, that can be implemented as DB Anonymizer for example.
 *
 * Its goal is only to anonymize any data, from a simple array;
 * it does not write it to, or read it from, anywhere.
 *
 * Typical lifecycle, as far as this class is concerned: a configuration is
 * injected with setConfiguration() (which also builds the Faker generator),
 * concrete subclasses set $entity / $entityCols and then call
 * whatToDoWithEntity() and generateFakeData() from processEntity().
 */
abstract class AbstractAnonymizer
{
    /**
     * Flag returned by whatToDoWithEntity(): update data into table
     */
    public const UPDATE_TABLE = 1;

    /**
     * Flag returned by whatToDoWithEntity(): insert data into table
     */
    public const INSERT_TABLE = 2;

    /**
     * Set the batch size for updates
     *
     * Lowered by setLimit() when a positive limit smaller than it is given.
     *
     * @var int
     */
    protected $batchSize = 1000;

    /**
     * Contains the configuration object
     *
     * Null until setConfiguration() has been called.
     *
     * @var Reader|null
     */
    protected $configuration;

    /**
     * Configuration of entities (the 'entities' section of the configuration),
     * indexed by entity name
     *
     * @var array
     */
    protected $configEntities = [];

    /**
     * List of used fakers
     *
     * @var array
     */
    protected $fakers = [];

    /**
     * Current table (entity) to process
     *
     * Not initialised by this class; expected to be set by the subclass
     * before whatToDoWithEntity() / generateFakeData() are called.
     *
     * @var string|null
     */
    protected $entity;

    /**
     * Current table (entity) columns, indexed by column name.
     * generateFakeData() reads the 'length' key of each column entry.
     *
     * @var array|null
     */
    protected $entityCols;

    /**
     * Limit the number of updates or create
     *
     * @var int
     */
    protected $limit = 0;

    /**
     * Pretend we do the update, but do nothing
     *
     * @var bool
     */
    protected $pretend = true;

    /**
     * Return the generated SQL
     *
     * @var bool
     */
    protected $returnRes = false;

    /**
     * Shared Faker generator, built by initFaker()
     *
     * @var \Faker\Generator|null
     */
    protected $faker;

    /**
     * Process the entity according to the anonymizer type
     *
     * @param string        $entity   Entity's name
     * @param callable|null $callback Callback function with current row num as parameter
     *
     * @return array
     */
    abstract public function processEntity(
        string $entity,
        ?callable $callback = null
    ): array;

    /**
     * Set the configuration
     *
     * Stores the reader, caches its 'entities' section and (re)builds the
     * Faker generator for the configured language.
     *
     * @param Reader $configuration
     */
    public function setConfiguration(Reader $configuration): void
    {
        $this->configuration = $configuration;
        $this->configEntities = $configuration->getConfigValues()['entities'];
        $this->initFaker();
    }

    /**
     * Limit of fake generated records for updates and creates
     *
     * When the limit is positive and smaller than the current batch size, the
     * batch size is reduced to the limit. A zero or negative limit leaves the
     * batch size untouched: 0 is the default value of $limit (presumably
     * meaning "no limit" - confirm against subclasses), and copying it would
     * leave a batch size of 0.
     *
     * @param int $limit
     *
     * @return $this
     */
    public function setLimit(int $limit)
    {
        $this->limit = $limit;

        if ($limit > 0 && $limit < $this->batchSize) {
            $this->batchSize = $limit;
        }

        return $this;
    }

    /**
     * Activate or deactivate the pretending mode (dry run)
     *
     * @param bool $pretend
     *
     * @return $this
     */
    public function setPretend(bool $pretend)
    {
        $this->pretend = $pretend;

        return $this;
    }

    /**
     * Return or not a result (like an SQL Query that has
     * been generated with fake data)
     *
     * @param bool $returnRes
     *
     * @return $this
     */
    public function setReturnRes(bool $returnRes)
    {
        $this->returnRes = $returnRes;

        return $this;
    }

    /**
     * Evaluate, from the configuration, if I have to update or insert into the table
     *
     * Returns 0 when the entity has no 'cols' section or when its 'action'
     * is neither 'update' nor 'insert'.
     *
     * @return int self::UPDATE_TABLE, self::INSERT_TABLE or 0
     *
     * @throws NeuralyzerConfigurationException If the entity is not configured
     */
    protected function whatToDoWithEntity(): int
    {
        $this->checkEntityIsInConfig();

        $entityConfig = $this->configEntities[$this->entity];

        $actions = 0;
        if (array_key_exists('cols', $entityConfig)) {
            // A missing 'action' key simply matches no case
            switch ($entityConfig['action'] ?? null) {
                case 'update':
                    $actions |= self::UPDATE_TABLE;
                    break;
                case 'insert':
                    $actions |= self::INSERT_TABLE;
                    break;
            }
        }

        return $actions;
    }

    /**
     * Generate fake data for an entity and return it as an Array
     *
     * For every column configured under the current entity's 'cols', the
     * configured faker 'method' is called with the configured 'params'. The
     * result is trimmed, then shortened when it exceeds the column 'length'
     * found in $entityCols.
     *
     * @return array Generated values indexed by column name
     *
     * @throws NeuralyzerConfigurationException If the entity or a column is not
     *                                          configured / known, or if a faker
     *                                          method returns a non scalar value
     */
    protected function generateFakeData(): array
    {
        $this->checkEntityIsInConfig();

        // Reuse the generator prepared by initFaker() instead of building a
        // new one for every row: Faker's unique() modifier remembers the
        // values it already returned on the generator it was called on, so a
        // per-row generator can never guarantee uniqueness across rows.
        if ($this->faker === null) {
            $this->initFaker();
        }

        $colsInConfig = $this->configEntities[$this->entity]['cols'];
        $row = [];
        foreach ($colsInConfig as $colName => $colProps) {
            $this->checkColIsInEntity($colName);

            $mustBeUnique = isset($colProps['unique']) && $colProps['unique'] === true;
            $generator = $mustBeUnique ? $this->faker->unique() : $this->faker;

            $data = \call_user_func_array(
                [$generator, $colProps['method']],
                $colProps['params'] ?? []
            );

            if (!is_scalar($data)) {
                $msg = "You must use faker methods that generate strings: '{$colProps['method']}' forbidden";
                throw new NeuralyzerConfigurationException($msg);
            }

            $row[$colName] = trim($data);

            $colLength = $this->entityCols[$colName]['length'];
            // Cut the value if too long ...
            // NOTE(review): the length is measured in bytes and the value is cut
            // to ($colLength - 1) bytes, not $colLength; kept as is because
            // callers may rely on it - confirm whether that is intended.
            if (!empty($colLength) && \strlen($row[$colName]) > $colLength) {
                $row[$colName] = substr($row[$colName], 0, $colLength - 1);
            }
        }

        return $row;
    }

    /**
     * Make sure that entity is defined in the configuration
     *
     * @throws NeuralyzerConfigurationException If no entity is configured at all
     *                                          or if the current one is missing
     */
    protected function checkEntityIsInConfig(): void
    {
        if (empty($this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                'No entities found. Have you loaded a configuration file ?'
            );
        }

        if (!array_key_exists($this->entity, $this->configEntities)) {
            throw new NeuralyzerConfigurationException(
                "No configuration for that entity ({$this->entity})"
            );
        }
    }

    /**
     * Verify a column is defined in the real entityCols
     *
     * @param string $colName
     *
     * @throws NeuralyzerConfigurationException If the column is unknown
     */
    protected function checkColIsInEntity(string $colName): void
    {
        if (!array_key_exists($colName, $this->entityCols)) {
            throw new NeuralyzerConfigurationException("Col $colName does not exist");
        }
    }

    /**
     * Init Faker and add additional methods
     *
     * Builds a generator for the configured 'language' and registers the
     * project's Base and UniqueWord providers on it. Requires the
     * configuration to be set.
     */
    protected function initFaker(): void
    {
        $language = $this->configuration->getConfigValues()['language'];
        $this->faker = \Faker\Factory::create($language);
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\Base($this->faker));
        $this->faker->addProvider(new \Edyan\Neuralyzer\Faker\Provider\UniqueWord($this->faker, $language));
    }
}
294