Guesser   A
last analyzed

Complexity

Total Complexity 9

Size/Duplication

Total Lines 126
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 52
c 2
b 0
f 0
dl 0
loc 126
ccs 21
cts 21
cp 1
rs 10
wmc 9

4 Methods

Rating   Name   Duplication   Size   Complexity  
A getColsNameMapping() 0 23 1
A getVersion() 0 3 1
A getColsTypeMapping() 0 36 1
A mapCol() 0 29 6
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * neuralyzer : Data Anonymization Library and CLI Tool
7
 *
8
 * PHP Version 7.2
9
 *
10
 * @author Emmanuel Dyan
11
 * @author Rémi Sauvat
12
 *
13
 * @copyright 2020 Emmanuel Dyan
14
 *
15
 * @package edyan/neuralyzer
16
 *
17
 * @license GNU General Public License v2.0
18
 *
19
 * @link https://github.com/edyan/neuralyzer
20
 */
21
22
namespace Edyan\Neuralyzer;
23
24
use Edyan\Neuralyzer\Exception\NeuralyzerGuesserException;
25
26
/**
 * Guesser to map a column (by name, then by type) to a Faker method.
 *
 * Every mapping entry has the shape
 * ['method' => string, 'params' => array] where 'params' is optional.
 */
class Guesser implements GuesserInterface
{
    /**
     * Returns the version of your guesser
     */
    public function getVersion(): string
    {
        return '3.0';
    }

    /**
     * Returns an array of column name pattern => Faker method description.
     *
     * Keys are regular expression fragments. mapCol() anchors each of them
     * with ^...$ and matches it, case-insensitively, against the string
     * "<table>.<column>". Entries are tried in declaration order and the
     * first match wins.
     *
     * @return array
     */
    public function getColsNameMapping(): array
    {
        // can contain regexp
        return [
            // Internet
            '.*email.*' => ['method' => 'email'],
            '.*url' => ['method' => 'url'],

            // Address and coordinates
            '.*address.*' => ['method' => 'streetAddress'],
            '.*street.*' => ['method' => 'streetAddress'],
            '.*postalcode.*' => ['method' => 'postcode'],
            '.*city.*' => ['method' => 'city'],
            '.*state.*' => ['method' => 'state'],
            '.*country.*' => ['method' => 'country'],
            '.*phone.*' => ['method' => 'phoneNumber'],

            // Text
            '.*\.(comments|description)' => ['method' => 'sentence', 'params' => [20]],

            // Person
            '.*first_?name' => ['method' => 'firstName'],
            '.*last_?name' => ['method' => 'lastName'],
        ];
    }

    /**
     * Returns an array of fieldType => Faker method description.
     *
     * @param  mixed $length  Field's length; only used as the parameter of
     *                        the 'string' entry
     *
     * @return array
     */
    public function getColsTypeMapping($length): array
    {
        return [
            // Strings
            'string' => ['method' => 'sentence', 'params' => [$length]],
            'enum' => [
                'method' => 'randomElement',
                'params' => [['SET', 'YOUR', 'VALUES', 'HERE']],
            ],
            'simplearray' => [
                'method' => 'randomElement',
                'params' => [['SET', 'YOUR', 'VALUES', 'HERE']],
            ],

            // Text & Blobs
            'text' => ['method' => 'sentence', 'params' => [20]],
            'blob' => ['method' => 'sentence', 'params' => [20]],
            'json' => ['method' => 'jsonWordsObject', 'params' => [5]],

            // DateTime
            'date' => ['method' => 'date', 'params' => ['Y-m-d']],
            'datetime' => ['method' => 'date', 'params' => ['Y-m-d H:i:s']],
            'time' => ['method' => 'time', 'params' => ['H:i:s']],

            // Integer
            'boolean' => ['method' => 'randomElement', 'params' => [[0, 1]]],
            'smallint' => ['method' => 'randomNumber', 'params' => [4]],
            'integer' => ['method' => 'randomNumber', 'params' => [9]],
            'bigint' => [
                'method' => 'randomNumber',
                // One digit fewer than mt_getrandmax() has.
                // NOTE(review): presumably so the generated number can never
                // exceed the random generator's maximum - confirm.
                'params' => [strlen(strval(mt_getrandmax())) - 1],
            ],

            // Decimal
            'float' => ['method' => 'randomFloat', 'params' => [2, 0, 999999]],
            'decimal' => ['method' => 'randomFloat', 'params' => [2, 0, 999999]],
        ];
    }

    /**
     * Will map cols first by looking for field name then by looking for field type
     * if the first returned nothing
     *
     * Resolution order:
     *  1. first name pattern from getColsNameMapping() matching "<table>.<name>";
     *  2. for an 'enum' type whose $len is a string, a randomElement built
     *     from the values parsed out of $len;
     *  3. the entry of getColsTypeMapping() for $type.
     *
     * @param string $table Table name, used as prefix of the matched string
     * @param string $name  Column name
     * @param string $type  Column type, key of getColsTypeMapping()
     * @param mixed  $len   Used to get options from enum (stored in length)
     *
     * @return array
     *
     * @throws NeuralyzerGuesserException If no name pattern matches and the
     *                                    type is not a known key
     */
    public function mapCol(string $table, string $name, string $type, $len = null): array
    {
        // Try to find by colsName
        $qualifiedName = $table . '.' . $name;
        foreach ($this->getColsNameMapping() as $colRegex => $params) {
            // Plain concatenation instead of the "${var}" interpolation,
            // which is deprecated as of PHP 8.2.
            if (preg_match('/^' . $colRegex . '$/i', $qualifiedName) === 1) {
                return $params;
            }
        }

        // Hardcoded type, we have an enum with values
        // into the len
        if ($type === 'enum' && is_string($len)) {
            return [
                'method' => 'randomElement',
                // Drops the first and last character of $len, then splits on
                // the "','" separator (e.g. "'a','b'" gives ['a', 'b']).
                'params' => [explode("','", substr($len, 1, -1))],
            ];
        }

        // Try to find by fieldType
        $colsType = $this->getColsTypeMapping($len);
        if (! array_key_exists($type, $colsType)) {
            $msg = "Can't guess the type {$type} ({$table}.{$name})" . PHP_EOL;
            $msg .= print_r($colsType, true);
            throw new NeuralyzerGuesserException($msg);
        }

        return $colsType[$type];
    }
}
157