1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\Preprocessing; |
6
|
|
|
|
7
|
|
|
use Phpml\Exception\NormalizerException; |
8
|
|
|
use Phpml\Math\Statistic\StandardDeviation; |
9
|
|
|
use Phpml\Math\Statistic\Mean; |
10
|
|
|
|
11
|
|
|
/**
 * Rescales samples in place using one of three strategies selected at
 * construction time:
 *
 *  - NORM_L1:  every sample is divided by the sum of the absolute values of its features.
 *  - NORM_L2:  every sample is divided by the square root of the sum of its squared features.
 *  - NORM_STD: every feature has the fitted per-feature mean subtracted and is
 *              divided by the fitted per-feature standard deviation.
 */
class Normalizer implements Preprocessor
{
    const NORM_L1 = 1;
    const NORM_L2 = 2;
    const NORM_STD = 3;

    /**
     * Selected normalization mode; always one of the NORM_* constants.
     *
     * @var int
     */
    private $norm;

    /**
     * Whether fit() has already completed; once true, fit() is a no-op.
     *
     * @var bool
     */
    private $fitted = false;

    /**
     * Per-feature standard deviation collected by fit() (NORM_STD only),
     * keyed by feature index.
     *
     * @var array
     */
    private $std = [];

    /**
     * Per-feature arithmetic mean collected by fit() (NORM_STD only),
     * keyed by feature index.
     *
     * @var array
     */
    private $mean = [];

    /**
     * @param int $norm one of self::NORM_L1, self::NORM_L2 or self::NORM_STD
     *
     * @throws NormalizerException when $norm is not one of the NORM_* constants
     */
    public function __construct(int $norm = self::NORM_L2)
    {
        // Strict comparison: only the exact integer constants are accepted.
        if (!in_array($norm, [self::NORM_L1, self::NORM_L2, self::NORM_STD], true)) {
            throw NormalizerException::unknownNorm();
        }

        $this->norm = $norm;
    }

    /**
     * Collects the per-feature statistics needed by NORM_STD.
     *
     * For NORM_L1 and NORM_L2 nothing is computed; the instance is only marked
     * as fitted. Only the first successful call has an effect. An empty sample
     * list carries no statistics, so for NORM_STD it leaves the instance
     * unfitted and a later call with data can still fit it.
     *
     * @param array $samples list of samples, each an array of numeric features
     */
    public function fit(array $samples)
    {
        if ($this->fitted) {
            return;
        }

        if ($this->norm === self::NORM_STD) {
            if ([] === $samples) {
                return;
            }

            // Take the feature keys from the first sample instead of assuming
            // $samples[0] exists and building range(0, n - 1): range(0, -1)
            // yields [0, -1], which is wrong for a sample without features.
            $firstSample = reset($samples);

            foreach (array_keys($firstSample) as $i) {
                $values = array_column($samples, $i);
                $this->std[$i] = StandardDeviation::population($values);
                $this->mean[$i] = Mean::arithmetic($values);
            }
        }

        $this->fitted = true;
    }

    /**
     * Normalizes every sample in place according to the configured norm.
     *
     * Calls fit() with the given samples first, so an instance that has not
     * been fitted yet derives its statistics from this very data set.
     *
     * @param array $samples list of samples; modified by reference
     */
    public function transform(array &$samples)
    {
        $this->fit($samples);

        foreach ($samples as &$sample) {
            switch ($this->norm) {
                case self::NORM_L1:
                    $this->normalizeL1($sample);
                    break;
                case self::NORM_L2:
                    $this->normalizeL2($sample);
                    break;
                case self::NORM_STD:
                    $this->normalizeSTD($sample);
                    break;
            }
        }
        // Break the reference so the last element is not left aliased.
        unset($sample);
    }

    /**
     * Divides each feature by the sum of absolute feature values.
     *
     * A sample whose absolute sum is zero is replaced by a zero-indexed array
     * of the same length filled with 1 / count. An empty sample is left as is.
     *
     * @param array $sample single sample; modified by reference
     */
    private function normalizeL1(array &$sample)
    {
        $count = count($sample);
        if (0 === $count) {
            // Nothing to scale; also avoids 1.0 / 0 below.
            return;
        }

        $norm1 = 0;
        foreach ($sample as $feature) {
            $norm1 += abs($feature);
        }

        if (0.0 === (float) $norm1) {
            $sample = array_fill(0, $count, 1.0 / $count);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm1;
        }
        unset($feature);
    }

    /**
     * Divides each feature by the square root of the sum of squared features.
     *
     * A sample whose norm is zero is replaced by a zero-indexed array of the
     * same length filled with 1.
     *
     * @param array $sample single sample; modified by reference
     */
    private function normalizeL2(array &$sample)
    {
        $norm2 = 0;
        foreach ($sample as $feature) {
            $norm2 += $feature * $feature;
        }
        $norm2 = sqrt((float) $norm2);

        if (0.0 === $norm2) {
            $sample = array_fill(0, count($sample), 1);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm2;
        }
        unset($feature);
    }

    /**
     * Standardizes each feature with the mean and standard deviation
     * collected by fit().
     *
     * @param array $sample single sample; modified by reference
     */
    private function normalizeSTD(array &$sample)
    {
        foreach (array_keys($sample) as $i) {
            if (0.0 === (float) $this->std[$i]) {
                // Zero deviation: the fitted column was constant, so dividing
                // would raise a division-by-zero error. Map the feature to 0.
                $sample[$i] = 0.0;

                continue;
            }

            $sample[$i] = ($sample[$i] - $this->mean[$i]) / $this->std[$i];
        }
    }
}
141
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.