These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Preprocessing; |
||
6 | |||
7 | use Phpml\Exception\NormalizerException; |
||
8 | use Phpml\Math\Statistic\Mean; |
||
9 | use Phpml\Math\Statistic\StandardDeviation; |
||
10 | |||
/**
 * Rescales samples in place using one of three strategies.
 *
 * - NORM_L1:  every sample is divided by the sum of the absolute values of its features.
 * - NORM_L2:  every sample is divided by the square root of the sum of its squared features.
 * - NORM_STD: every feature has its mean subtracted and is divided by its population
 *             standard deviation; both statistics are computed once, by fit().
 */
class Normalizer implements Preprocessor
{
    const NORM_L1 = 1;
    const NORM_L2 = 2;
    const NORM_STD = 3;

    /**
     * One of the NORM_* constants.
     *
     * @var int
     */
    private $norm;

    /**
     * Whether fit() has already completed; later fit() calls are then no-ops.
     *
     * @var bool
     */
    private $fitted = false;

    /**
     * Per-feature population standard deviation, indexed by feature position (NORM_STD only).
     *
     * @var array
     */
    private $std = [];

    /**
     * Per-feature arithmetic mean, indexed by feature position (NORM_STD only).
     *
     * @var array
     */
    private $mean = [];

    /**
     * @param int $norm one of self::NORM_L1, self::NORM_L2 or self::NORM_STD
     *
     * @throws NormalizerException when $norm is not one of the NORM_* constants
     */
    public function __construct(int $norm = self::NORM_L2)
    {
        // Strict comparison: only the exact integer constants are accepted.
        if (!in_array($norm, [self::NORM_L1, self::NORM_L2, self::NORM_STD], true)) {
            throw NormalizerException::unknownNorm();
        }

        $this->norm = $norm;
    }

    /**
     * Computes the per-feature statistics required by NORM_STD.
     *
     * Only the first successful call has an effect. For NORM_L1 and NORM_L2 there is
     * nothing to learn, so the call merely marks the instance as fitted.
     *
     * @param array $samples list of samples, each a list of numeric features
     */
    public function fit(array $samples)
    {
        if ($this->fitted) {
            return;
        }

        if ($this->norm === self::NORM_STD) {
            // The feature count is taken from the first sample, whatever its key is.
            $firstSample = reset($samples);
            $featureCount = $firstSample === false ? 0 : count($firstSample);

            if ($featureCount === 0) {
                // No statistics can be derived from empty input. Stay unfitted so a later
                // call with real data is still able to compute them.
                return;
            }

            for ($i = 0; $i < $featureCount; ++$i) {
                $values = array_column($samples, $i);
                $this->std[$i] = StandardDeviation::population($values);
                $this->mean[$i] = Mean::arithmetic($values);
            }
        }

        $this->fitted = true;
    }

    /**
     * Normalizes every sample in place with the configured norm.
     *
     * fit() is invoked first, so when the instance has not been fitted yet the
     * statistics are derived from the very samples being transformed.
     *
     * @param array $samples list of samples, modified by reference
     */
    public function transform(array &$samples)
    {
        $methods = [
            self::NORM_L1 => 'normalizeL1',
            self::NORM_L2 => 'normalizeL2',
            self::NORM_STD => 'normalizeSTD',
        ];
        $method = $methods[$this->norm];

        $this->fit($samples);

        foreach ($samples as &$sample) {
            $this->{$method}($sample);
        }
        // Break the reference so the last element of $samples is not left aliased.
        unset($sample);
    }

    /**
     * Divides each feature by the sum of absolute feature values.
     *
     * When that sum is zero, every feature is set to 1 / count so the result still
     * sums to one. An empty sample is left untouched.
     *
     * @param array $sample features of one sample, modified by reference
     */
    private function normalizeL1(array &$sample)
    {
        $count = count($sample);
        if ($count === 0) {
            // Nothing to scale; also avoids dividing by a zero count below.
            return;
        }

        $norm1 = 0;
        foreach ($sample as $feature) {
            $norm1 += abs($feature);
        }

        if ((float) $norm1 === 0.0) {
            $sample = array_fill(0, $count, 1.0 / $count);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm1;
        }
        unset($feature);
    }

    /**
     * Divides each feature by the square root of the sum of squared features.
     *
     * When that norm is zero, every feature is set to 1.
     * NOTE(review): the filled vector does not have unit L2 norm; kept as-is because
     * callers may rely on it - confirm whether 1 / sqrt(count) was intended.
     *
     * @param array $sample features of one sample, modified by reference
     */
    private function normalizeL2(array &$sample)
    {
        $norm2 = 0;
        foreach ($sample as $feature) {
            $norm2 += $feature * $feature;
        }
        $norm2 = sqrt((float) $norm2);

        if ($norm2 === 0.0) {
            $sample = array_fill(0, count($sample), 1);

            return;
        }

        foreach ($sample as &$feature) {
            $feature /= $norm2;
        }
        unset($feature);
    }

    /**
     * Standardizes each feature with the mean and standard deviation stored by fit().
     *
     * @param array $sample features of one sample, modified by reference
     */
    private function normalizeSTD(array &$sample)
    {
        foreach ($sample as $i => $value) {
            if ((float) $this->std[$i] !== 0.0) {
                $sample[$i] = ($value - $this->mean[$i]) / $this->std[$i];
            } else {
                // Same value for all samples.
                $sample[$i] = 0;
            }
        }
    }
}
||
129 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.