1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace MachineLearning\Application\Normalization; |
4
|
|
|
|
5
|
|
|
use MachineLearning\Domain\Model\Value\VectorValue; |
6
|
|
|
use MachineLearning\Domain\Model\Dataset; |
7
|
|
|
use MachineLearning\Domain\Model\ValueInterface; |
8
|
|
|
|
9
|
|
|
/**
 * Mean normalization: subtracts the dataset mean from every feature and
 * divides the result by that feature's value range (maximum - minimum).
 *
 * For the rows the coefficients were calculated from, every normalized
 * feature therefore falls within [-1, 1].
 */
class MeanScaleNormalization extends AbstractNormalization
{
    /** Key of the per-feature averages inside the coefficient value. */
    const COEFFICIENT_AVERAGE = 0;

    /** Key of the per-feature ranges inside the coefficient value. */
    const COEFFICIENT_RANGE = 1;

    /**
     * Normalizes a single feature vector: (value - average) / range, per column.
     *
     * @param ValueInterface $value       Value whose getValue() returns the feature list, indexed from 0.
     * @param ValueInterface $coefficient Value whose getValue() returns an array holding the per-feature
     *                                    averages under COEFFICIENT_AVERAGE and the per-feature ranges
     *                                    under COEFFICIENT_RANGE (the shape built by calculateCoefficient()).
     *
     * @return VectorValue The normalized feature vector.
     */
    public function normalizeValue(ValueInterface $value, ValueInterface $coefficient)
    {
        $rawCoefficient = $coefficient->getValue();
        $rawValue = $value->getValue();

        $numberColumns = count($rawValue);
        for ($i = 0; $i < $numberColumns; $i++) {
            $rawValue[$i] = ($rawValue[$i] - $rawCoefficient[static::COEFFICIENT_AVERAGE][$i])
                / $rawCoefficient[static::COEFFICIENT_RANGE][$i];
        }

        return new VectorValue($rawValue);
    }

    /**
     * Calculates the per-feature average and range of a dataset.
     *
     * The number of features is taken from the first row of the dataset.
     *
     * @param Dataset $data Rows to derive the coefficients from.
     *
     * @return VectorValue Wraps an array with the averages under COEFFICIENT_AVERAGE
     *                     and the ranges under COEFFICIENT_RANGE.
     *
     * @throws \InvalidArgumentException When the dataset contains no rows.
     */
    public function calculateCoefficient(Dataset $data)
    {
        $numberRows = count($data);
        if ($numberRows === 0) {
            // Without rows there is neither a first row to read the feature count from
            // nor a valid divisor for the averages, so fail with an explicit message.
            throw new \InvalidArgumentException(
                'Cannot calculate normalization coefficients for an empty dataset.'
            );
        }

        $numberFeatures = count($data->first()->getIndependentVariable()->getValue());

        list($featuresMaximumValue, $featuresMinimumValue, $featuresSum) = $this->prepareNormalizationData(
            $data,
            $numberFeatures,
            [],
            [],
            array_fill(0, $numberFeatures, 0)
        );

        $featuresAverage = array_fill(0, $numberFeatures, 0);
        $featuresRange = array_fill(0, $numberFeatures, 0);

        foreach ($featuresSum as $i => $featureSum) {
            $featuresAverage[$i] = $featureSum / $numberRows;

            // A feature whose maximum equals its minimum has a zero range;
            // use 1 instead so normalizeValue() never divides by zero.
            $range = $featuresMaximumValue[$i] - $featuresMinimumValue[$i];
            $featuresRange[$i] = $range > 0 ? $range : 1;
        }

        return new VectorValue([
            static::COEFFICIENT_AVERAGE => $featuresAverage,
            static::COEFFICIENT_RANGE => $featuresRange
        ]);
    }

    /**
     * Walks the dataset once, tracking each feature's maximum, minimum and running sum.
     *
     * @param Dataset $data                 Rows to scan.
     * @param int     $numberFeatures       Number of features read from every row.
     * @param array   $featuresMaximumValue Maximums found so far, keyed by feature index (may be empty).
     * @param array   $featuresMinimumValue Minimums found so far, keyed by feature index (may be empty).
     * @param array   $featuresSum          Running sums, keyed by feature index; an entry must already
     *                                      exist for every feature index.
     *
     * @return array List of three arrays, in this order: maximums, minimums, sums.
     */
    protected function prepareNormalizationData(Dataset $data, $numberFeatures, $featuresMaximumValue, $featuresMinimumValue, $featuresSum)
    {
        foreach ($data as $row) {
            $features = $row->getIndependentVariable()->getValue();

            for ($i = 0; $i < $numberFeatures; $i++) {
                if (!isset($featuresMaximumValue[$i]) || $featuresMaximumValue[$i] < $features[$i]) {
                    $featuresMaximumValue[$i] = $features[$i];
                }

                if (!isset($featuresMinimumValue[$i]) || $featuresMinimumValue[$i] > $features[$i]) {
                    $featuresMinimumValue[$i] = $features[$i];
                }

                $featuresSum[$i] = $featuresSum[$i] + $features[$i];
            }
        }

        return [$featuresMaximumValue, $featuresMinimumValue, $featuresSum];
    }
}
102
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.