@@ -67,7 +67,7 @@ discard block |
||
67 | 67 | }, $db); |
68 | 68 | } else { |
69 | 69 | $this->mean[$label][$i] = Mean::arithmetic($values); |
70 | - // Add epsilon in order to avoid zero stdev |
|
70 | + // Add epsilon in order to avoid zero stdev |
|
71 | 71 | $this->std[$label][$i] = 1e-10 + StandardDeviation::population($values, false); |
72 | 72 | } |
73 | 73 | } |
@@ -75,7 +75,7 @@ discard block |
||
75 | 75 | |
76 | 76 | /** |
77 | 77 | * Calculates the probability P(label|sample_n) |
78 | - * |
|
78 | + * |
|
79 | 79 | * @param array $sample |
80 | 80 | * @param int $feature |
81 | 81 | * @param string $label |
@@ -94,14 +94,14 @@ discard block |
||
94 | 94 | $mean= $this->mean[$label][$feature]; |
95 | 95 | // Calculate the probability density by use of normal/Gaussian distribution |
96 | 96 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
97 | - // |
|
98 | - // In order to avoid numerical errors because of small or zero values, |
|
99 | - // some libraries adopt taking log of calculations such as |
|
100 | - // scikit-learn did. |
|
101 | - // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
|
102 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
103 | - $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
|
104 | - return $pdf; |
|
97 | + // |
|
98 | + // In order to avoid numerical errors because of small or zero values, |
|
99 | + // some libraries adopt taking log of calculations such as |
|
100 | + // scikit-learn did. |
|
101 | + // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
|
102 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
103 | + $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
|
104 | + return $pdf; |
|
105 | 105 | } |
106 | 106 | |
107 | 107 | /** |
@@ -1,6 +1,6 @@ discard block |
||
1 | 1 | <?php |
2 | 2 | |
3 | -declare(strict_types=1); |
|
3 | +declare(strict_types = 1); |
|
4 | 4 | |
5 | 5 | namespace Phpml\Classification; |
6 | 6 | |
@@ -12,11 +12,11 @@ discard block |
||
12 | 12 | class NaiveBayes implements Classifier |
13 | 13 | { |
14 | 14 | use Trainable, Predictable; |
15 | - const CONTINUOS = 1; |
|
16 | - const NOMINAL = 2; |
|
15 | + const CONTINUOS = 1; |
|
16 | + const NOMINAL = 2; |
|
17 | 17 | const EPSILON = 1e-10; |
18 | 18 | private $std = array(); |
19 | - private $mean= array(); |
|
19 | + private $mean = array(); |
|
20 | 20 | private $discreteProb = array(); |
21 | 21 | private $dataType = array(); |
22 | 22 | private $p = array(); |
@@ -48,10 +48,10 @@ discard block |
||
48 | 48 | private function calculateStatistics($label, $samples) |
49 | 49 | { |
50 | 50 | $this->std[$label] = array_fill(0, $this->featureCount, 0); |
51 | - $this->mean[$label]= array_fill(0, $this->featureCount, 0); |
|
51 | + $this->mean[$label] = array_fill(0, $this->featureCount, 0); |
|
52 | 52 | $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
53 | 53 | $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
54 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
54 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
55 | 55 | // Get the values of nth column in the samples array |
56 | 56 | // Mean::arithmetic is called twice, can be optimized |
57 | 57 | $values = array_column($samples, $i); |
@@ -62,7 +62,7 @@ discard block |
||
62 | 62 | $this->dataType[$label][$i] = self::NOMINAL; |
63 | 63 | $this->discreteProb[$label][$i] = array_count_values($values); |
64 | 64 | $db = &$this->discreteProb[$label][$i]; |
65 | - $db = array_map(function ($el) use ($numValues) { |
|
65 | + $db = array_map(function($el) use ($numValues) { |
|
66 | 66 | return $el / $numValues; |
67 | 67 | }, $db); |
68 | 68 | } else { |
@@ -84,14 +84,14 @@ discard block |
||
84 | 84 | { |
85 | 85 | $value = $sample[$feature]; |
86 | 86 | if ($this->dataType[$label][$feature] == self::NOMINAL) { |
87 | - if (! isset($this->discreteProb[$label][$feature][$value]) || |
|
87 | + if (!isset($this->discreteProb[$label][$feature][$value]) || |
|
88 | 88 | $this->discreteProb[$label][$feature][$value] == 0) { |
89 | 89 | return self::EPSILON; |
90 | 90 | } |
91 | 91 | return $this->discreteProb[$label][$feature][$value]; |
92 | 92 | } |
93 | - $std = $this->std[$label][$feature] ; |
|
94 | - $mean= $this->mean[$label][$feature]; |
|
93 | + $std = $this->std[$label][$feature]; |
|
94 | + $mean = $this->mean[$label][$feature]; |
|
95 | 95 | // Calculate the probability density by use of normal/Gaussian distribution |
96 | 96 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
97 | 97 | // |
@@ -99,7 +99,7 @@ discard block |
||
99 | 99 | // some libraries adopt taking log of calculations such as |
100 | 100 | // scikit-learn did. |
101 | 101 | // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
102 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
102 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
103 | 103 | $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
104 | 104 | return $pdf; |
105 | 105 | } |
@@ -112,7 +112,7 @@ discard block |
||
112 | 112 | private function getSamplesByLabel($label) |
113 | 113 | { |
114 | 114 | $samples = array(); |
115 | - for ($i=0; $i<$this->sampleCount; $i++) { |
|
115 | + for ($i = 0; $i < $this->sampleCount; $i++) { |
|
116 | 116 | if ($this->targets[$i] == $label) { |
117 | 117 | $samples[] = $this->samples[$i]; |
118 | 118 | } |
@@ -139,7 +139,7 @@ discard block |
||
139 | 139 | $predictions = array(); |
140 | 140 | foreach ($this->labels as $label) { |
141 | 141 | $p = $this->p[$label]; |
142 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
142 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
143 | 143 | $Plf = $this->sampleProbability($sample, $i, $label); |
144 | 144 | $p += $Plf; |
145 | 145 | } |
@@ -149,7 +149,7 @@ discard block |
||
149 | 149 | reset($predictions); |
150 | 150 | $samplePredictions[] = key($predictions); |
151 | 151 | } |
152 | - if (! $isArray) { |
|
152 | + if (!$isArray) { |
|
153 | 153 | return $samplePredictions[0]; |
154 | 154 | } |
155 | 155 | return $samplePredictions; |