@@ -67,7 +67,7 @@ discard block |
||
| 67 | 67 | }, $db); |
| 68 | 68 | } else { |
| 69 | 69 | $this->mean[$label][$i] = Mean::arithmetic($values); |
| 70 | - // Add epsilon in order to avoid zero stdev |
|
| 70 | + // Add epsilon in order to avoid zero stdev |
|
| 71 | 71 | $this->std[$label][$i] = 1e-10 + StandardDeviation::population($values, false); |
| 72 | 72 | } |
| 73 | 73 | } |
@@ -75,7 +75,7 @@ discard block |
||
| 75 | 75 | |
| 76 | 76 | /** |
| 77 | 77 | * Calculates the probability P(label|sample_n) |
| 78 | - * |
|
| 78 | + * |
|
| 79 | 79 | * @param array $sample |
| 80 | 80 | * @param int $feature |
| 81 | 81 | * @param string $label |
@@ -94,14 +94,14 @@ discard block |
||
| 94 | 94 | $mean= $this->mean[$label][$feature]; |
| 95 | 95 | // Calculate the probability density by use of normal/Gaussian distribution |
| 96 | 96 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
| 97 | - // |
|
| 98 | - // In order to avoid numerical errors because of small or zero values, |
|
| 99 | - // some libraries adopt taking log of calculations such as |
|
| 100 | - // scikit-learn did. |
|
| 101 | - // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
|
| 102 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 103 | - $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
|
| 104 | - return $pdf; |
|
| 97 | + // |
|
| 98 | + // In order to avoid numerical errors because of small or zero values, |
|
| 99 | + // some libraries adopt taking log of calculations such as |
|
| 100 | + // scikit-learn did. |
|
| 101 | + // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
|
| 102 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 103 | + $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
|
| 104 | + return $pdf; |
|
| 105 | 105 | } |
| 106 | 106 | |
| 107 | 107 | /** |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -12,11 +12,11 @@ discard block |
||
| 12 | 12 | class NaiveBayes implements Classifier |
| 13 | 13 | { |
| 14 | 14 | use Trainable, Predictable; |
| 15 | - const CONTINUOS = 1; |
|
| 16 | - const NOMINAL = 2; |
|
| 15 | + const CONTINUOS = 1; |
|
| 16 | + const NOMINAL = 2; |
|
| 17 | 17 | const EPSILON = 1e-10; |
| 18 | 18 | private $std = array(); |
| 19 | - private $mean= array(); |
|
| 19 | + private $mean = array(); |
|
| 20 | 20 | private $discreteProb = array(); |
| 21 | 21 | private $dataType = array(); |
| 22 | 22 | private $p = array(); |
@@ -48,10 +48,10 @@ discard block |
||
| 48 | 48 | private function calculateStatistics($label, $samples) |
| 49 | 49 | { |
| 50 | 50 | $this->std[$label] = array_fill(0, $this->featureCount, 0); |
| 51 | - $this->mean[$label]= array_fill(0, $this->featureCount, 0); |
|
| 51 | + $this->mean[$label] = array_fill(0, $this->featureCount, 0); |
|
| 52 | 52 | $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 53 | 53 | $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 54 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 54 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 55 | 55 | // Get the values of nth column in the samples array |
| 56 | 56 | // Mean::arithmetic is called twice, can be optimized |
| 57 | 57 | $values = array_column($samples, $i); |
@@ -62,7 +62,7 @@ discard block |
||
| 62 | 62 | $this->dataType[$label][$i] = self::NOMINAL; |
| 63 | 63 | $this->discreteProb[$label][$i] = array_count_values($values); |
| 64 | 64 | $db = &$this->discreteProb[$label][$i]; |
| 65 | - $db = array_map(function ($el) use ($numValues) { |
|
| 65 | + $db = array_map(function($el) use ($numValues) { |
|
| 66 | 66 | return $el / $numValues; |
| 67 | 67 | }, $db); |
| 68 | 68 | } else { |
@@ -84,14 +84,14 @@ discard block |
||
| 84 | 84 | { |
| 85 | 85 | $value = $sample[$feature]; |
| 86 | 86 | if ($this->dataType[$label][$feature] == self::NOMINAL) { |
| 87 | - if (! isset($this->discreteProb[$label][$feature][$value]) || |
|
| 87 | + if (!isset($this->discreteProb[$label][$feature][$value]) || |
|
| 88 | 88 | $this->discreteProb[$label][$feature][$value] == 0) { |
| 89 | 89 | return self::EPSILON; |
| 90 | 90 | } |
| 91 | 91 | return $this->discreteProb[$label][$feature][$value]; |
| 92 | 92 | } |
| 93 | - $std = $this->std[$label][$feature] ; |
|
| 94 | - $mean= $this->mean[$label][$feature]; |
|
| 93 | + $std = $this->std[$label][$feature]; |
|
| 94 | + $mean = $this->mean[$label][$feature]; |
|
| 95 | 95 | // Calculate the probability density by use of normal/Gaussian distribution |
| 96 | 96 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
| 97 | 97 | // |
@@ -99,7 +99,7 @@ discard block |
||
| 99 | 99 | // some libraries adopt taking log of calculations such as |
| 100 | 100 | // scikit-learn did. |
| 101 | 101 | // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
| 102 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 102 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 103 | 103 | $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
| 104 | 104 | return $pdf; |
| 105 | 105 | } |
@@ -112,7 +112,7 @@ discard block |
||
| 112 | 112 | private function getSamplesByLabel($label) |
| 113 | 113 | { |
| 114 | 114 | $samples = array(); |
| 115 | - for ($i=0; $i<$this->sampleCount; $i++) { |
|
| 115 | + for ($i = 0; $i < $this->sampleCount; $i++) { |
|
| 116 | 116 | if ($this->targets[$i] == $label) { |
| 117 | 117 | $samples[] = $this->samples[$i]; |
| 118 | 118 | } |
@@ -139,7 +139,7 @@ discard block |
||
| 139 | 139 | $predictions = array(); |
| 140 | 140 | foreach ($this->labels as $label) { |
| 141 | 141 | $p = $this->p[$label]; |
| 142 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 142 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 143 | 143 | $Plf = $this->sampleProbability($sample, $i, $label); |
| 144 | 144 | $p += $Plf; |
| 145 | 145 | } |
@@ -149,7 +149,7 @@ discard block |
||
| 149 | 149 | reset($predictions); |
| 150 | 150 | $samplePredictions[] = key($predictions); |
| 151 | 151 | } |
| 152 | - if (! $isArray) { |
|
| 152 | + if (!$isArray) { |
|
| 153 | 153 | return $samplePredictions[0]; |
| 154 | 154 | } |
| 155 | 155 | return $samplePredictions; |