@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -75,7 +75,7 @@ discard block |
||
| 75 | 75 | protected function getColumnTypes(array $samples) |
| 76 | 76 | { |
| 77 | 77 | $types = []; |
| 78 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 78 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 79 | 79 | $values = array_column($samples, $i); |
| 80 | 80 | $isCategorical = $this->isCategoricalColumn($values); |
| 81 | 81 | $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOS; |
@@ -95,7 +95,7 @@ discard block |
||
| 95 | 95 | $this->actualDepth = $depth; |
| 96 | 96 | } |
| 97 | 97 | $leftRecords = []; |
| 98 | - $rightRecords= []; |
|
| 98 | + $rightRecords = []; |
|
| 99 | 99 | $remainingTargets = []; |
| 100 | 100 | $prevRecord = null; |
| 101 | 101 | $allSame = true; |
@@ -108,10 +108,10 @@ discard block |
||
| 108 | 108 | if ($split->evaluate($record)) { |
| 109 | 109 | $leftRecords[] = $recordNo; |
| 110 | 110 | } else { |
| 111 | - $rightRecords[]= $recordNo; |
|
| 111 | + $rightRecords[] = $recordNo; |
|
| 112 | 112 | } |
| 113 | 113 | $target = $this->targets[$recordNo]; |
| 114 | - if (! in_array($target, $remainingTargets)) { |
|
| 114 | + if (!in_array($target, $remainingTargets)) { |
|
| 115 | 115 | $remainingTargets[] = $target; |
| 116 | 116 | } |
| 117 | 117 | } |
@@ -126,7 +126,7 @@ discard block |
||
| 126 | 126 | $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); |
| 127 | 127 | } |
| 128 | 128 | if ($rightRecords) { |
| 129 | - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 129 | + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 130 | 130 | } |
| 131 | 131 | } |
| 132 | 132 | return $split; |
@@ -143,7 +143,7 @@ discard block |
||
| 143 | 143 | $samples = array_combine($records, $this->preprocess($samples)); |
| 144 | 144 | $bestGiniVal = 1; |
| 145 | 145 | $bestSplit = null; |
| 146 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 146 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 147 | 147 | $colValues = []; |
| 148 | 148 | $baseValue = null; |
| 149 | 149 | foreach ($samples as $index => $row) { |
@@ -183,7 +183,7 @@ discard block |
||
| 183 | 183 | $countMatrix[$label][$rowIndex]++; |
| 184 | 184 | } |
| 185 | 185 | $giniParts = [0, 0]; |
| 186 | - for ($i=0; $i<=1; $i++) { |
|
| 186 | + for ($i = 0; $i <= 1; $i++) { |
|
| 187 | 187 | $part = 0; |
| 188 | 188 | $sum = array_sum(array_column($countMatrix, $i)); |
| 189 | 189 | if ($sum > 0) { |
@@ -205,7 +205,7 @@ discard block |
||
| 205 | 205 | // Detect and convert continuous data column values into |
| 206 | 206 | // discrete values by using the median as a threshold value |
| 207 | 207 | $columns = []; |
| 208 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 208 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 209 | 209 | $values = array_column($samples, $i); |
| 210 | 210 | if ($this->columnTypes[$i] == self::CONTINUOS) { |
| 211 | 211 | $median = Mean::median($values); |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -13,8 +13,8 @@ discard block |
||
| 13 | 13 | { |
| 14 | 14 | use Trainable, Predictable; |
| 15 | 15 | |
| 16 | - const CONTINUOS = 1; |
|
| 17 | - const NOMINAL = 2; |
|
| 16 | + const CONTINUOS = 1; |
|
| 17 | + const NOMINAL = 2; |
|
| 18 | 18 | const EPSILON = 1e-10; |
| 19 | 19 | |
| 20 | 20 | /** |
@@ -25,7 +25,7 @@ discard block |
||
| 25 | 25 | /** |
| 26 | 26 | * @var array |
| 27 | 27 | */ |
| 28 | - private $mean= []; |
|
| 28 | + private $mean = []; |
|
| 29 | 29 | |
| 30 | 30 | /** |
| 31 | 31 | * @var array |
@@ -86,10 +86,10 @@ discard block |
||
| 86 | 86 | private function calculateStatistics($label, $samples) |
| 87 | 87 | { |
| 88 | 88 | $this->std[$label] = array_fill(0, $this->featureCount, 0); |
| 89 | - $this->mean[$label]= array_fill(0, $this->featureCount, 0); |
|
| 89 | + $this->mean[$label] = array_fill(0, $this->featureCount, 0); |
|
| 90 | 90 | $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 91 | 91 | $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 92 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 92 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 93 | 93 | // Get the values of nth column in the samples array |
| 94 | 94 | // Mean::arithmetic is called twice, can be optimized |
| 95 | 95 | $values = array_column($samples, $i); |
@@ -100,7 +100,7 @@ discard block |
||
| 100 | 100 | $this->dataType[$label][$i] = self::NOMINAL; |
| 101 | 101 | $this->discreteProb[$label][$i] = array_count_values($values); |
| 102 | 102 | $db = &$this->discreteProb[$label][$i]; |
| 103 | - $db = array_map(function ($el) use ($numValues) { |
|
| 103 | + $db = array_map(function($el) use ($numValues) { |
|
| 104 | 104 | return $el / $numValues; |
| 105 | 105 | }, $db); |
| 106 | 106 | } else { |
@@ -123,14 +123,14 @@ discard block |
||
| 123 | 123 | { |
| 124 | 124 | $value = $sample[$feature]; |
| 125 | 125 | if ($this->dataType[$label][$feature] == self::NOMINAL) { |
| 126 | - if (! isset($this->discreteProb[$label][$feature][$value]) || |
|
| 126 | + if (!isset($this->discreteProb[$label][$feature][$value]) || |
|
| 127 | 127 | $this->discreteProb[$label][$feature][$value] == 0) { |
| 128 | 128 | return self::EPSILON; |
| 129 | 129 | } |
| 130 | 130 | return $this->discreteProb[$label][$feature][$value]; |
| 131 | 131 | } |
| 132 | - $std = $this->std[$label][$feature] ; |
|
| 133 | - $mean= $this->mean[$label][$feature]; |
|
| 132 | + $std = $this->std[$label][$feature]; |
|
| 133 | + $mean = $this->mean[$label][$feature]; |
|
| 134 | 134 | // Calculate the probability density by use of normal/Gaussian distribution |
| 135 | 135 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
| 136 | 136 | // |
@@ -138,7 +138,7 @@ discard block |
||
| 138 | 138 | // some libraries adopt taking log of calculations such as |
| 139 | 139 | // scikit-learn did. |
| 140 | 140 | // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
| 141 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 141 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 142 | 142 | $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
| 143 | 143 | return $pdf; |
| 144 | 144 | } |
@@ -151,7 +151,7 @@ discard block |
||
| 151 | 151 | private function getSamplesByLabel($label) |
| 152 | 152 | { |
| 153 | 153 | $samples = []; |
| 154 | - for ($i=0; $i<$this->sampleCount; $i++) { |
|
| 154 | + for ($i = 0; $i < $this->sampleCount; $i++) { |
|
| 155 | 155 | if ($this->targets[$i] == $label) { |
| 156 | 156 | $samples[] = $this->samples[$i]; |
| 157 | 157 | } |
@@ -171,7 +171,7 @@ discard block |
||
| 171 | 171 | $predictions = []; |
| 172 | 172 | foreach ($this->labels as $label) { |
| 173 | 173 | $p = $this->p[$label]; |
| 174 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 174 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 175 | 175 | $Plf = $this->sampleProbability($sample, $i, $label); |
| 176 | 176 | $p += $Plf; |
| 177 | 177 | } |