@@ -1,6 +1,6 @@ discard block |
||
1 | 1 | <?php |
2 | 2 | |
3 | -declare(strict_types=1); |
|
3 | +declare(strict_types = 1); |
|
4 | 4 | |
5 | 5 | namespace Phpml\Classification; |
6 | 6 | |
@@ -75,7 +75,7 @@ discard block |
||
75 | 75 | protected function getColumnTypes(array $samples) |
76 | 76 | { |
77 | 77 | $types = []; |
78 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
78 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
79 | 79 | $values = array_column($samples, $i); |
80 | 80 | $isCategorical = $this->isCategoricalColumn($values); |
81 | 81 | $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOS; |
@@ -95,7 +95,7 @@ discard block |
||
95 | 95 | $this->actualDepth = $depth; |
96 | 96 | } |
97 | 97 | $leftRecords = []; |
98 | - $rightRecords= []; |
|
98 | + $rightRecords = []; |
|
99 | 99 | $remainingTargets = []; |
100 | 100 | $prevRecord = null; |
101 | 101 | $allSame = true; |
@@ -108,10 +108,10 @@ discard block |
||
108 | 108 | if ($split->evaluate($record)) { |
109 | 109 | $leftRecords[] = $recordNo; |
110 | 110 | } else { |
111 | - $rightRecords[]= $recordNo; |
|
111 | + $rightRecords[] = $recordNo; |
|
112 | 112 | } |
113 | 113 | $target = $this->targets[$recordNo]; |
114 | - if (! in_array($target, $remainingTargets)) { |
|
114 | + if (!in_array($target, $remainingTargets)) { |
|
115 | 115 | $remainingTargets[] = $target; |
116 | 116 | } |
117 | 117 | } |
@@ -126,7 +126,7 @@ discard block |
||
126 | 126 | $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); |
127 | 127 | } |
128 | 128 | if ($rightRecords) { |
129 | - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); |
|
129 | + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); |
|
130 | 130 | } |
131 | 131 | } |
132 | 132 | return $split; |
@@ -143,7 +143,7 @@ discard block |
||
143 | 143 | $samples = array_combine($records, $this->preprocess($samples)); |
144 | 144 | $bestGiniVal = 1; |
145 | 145 | $bestSplit = null; |
146 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
146 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
147 | 147 | $colValues = []; |
148 | 148 | $baseValue = null; |
149 | 149 | foreach ($samples as $index => $row) { |
@@ -183,7 +183,7 @@ discard block |
||
183 | 183 | $countMatrix[$label][$rowIndex]++; |
184 | 184 | } |
185 | 185 | $giniParts = [0, 0]; |
186 | - for ($i=0; $i<=1; $i++) { |
|
186 | + for ($i = 0; $i <= 1; $i++) { |
|
187 | 187 | $part = 0; |
188 | 188 | $sum = array_sum(array_column($countMatrix, $i)); |
189 | 189 | if ($sum > 0) { |
@@ -205,7 +205,7 @@ discard block |
||
205 | 205 | // Detect and convert continuous data column values into |
206 | 206 | // discrete values by using the median as a threshold value |
207 | 207 | $columns = []; |
208 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
208 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
209 | 209 | $values = array_column($samples, $i); |
210 | 210 | if ($this->columnTypes[$i] == self::CONTINUOS) { |
211 | 211 | $median = Mean::median($values); |
@@ -1,6 +1,6 @@ discard block |
||
1 | 1 | <?php |
2 | 2 | |
3 | -declare(strict_types=1); |
|
3 | +declare(strict_types = 1); |
|
4 | 4 | |
5 | 5 | namespace Phpml\Classification; |
6 | 6 | |
@@ -13,8 +13,8 @@ discard block |
||
13 | 13 | { |
14 | 14 | use Trainable, Predictable; |
15 | 15 | |
16 | - const CONTINUOS = 1; |
|
17 | - const NOMINAL = 2; |
|
16 | + const CONTINUOS = 1; |
|
17 | + const NOMINAL = 2; |
|
18 | 18 | const EPSILON = 1e-10; |
19 | 19 | |
20 | 20 | /** |
@@ -25,7 +25,7 @@ discard block |
||
25 | 25 | /** |
26 | 26 | * @var array |
27 | 27 | */ |
28 | - private $mean= []; |
|
28 | + private $mean = []; |
|
29 | 29 | |
30 | 30 | /** |
31 | 31 | * @var array |
@@ -86,10 +86,10 @@ discard block |
||
86 | 86 | private function calculateStatistics($label, $samples) |
87 | 87 | { |
88 | 88 | $this->std[$label] = array_fill(0, $this->featureCount, 0); |
89 | - $this->mean[$label]= array_fill(0, $this->featureCount, 0); |
|
89 | + $this->mean[$label] = array_fill(0, $this->featureCount, 0); |
|
90 | 90 | $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
91 | 91 | $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
92 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
92 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
93 | 93 | // Get the values of nth column in the samples array |
94 | 94 | // Mean::arithmetic is called twice, can be optimized |
95 | 95 | $values = array_column($samples, $i); |
@@ -100,7 +100,7 @@ discard block |
||
100 | 100 | $this->dataType[$label][$i] = self::NOMINAL; |
101 | 101 | $this->discreteProb[$label][$i] = array_count_values($values); |
102 | 102 | $db = &$this->discreteProb[$label][$i]; |
103 | - $db = array_map(function ($el) use ($numValues) { |
|
103 | + $db = array_map(function($el) use ($numValues) { |
|
104 | 104 | return $el / $numValues; |
105 | 105 | }, $db); |
106 | 106 | } else { |
@@ -123,14 +123,14 @@ discard block |
||
123 | 123 | { |
124 | 124 | $value = $sample[$feature]; |
125 | 125 | if ($this->dataType[$label][$feature] == self::NOMINAL) { |
126 | - if (! isset($this->discreteProb[$label][$feature][$value]) || |
|
126 | + if (!isset($this->discreteProb[$label][$feature][$value]) || |
|
127 | 127 | $this->discreteProb[$label][$feature][$value] == 0) { |
128 | 128 | return self::EPSILON; |
129 | 129 | } |
130 | 130 | return $this->discreteProb[$label][$feature][$value]; |
131 | 131 | } |
132 | - $std = $this->std[$label][$feature] ; |
|
133 | - $mean= $this->mean[$label][$feature]; |
|
132 | + $std = $this->std[$label][$feature]; |
|
133 | + $mean = $this->mean[$label][$feature]; |
|
134 | 134 | // Calculate the probability density by use of normal/Gaussian distribution |
135 | 135 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
136 | 136 | // |
@@ -138,7 +138,7 @@ discard block |
||
138 | 138 | // some libraries adopt taking log of calculations such as |
139 | 139 | // scikit-learn did. |
140 | 140 | // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
141 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
141 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
142 | 142 | $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
143 | 143 | return $pdf; |
144 | 144 | } |
@@ -151,7 +151,7 @@ discard block |
||
151 | 151 | private function getSamplesByLabel($label) |
152 | 152 | { |
153 | 153 | $samples = []; |
154 | - for ($i=0; $i<$this->sampleCount; $i++) { |
|
154 | + for ($i = 0; $i < $this->sampleCount; $i++) { |
|
155 | 155 | if ($this->targets[$i] == $label) { |
156 | 156 | $samples[] = $this->samples[$i]; |
157 | 157 | } |
@@ -171,7 +171,7 @@ discard block |
||
171 | 171 | $predictions = []; |
172 | 172 | foreach ($this->labels as $label) { |
173 | 173 | $p = $this->p[$label]; |
174 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
174 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
175 | 175 | $Plf = $this->sampleProbability($sample, $i, $label); |
176 | 176 | $p += $Plf; |
177 | 177 | } |