@@ -1,5 +1,5 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | -declare(strict_types=1); |
|
| 2 | +declare(strict_types = 1); |
|
| 3 | 3 | |
| 4 | 4 | namespace Phpml\Clustering; |
| 5 | 5 | |
@@ -85,15 +85,15 @@ discard block |
||
| 85 | 85 | protected function generateRandomMembership(int $rows, int $cols) |
| 86 | 86 | { |
| 87 | 87 | $this->membership = []; |
| 88 | - for ($i=0; $i < $rows; $i++) { |
|
| 88 | + for ($i = 0; $i < $rows; $i++) { |
|
| 89 | 89 | $row = []; |
| 90 | 90 | $total = 0.0; |
| 91 | - for ($k=0; $k < $cols; $k++) { |
|
| 91 | + for ($k = 0; $k < $cols; $k++) { |
|
| 92 | 92 | $val = rand(1, 5) / 10.0; |
| 93 | 93 | $row[] = $val; |
| 94 | 94 | $total += $val; |
| 95 | 95 | } |
| 96 | - $this->membership[] = array_map(function ($val) use ($total) { |
|
| 96 | + $this->membership[] = array_map(function($val) use ($total) { |
|
| 97 | 97 | return $val / $total; |
| 98 | 98 | }, $row); |
| 99 | 99 | } |
@@ -102,17 +102,17 @@ discard block |
||
| 102 | 102 | protected function updateClusters() |
| 103 | 103 | { |
| 104 | 104 | $dim = $this->space->getDimension(); |
| 105 | - if (! $this->clusters) { |
|
| 105 | + if (!$this->clusters) { |
|
| 106 | 106 | $this->clusters = []; |
| 107 | - for ($i=0; $i<$this->clustersNumber; $i++) { |
|
| 107 | + for ($i = 0; $i < $this->clustersNumber; $i++) { |
|
| 108 | 108 | $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0)); |
| 109 | 109 | } |
| 110 | 110 | } |
| 111 | 111 | |
| 112 | - for ($i=0; $i<$this->clustersNumber; $i++) { |
|
| 112 | + for ($i = 0; $i < $this->clustersNumber; $i++) { |
|
| 113 | 113 | $cluster = $this->clusters[$i]; |
| 114 | 114 | $center = $cluster->getCoordinates(); |
| 115 | - for ($k=0; $k<$dim; $k++) { |
|
| 115 | + for ($k = 0; $k < $dim; $k++) { |
|
| 116 | 116 | $a = $this->getMembershipRowTotal($i, $k, true); |
| 117 | 117 | $b = $this->getMembershipRowTotal($i, $k, false); |
| 118 | 118 | $center[$k] = $a / $b; |
@@ -202,7 +202,7 @@ discard block |
||
| 202 | 202 | { |
| 203 | 203 | // Initialize variables, clusters and membership matrix |
| 204 | 204 | $this->sampleCount = count($samples); |
| 205 | - $this->samples =& $samples; |
|
| 205 | + $this->samples = & $samples; |
|
| 206 | 206 | $this->space = new Space(count($samples[0])); |
| 207 | 207 | $this->initClusters(); |
| 208 | 208 | |
@@ -223,7 +223,7 @@ discard block |
||
| 223 | 223 | } while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations); |
| 224 | 224 | |
| 225 | 225 | // Attach (hard cluster) each data point to the nearest cluster |
| 226 | - for ($k=0; $k<$this->sampleCount; $k++) { |
|
| 226 | + for ($k = 0; $k < $this->sampleCount; $k++) { |
|
| 227 | 227 | $column = array_column($this->membership, $k); |
| 228 | 228 | arsort($column); |
| 229 | 229 | reset($column); |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -13,8 +13,8 @@ discard block |
||
| 13 | 13 | { |
| 14 | 14 | use Trainable, Predictable; |
| 15 | 15 | |
| 16 | - const CONTINUOS = 1; |
|
| 17 | - const NOMINAL = 2; |
|
| 16 | + const CONTINUOS = 1; |
|
| 17 | + const NOMINAL = 2; |
|
| 18 | 18 | const EPSILON = 1e-10; |
| 19 | 19 | |
| 20 | 20 | /** |
@@ -25,7 +25,7 @@ discard block |
||
| 25 | 25 | /** |
| 26 | 26 | * @var array |
| 27 | 27 | */ |
| 28 | - private $mean= []; |
|
| 28 | + private $mean = []; |
|
| 29 | 29 | |
| 30 | 30 | /** |
| 31 | 31 | * @var array |
@@ -86,10 +86,10 @@ discard block |
||
| 86 | 86 | private function calculateStatistics($label, $samples) |
| 87 | 87 | { |
| 88 | 88 | $this->std[$label] = array_fill(0, $this->featureCount, 0); |
| 89 | - $this->mean[$label]= array_fill(0, $this->featureCount, 0); |
|
| 89 | + $this->mean[$label] = array_fill(0, $this->featureCount, 0); |
|
| 90 | 90 | $this->dataType[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 91 | 91 | $this->discreteProb[$label] = array_fill(0, $this->featureCount, self::CONTINUOS); |
| 92 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 92 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 93 | 93 | // Get the values of nth column in the samples array |
| 94 | 94 | // Mean::arithmetic is called twice, can be optimized |
| 95 | 95 | $values = array_column($samples, $i); |
@@ -100,7 +100,7 @@ discard block |
||
| 100 | 100 | $this->dataType[$label][$i] = self::NOMINAL; |
| 101 | 101 | $this->discreteProb[$label][$i] = array_count_values($values); |
| 102 | 102 | $db = &$this->discreteProb[$label][$i]; |
| 103 | - $db = array_map(function ($el) use ($numValues) { |
|
| 103 | + $db = array_map(function($el) use ($numValues) { |
|
| 104 | 104 | return $el / $numValues; |
| 105 | 105 | }, $db); |
| 106 | 106 | } else { |
@@ -123,14 +123,14 @@ discard block |
||
| 123 | 123 | { |
| 124 | 124 | $value = $sample[$feature]; |
| 125 | 125 | if ($this->dataType[$label][$feature] == self::NOMINAL) { |
| 126 | - if (! isset($this->discreteProb[$label][$feature][$value]) || |
|
| 126 | + if (!isset($this->discreteProb[$label][$feature][$value]) || |
|
| 127 | 127 | $this->discreteProb[$label][$feature][$value] == 0) { |
| 128 | 128 | return self::EPSILON; |
| 129 | 129 | } |
| 130 | 130 | return $this->discreteProb[$label][$feature][$value]; |
| 131 | 131 | } |
| 132 | - $std = $this->std[$label][$feature] ; |
|
| 133 | - $mean= $this->mean[$label][$feature]; |
|
| 132 | + $std = $this->std[$label][$feature]; |
|
| 133 | + $mean = $this->mean[$label][$feature]; |
|
| 134 | 134 | // Calculate the probability density by use of normal/Gaussian distribution |
| 135 | 135 | // Ref: https://en.wikipedia.org/wiki/Normal_distribution |
| 136 | 136 | // |
@@ -138,7 +138,7 @@ discard block |
||
| 138 | 138 | // some libraries adopt taking log of calculations such as |
| 139 | 139 | // scikit-learn did. |
| 140 | 140 | // (See : https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/naive_bayes.py) |
| 141 | - $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 141 | + $pdf = -0.5 * log(2.0 * pi() * $std * $std); |
|
| 142 | 142 | $pdf -= 0.5 * pow($value - $mean, 2) / ($std * $std); |
| 143 | 143 | return $pdf; |
| 144 | 144 | } |
@@ -151,7 +151,7 @@ discard block |
||
| 151 | 151 | private function getSamplesByLabel($label) |
| 152 | 152 | { |
| 153 | 153 | $samples = array(); |
| 154 | - for ($i=0; $i<$this->sampleCount; $i++) { |
|
| 154 | + for ($i = 0; $i < $this->sampleCount; $i++) { |
|
| 155 | 155 | if ($this->targets[$i] == $label) { |
| 156 | 156 | $samples[] = $this->samples[$i]; |
| 157 | 157 | } |
@@ -171,7 +171,7 @@ discard block |
||
| 171 | 171 | $predictions = array(); |
| 172 | 172 | foreach ($this->labels as $label) { |
| 173 | 173 | $p = $this->p[$label]; |
| 174 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 174 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 175 | 175 | $Plf = $this->sampleProbability($sample, $i, $label); |
| 176 | 176 | $p += $Plf; |
| 177 | 177 | } |
@@ -131,7 +131,7 @@ discard block |
||
| 131 | 131 | |
| 132 | 132 | /** |
| 133 | 133 | * @param array $records |
| 134 | - * @return DecisionTreeLeaf[] |
|
| 134 | + * @return null|DecisionTreeLeaf |
|
| 135 | 135 | */ |
| 136 | 136 | protected function getBestSplit($records) |
| 137 | 137 | { |
@@ -254,7 +254,7 @@ discard block |
||
| 254 | 254 | |
| 255 | 255 | /** |
| 256 | 256 | * @param array $sample |
| 257 | - * @return mixed |
|
| 257 | + * @return string |
|
| 258 | 258 | */ |
| 259 | 259 | protected function predictSample(array $sample) |
| 260 | 260 | { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -72,7 +72,7 @@ discard block |
||
| 72 | 72 | protected function getColumnTypes(array $samples) |
| 73 | 73 | { |
| 74 | 74 | $types = []; |
| 75 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 75 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 76 | 76 | $values = array_column($samples, $i); |
| 77 | 77 | $isCategorical = $this->isCategoricalColumn($values); |
| 78 | 78 | $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOS; |
@@ -92,7 +92,7 @@ discard block |
||
| 92 | 92 | $this->actualDepth = $depth; |
| 93 | 93 | } |
| 94 | 94 | $leftRecords = []; |
| 95 | - $rightRecords= []; |
|
| 95 | + $rightRecords = []; |
|
| 96 | 96 | $remainingTargets = []; |
| 97 | 97 | $prevRecord = null; |
| 98 | 98 | $allSame = true; |
@@ -105,10 +105,10 @@ discard block |
||
| 105 | 105 | if ($split->evaluate($record)) { |
| 106 | 106 | $leftRecords[] = $recordNo; |
| 107 | 107 | } else { |
| 108 | - $rightRecords[]= $recordNo; |
|
| 108 | + $rightRecords[] = $recordNo; |
|
| 109 | 109 | } |
| 110 | 110 | $target = $this->targets[$recordNo]; |
| 111 | - if (! in_array($target, $remainingTargets)) { |
|
| 111 | + if (!in_array($target, $remainingTargets)) { |
|
| 112 | 112 | $remainingTargets[] = $target; |
| 113 | 113 | } |
| 114 | 114 | } |
@@ -123,7 +123,7 @@ discard block |
||
| 123 | 123 | $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); |
| 124 | 124 | } |
| 125 | 125 | if ($rightRecords) { |
| 126 | - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 126 | + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 127 | 127 | } |
| 128 | 128 | } |
| 129 | 129 | return $split; |
@@ -140,7 +140,7 @@ discard block |
||
| 140 | 140 | $samples = array_combine($records, $this->preprocess($samples)); |
| 141 | 141 | $bestGiniVal = 1; |
| 142 | 142 | $bestSplit = null; |
| 143 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 143 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 144 | 144 | $colValues = []; |
| 145 | 145 | $baseValue = null; |
| 146 | 146 | foreach ($samples as $index => $row) { |
@@ -180,7 +180,7 @@ discard block |
||
| 180 | 180 | $countMatrix[$label][$rowIndex]++; |
| 181 | 181 | } |
| 182 | 182 | $giniParts = [0, 0]; |
| 183 | - for ($i=0; $i<=1; $i++) { |
|
| 183 | + for ($i = 0; $i <= 1; $i++) { |
|
| 184 | 184 | $part = 0; |
| 185 | 185 | $sum = array_sum(array_column($countMatrix, $i)); |
| 186 | 186 | if ($sum > 0) { |
@@ -202,7 +202,7 @@ discard block |
||
| 202 | 202 | // Detect and convert continuous data column values into |
| 203 | 203 | // discrete values by using the median as a threshold value |
| 204 | 204 | $columns = array(); |
| 205 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 205 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 206 | 206 | $values = array_column($samples, $i); |
| 207 | 207 | if ($this->columnTypes[$i] == self::CONTINUOS) { |
| 208 | 208 | $median = Mean::median($values); |
@@ -1,5 +1,5 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | -declare(strict_types=1); |
|
| 2 | +declare(strict_types = 1); |
|
| 3 | 3 | |
| 4 | 4 | namespace Phpml\Classification\DecisionTree; |
| 5 | 5 | |
@@ -24,7 +24,7 @@ discard block |
||
| 24 | 24 | /** |
| 25 | 25 | * @var DecisionTreeLeaf |
| 26 | 26 | */ |
| 27 | - public $rightLeaf= null; |
|
| 27 | + public $rightLeaf = null; |
|
| 28 | 28 | |
| 29 | 29 | /** |
| 30 | 30 | * @var array |
@@ -63,7 +63,7 @@ discard block |
||
| 63 | 63 | $recordField = $record[$this->columnIndex]; |
| 64 | 64 | if (preg_match("/^([<>=]{1,2})\s*(.*)/", $this->value, $matches)) { |
| 65 | 65 | $op = $matches[1]; |
| 66 | - $value= floatval($matches[2]); |
|
| 66 | + $value = floatval($matches[2]); |
|
| 67 | 67 | $recordField = strval($recordField); |
| 68 | 68 | eval("\$result = $recordField $op $value;"); |
| 69 | 69 | return $result; |
@@ -78,25 +78,25 @@ discard block |
||
| 78 | 78 | } else { |
| 79 | 79 | $value = $this->value; |
| 80 | 80 | $col = "col_$this->columnIndex"; |
| 81 | - if (! preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 81 | + if (!preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 82 | 82 | $value = "=$value"; |
| 83 | 83 | } |
| 84 | - $value = "<b>$col $value</b><br>Gini: ". number_format($this->giniIndex, 2); |
|
| 84 | + $value = "<b>$col $value</b><br>Gini: ".number_format($this->giniIndex, 2); |
|
| 85 | 85 | } |
| 86 | 86 | $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'> |
| 87 | 87 | $value</td></tr>"; |
| 88 | 88 | if ($this->leftLeaf || $this->rightLeaf) { |
| 89 | - $str .='<tr>'; |
|
| 89 | + $str .= '<tr>'; |
|
| 90 | 90 | if ($this->leftLeaf) { |
| 91 | - $str .="<td valign=top><b>| Yes</b><br>$this->leftLeaf</td>"; |
|
| 91 | + $str .= "<td valign=top><b>| Yes</b><br>$this->leftLeaf</td>"; |
|
| 92 | 92 | } else { |
| 93 | - $str .='<td></td>'; |
|
| 93 | + $str .= '<td></td>'; |
|
| 94 | 94 | } |
| 95 | - $str .='<td> </td>'; |
|
| 95 | + $str .= '<td> </td>'; |
|
| 96 | 96 | if ($this->rightLeaf) { |
| 97 | - $str .="<td valign=top align=right><b>No |</b><br>$this->rightLeaf</td>"; |
|
| 97 | + $str .= "<td valign=top align=right><b>No |</b><br>$this->rightLeaf</td>"; |
|
| 98 | 98 | } else { |
| 99 | - $str .='<td></td>'; |
|
| 99 | + $str .= '<td></td>'; |
|
| 100 | 100 | } |
| 101 | 101 | $str .= '</tr>'; |
| 102 | 102 | } |