@@ -6,10 +6,8 @@ |
||
| 6 | 6 | |
| 7 | 7 | use Phpml\Helper\Predictable; |
| 8 | 8 | use Phpml\Helper\Trainable; |
| 9 | -use Phpml\Math\Statistic\Mean; |
|
| 10 | 9 | use Phpml\Classification\Classifier; |
| 11 | 10 | use Phpml\Classification\DecisionTree; |
| 12 | -use Phpml\Classification\NaiveBayes; |
|
| 13 | 11 | |
| 14 | 12 | class Bagging implements Classifier |
| 15 | 13 | { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Ensemble; |
| 6 | 6 | |
@@ -140,7 +140,7 @@ discard block |
||
| 140 | 140 | $targets = []; |
| 141 | 141 | srand($index); |
| 142 | 142 | $bootstrapSize = $this->subsetRatio * $this->numSamples; |
| 143 | - for ($i=0; $i < $bootstrapSize; $i++) { |
|
| 143 | + for ($i = 0; $i < $bootstrapSize; $i++) { |
|
| 144 | 144 | $rand = rand(0, $this->numSamples - 1); |
| 145 | 145 | $samples[] = $this->samples[$rand]; |
| 146 | 146 | $targets[] = $this->targets[$rand]; |
@@ -154,7 +154,7 @@ discard block |
||
| 154 | 154 | protected function initClassifiers() |
| 155 | 155 | { |
| 156 | 156 | $classifiers = []; |
| 157 | - for ($i=0; $i<$this->numClassifier; $i++) { |
|
| 157 | + for ($i = 0; $i < $this->numClassifier; $i++) { |
|
| 158 | 158 | $ref = new \ReflectionClass($this->classifier); |
| 159 | 159 | if ($this->classifierOptions) { |
| 160 | 160 | $obj = $ref->newInstanceArgs($this->classifierOptions); |
@@ -1,5 +1,5 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | -declare(strict_types=1); |
|
| 2 | +declare(strict_types = 1); |
|
| 3 | 3 | |
| 4 | 4 | namespace Phpml\Classification\Ensemble; |
| 5 | 5 | |
@@ -70,11 +70,11 @@ discard block |
||
| 70 | 70 | protected function initSingleClassifier($classifier, $index) |
| 71 | 71 | { |
| 72 | 72 | if (is_float($this->featureSubsetRatio)) { |
| 73 | - $featureCount = (int)($this->featureSubsetRatio * $this->featureCount); |
|
| 73 | + $featureCount = (int) ($this->featureSubsetRatio * $this->featureCount); |
|
| 74 | 74 | } elseif ($this->featureCount == 'sqrt') { |
| 75 | - $featureCount = (int)sqrt($this->featureCount) + 1; |
|
| 75 | + $featureCount = (int) sqrt($this->featureCount) + 1; |
|
| 76 | 76 | } else { |
| 77 | - $featureCount = (int)log($this->featureCount, 2) + 1; |
|
| 77 | + $featureCount = (int) log($this->featureCount, 2) + 1; |
|
| 78 | 78 | } |
| 79 | 79 | |
| 80 | 80 | if ($featureCount >= $this->featureCount) { |
@@ -52,7 +52,7 @@ |
||
| 52 | 52 | * If normalizeInputs is set to true, then every input given to the algorithm will be standardized |
| 53 | 53 | * by use of standard deviation and mean calculation |
| 54 | 54 | * |
| 55 | - * @param int $learningRate |
|
| 55 | + * @param double $learningRate |
|
| 56 | 56 | * @param int $maxIterations |
| 57 | 57 | */ |
| 58 | 58 | public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, |
@@ -4,9 +4,6 @@ |
||
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Linear; |
| 6 | 6 | |
| 7 | -use Phpml\Helper\Predictable; |
|
| 8 | -use Phpml\Helper\Trainable; |
|
| 9 | -use Phpml\Classification\Classifier; |
|
| 10 | 7 | use Phpml\Classification\Linear\Perceptron; |
| 11 | 8 | |
| 12 | 9 | class Adaline extends Perceptron |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Linear; |
| 6 | 6 | |
@@ -15,12 +15,12 @@ discard block |
||
| 15 | 15 | /** |
| 16 | 16 | * Batch training is the default Adaline training algorithm |
| 17 | 17 | */ |
| 18 | - const BATCH_TRAINING = 1; |
|
| 18 | + const BATCH_TRAINING = 1; |
|
| 19 | 19 | |
| 20 | 20 | /** |
| 21 | 21 | * Online training: Stochastic gradient descent learning |
| 22 | 22 | */ |
| 23 | - const ONLINE_TRAINING = 2; |
|
| 23 | + const ONLINE_TRAINING = 2; |
|
| 24 | 24 | |
| 25 | 25 | /** |
| 26 | 26 | * The function whose result will be used to calculate the network error |
@@ -52,7 +52,7 @@ discard block |
||
| 52 | 52 | public function __construct(float $learningRate = 0.001, int $maxIterations = 1000, |
| 53 | 53 | bool $normalizeInputs = true, int $trainingType = self::BATCH_TRAINING) |
| 54 | 54 | { |
| 55 | - if (! in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) { |
|
| 55 | + if (!in_array($trainingType, [self::BATCH_TRAINING, self::ONLINE_TRAINING])) { |
|
| 56 | 56 | throw new \Exception("Adaline can only be trained with batch and online/stochastic gradient descent algorithm"); |
| 57 | 57 | } |
| 58 | 58 | |
@@ -104,7 +104,7 @@ discard block |
||
| 104 | 104 | protected function updateWeights(array $updates) |
| 105 | 105 | { |
| 106 | 106 | // Updates all weights at once |
| 107 | - for ($i=0; $i <= $this->featureCount; $i++) { |
|
| 107 | + for ($i = 0; $i <= $this->featureCount; $i++) { |
|
| 108 | 108 | if ($i == 0) { |
| 109 | 109 | $this->weights[0] += $this->learningRate * array_sum($updates); |
| 110 | 110 | } else { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Preprocessing; |
| 6 | 6 | |
@@ -12,7 +12,7 @@ discard block |
||
| 12 | 12 | { |
| 13 | 13 | const NORM_L1 = 1; |
| 14 | 14 | const NORM_L2 = 2; |
| 15 | - const NORM_STD= 3; |
|
| 15 | + const NORM_STD = 3; |
|
| 16 | 16 | |
| 17 | 17 | /** |
| 18 | 18 | * @var int |
@@ -117,7 +117,7 @@ discard block |
||
| 117 | 117 | foreach ($sample as $feature) { |
| 118 | 118 | $norm2 += $feature * $feature; |
| 119 | 119 | } |
| 120 | - $norm2 = sqrt((float)$norm2); |
|
| 120 | + $norm2 = sqrt((float) $norm2); |
|
| 121 | 121 | |
| 122 | 122 | if (0 == $norm2) { |
| 123 | 123 | $sample = array_fill(0, count($sample), 1); |
@@ -20,7 +20,7 @@ discard block |
||
| 20 | 20 | */ |
| 21 | 21 | protected static $errorFunction = 'outputClass'; |
| 22 | 22 | |
| 23 | - /** |
|
| 23 | + /** |
|
| 24 | 24 | * @var array |
| 25 | 25 | */ |
| 26 | 26 | protected $samples = []; |
@@ -78,7 +78,7 @@ discard block |
||
| 78 | 78 | $this->maxIterations = $maxIterations; |
| 79 | 79 | } |
| 80 | 80 | |
| 81 | - /** |
|
| 81 | + /** |
|
| 82 | 82 | * @param array $samples |
| 83 | 83 | * @param array $targets |
| 84 | 84 | */ |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Linear; |
| 6 | 6 | |
@@ -138,7 +138,7 @@ discard block |
||
| 138 | 138 | // Update bias |
| 139 | 139 | $this->weights[0] += $update * $this->learningRate; // Bias |
| 140 | 140 | // Update other weights |
| 141 | - for ($i=1; $i <= $this->featureCount; $i++) { |
|
| 141 | + for ($i = 1; $i <= $this->featureCount; $i++) { |
|
| 142 | 142 | $this->weights[$i] += $update * $sample[$i - 1] * $this->learningRate; |
| 143 | 143 | } |
| 144 | 144 | } |
@@ -190,6 +190,6 @@ discard block |
||
| 190 | 190 | |
| 191 | 191 | $predictedClass = $this->outputClass($sample); |
| 192 | 192 | |
| 193 | - return $this->labels[ $predictedClass ]; |
|
| 193 | + return $this->labels[$predictedClass]; |
|
| 194 | 194 | } |
| 195 | 195 | } |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -112,7 +112,7 @@ discard block |
||
| 112 | 112 | protected function getColumnTypes(array $samples) |
| 113 | 113 | { |
| 114 | 114 | $types = []; |
| 115 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 115 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 116 | 116 | $values = array_column($samples, $i); |
| 117 | 117 | $isCategorical = $this->isCategoricalColumn($values); |
| 118 | 118 | $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOS; |
@@ -136,7 +136,7 @@ discard block |
||
| 136 | 136 | // otherwise group the records so that we can classify the leaf |
| 137 | 137 | // in case maximum depth is reached |
| 138 | 138 | $leftRecords = []; |
| 139 | - $rightRecords= []; |
|
| 139 | + $rightRecords = []; |
|
| 140 | 140 | $remainingTargets = []; |
| 141 | 141 | $prevRecord = null; |
| 142 | 142 | $allSame = true; |
@@ -154,12 +154,12 @@ discard block |
||
| 154 | 154 | if ($split->evaluate($record)) { |
| 155 | 155 | $leftRecords[] = $recordNo; |
| 156 | 156 | } else { |
| 157 | - $rightRecords[]= $recordNo; |
|
| 157 | + $rightRecords[] = $recordNo; |
|
| 158 | 158 | } |
| 159 | 159 | |
| 160 | 160 | // Group remaining targets |
| 161 | 161 | $target = $this->targets[$recordNo]; |
| 162 | - if (! array_key_exists($target, $remainingTargets)) { |
|
| 162 | + if (!array_key_exists($target, $remainingTargets)) { |
|
| 163 | 163 | $remainingTargets[$target] = 1; |
| 164 | 164 | } else { |
| 165 | 165 | $remainingTargets[$target]++; |
@@ -175,7 +175,7 @@ discard block |
||
| 175 | 175 | $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); |
| 176 | 176 | } |
| 177 | 177 | if ($rightRecords) { |
| 178 | - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 178 | + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 179 | 179 | } |
| 180 | 180 | } |
| 181 | 181 | return $split; |
@@ -234,7 +234,7 @@ discard block |
||
| 234 | 234 | protected function getSelectedFeatures() |
| 235 | 235 | { |
| 236 | 236 | $allFeatures = range(0, $this->featureCount - 1); |
| 237 | - if ($this->numUsableFeatures == 0 && ! $this->selectedFeatures) { |
|
| 237 | + if ($this->numUsableFeatures == 0 && !$this->selectedFeatures) { |
|
| 238 | 238 | return $allFeatures; |
| 239 | 239 | } |
| 240 | 240 | |
@@ -270,7 +270,7 @@ discard block |
||
| 270 | 270 | $countMatrix[$label][$rowIndex]++; |
| 271 | 271 | } |
| 272 | 272 | $giniParts = [0, 0]; |
| 273 | - for ($i=0; $i<=1; $i++) { |
|
| 273 | + for ($i = 0; $i <= 1; $i++) { |
|
| 274 | 274 | $part = 0; |
| 275 | 275 | $sum = array_sum(array_column($countMatrix, $i)); |
| 276 | 276 | if ($sum > 0) { |
@@ -292,7 +292,7 @@ discard block |
||
| 292 | 292 | // Detect and convert continuous data column values into |
| 293 | 293 | // discrete values by using the median as a threshold value |
| 294 | 294 | $columns = []; |
| 295 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 295 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 296 | 296 | $values = array_column($samples, $i); |
| 297 | 297 | if ($this->columnTypes[$i] == self::CONTINUOS) { |
| 298 | 298 | $median = Mean::median($values); |
@@ -184,7 +184,7 @@ discard block |
||
| 184 | 184 | |
| 185 | 185 | /** |
| 186 | 186 | * @param array $records |
| 187 | - * @return DecisionTreeLeaf[] |
|
| 187 | + * @return null|DecisionTreeLeaf |
|
| 188 | 188 | */ |
| 189 | 189 | protected function getBestSplit($records) |
| 190 | 190 | { |
@@ -377,7 +377,7 @@ discard block |
||
| 377 | 377 | /** |
| 378 | 378 | * Used to set predefined features to consider while deciding which column to use for a split |
| 379 | 379 | * |
| 380 | - * @param array $selectedFeatures |
|
| 380 | + * @param integer[] $selectedFeatures |
|
| 381 | 381 | */ |
| 382 | 382 | protected function setSelectedFeatures(array $selectedFeatures) |
| 383 | 383 | { |
@@ -415,7 +415,6 @@ discard block |
||
| 415 | 415 | * each column in the given dataset. The importance values are |
| 416 | 416 | * normalized and their total makes 1.<br/> |
| 417 | 417 | * |
| 418 | - * @param array $labels |
|
| 419 | 418 | * @return array |
| 420 | 419 | */ |
| 421 | 420 | public function getFeatureImportances() |
@@ -455,7 +454,6 @@ discard block |
||
| 455 | 454 | * |
| 456 | 455 | * @param int $column |
| 457 | 456 | * @param DecisionTreeLeaf |
| 458 | - * @param array $collected |
|
| 459 | 457 | * |
| 460 | 458 | * @return array |
| 461 | 459 | */ |
@@ -158,8 +158,8 @@ |
||
| 158 | 158 | * @param string $operator |
| 159 | 159 | * @param array $values |
| 160 | 160 | * @param array $targets |
| 161 | - * @param mixed $leftLabel |
|
| 162 | - * @param mixed $rightLabel |
|
| 161 | + * @param string $leftLabel |
|
| 162 | + * @param string $rightLabel |
|
| 163 | 163 | */ |
| 164 | 164 | protected function calculateErrorRate(float $threshold, string $operator, array $values, array $targets, $leftLabel, $rightLabel) |
| 165 | 165 | { |
@@ -6,9 +6,7 @@ |
||
| 6 | 6 | |
| 7 | 7 | use Phpml\Helper\Predictable; |
| 8 | 8 | use Phpml\Helper\Trainable; |
| 9 | -use Phpml\Classification\Classifier; |
|
| 10 | 9 | use Phpml\Classification\DecisionTree; |
| 11 | -use Phpml\Classification\DecisionTree\DecisionTreeLeaf; |
|
| 12 | 10 | |
| 13 | 11 | class DecisionStump extends DecisionTree |
| 14 | 12 | { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Linear; |
| 6 | 6 | |
@@ -122,7 +122,7 @@ discard block |
||
| 122 | 122 | $stepSize = ($maxValue - $minValue) / 100.0; |
| 123 | 123 | |
| 124 | 124 | $leftLabel = $this->tree->leftLeaf->classValue; |
| 125 | - $rightLabel= $this->tree->rightLeaf->classValue; |
|
| 125 | + $rightLabel = $this->tree->rightLeaf->classValue; |
|
| 126 | 126 | |
| 127 | 127 | $bestOperator = $this->tree->operator; |
| 128 | 128 | $bestThreshold = $this->tree->numericValue; |
@@ -130,8 +130,8 @@ discard block |
||
| 130 | 130 | $bestThreshold, $bestOperator, $values, $targets, $leftLabel, $rightLabel); |
| 131 | 131 | |
| 132 | 132 | foreach (['<=', '>'] as $operator) { |
| 133 | - for ($step = $minValue; $step <= $maxValue; $step+= $stepSize) { |
|
| 134 | - $threshold = (float)$step; |
|
| 133 | + for ($step = $minValue; $step <= $maxValue; $step += $stepSize) { |
|
| 134 | + $threshold = (float) $step; |
|
| 135 | 135 | $errorRate = $this->calculateErrorRate( |
| 136 | 136 | $threshold, $operator, $values, $targets, $leftLabel, $rightLabel); |
| 137 | 137 | |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Ensemble; |
| 6 | 6 | |
@@ -122,7 +122,7 @@ discard block |
||
| 122 | 122 | // classifiers as well |
| 123 | 123 | $minErrorRate = 1.0; |
| 124 | 124 | $bestClassifier = null; |
| 125 | - for ($i=0; $i < $this->featureCount; $i++) { |
|
| 125 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 126 | 126 | $stump = new DecisionStump($i); |
| 127 | 127 | $stump->setSampleWeights($this->weights); |
| 128 | 128 | $stump->train($this->samples, $this->targets); |
@@ -185,6 +185,6 @@ discard block |
||
| 185 | 185 | $sum += $h * $alpha; |
| 186 | 186 | } |
| 187 | 187 | |
| 188 | - return $this->labels[ $sum > 0 ? 1 : -1]; |
|
| 188 | + return $this->labels[$sum > 0 ? 1 : -1]; |
|
| 189 | 189 | } |
| 190 | 190 | } |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\DecisionTree; |
| 6 | 6 | |
@@ -34,7 +34,7 @@ discard block |
||
| 34 | 34 | /** |
| 35 | 35 | * @var DecisionTreeLeaf |
| 36 | 36 | */ |
| 37 | - public $rightLeaf= null; |
|
| 37 | + public $rightLeaf = null; |
|
| 38 | 38 | |
| 39 | 39 | /** |
| 40 | 40 | * @var array |
@@ -79,7 +79,7 @@ discard block |
||
| 79 | 79 | |
| 80 | 80 | if ($this->isContinuous) { |
| 81 | 81 | $op = $this->operator; |
| 82 | - $value= $this->numericValue; |
|
| 82 | + $value = $this->numericValue; |
|
| 83 | 83 | $recordField = strval($recordField); |
| 84 | 84 | eval("\$result = $recordField $op $value;"); |
| 85 | 85 | return $result; |
@@ -100,16 +100,16 @@ discard block |
||
| 100 | 100 | return 0.0; |
| 101 | 101 | } |
| 102 | 102 | |
| 103 | - $nodeSampleCount = (float)count($this->records); |
|
| 103 | + $nodeSampleCount = (float) count($this->records); |
|
| 104 | 104 | $iT = $this->giniIndex; |
| 105 | 105 | |
| 106 | 106 | if ($this->leftLeaf) { |
| 107 | - $pL = count($this->leftLeaf->records)/$nodeSampleCount; |
|
| 107 | + $pL = count($this->leftLeaf->records) / $nodeSampleCount; |
|
| 108 | 108 | $iT -= $pL * $this->leftLeaf->giniIndex; |
| 109 | 109 | } |
| 110 | 110 | |
| 111 | 111 | if ($this->rightLeaf) { |
| 112 | - $pR = count($this->rightLeaf->records)/$nodeSampleCount; |
|
| 112 | + $pR = count($this->rightLeaf->records) / $nodeSampleCount; |
|
| 113 | 113 | $iT -= $pR * $this->rightLeaf->giniIndex; |
| 114 | 114 | } |
| 115 | 115 | |
@@ -133,25 +133,25 @@ discard block |
||
| 133 | 133 | } else { |
| 134 | 134 | $col = "col_$this->columnIndex"; |
| 135 | 135 | } |
| 136 | - if (! preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 136 | + if (!preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 137 | 137 | $value = "=$value"; |
| 138 | 138 | } |
| 139 | - $value = "<b>$col $value</b><br>Gini: ". number_format($this->giniIndex, 2); |
|
| 139 | + $value = "<b>$col $value</b><br>Gini: ".number_format($this->giniIndex, 2); |
|
| 140 | 140 | } |
| 141 | 141 | $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'> |
| 142 | 142 | $value</td></tr>"; |
| 143 | 143 | if ($this->leftLeaf || $this->rightLeaf) { |
| 144 | - $str .='<tr>'; |
|
| 144 | + $str .= '<tr>'; |
|
| 145 | 145 | if ($this->leftLeaf) { |
| 146 | - $str .="<td valign=top><b>| Yes</b><br>" . $this->leftLeaf->getHTML($columnNames) . "</td>"; |
|
| 146 | + $str .= "<td valign=top><b>| Yes</b><br>".$this->leftLeaf->getHTML($columnNames)."</td>"; |
|
| 147 | 147 | } else { |
| 148 | - $str .='<td></td>'; |
|
| 148 | + $str .= '<td></td>'; |
|
| 149 | 149 | } |
| 150 | - $str .='<td> </td>'; |
|
| 150 | + $str .= '<td> </td>'; |
|
| 151 | 151 | if ($this->rightLeaf) { |
| 152 | - $str .="<td valign=top align=right><b>No |</b><br>" . $this->rightLeaf->getHTML($columnNames) . "</td>"; |
|
| 152 | + $str .= "<td valign=top align=right><b>No |</b><br>".$this->rightLeaf->getHTML($columnNames)."</td>"; |
|
| 153 | 153 | } else { |
| 154 | - $str .='<td></td>'; |
|
| 154 | + $str .= '<td></td>'; |
|
| 155 | 155 | } |
| 156 | 156 | $str .= '</tr>'; |
| 157 | 157 | } |