@@ -114,6 +114,7 @@ |
||
| 114 | 114 | } |
| 115 | 115 | |
| 116 | 116 | /** |
| 117 | + * @param integer $index |
|
| 117 | 118 | * @return array |
| 118 | 119 | */ |
| 119 | 120 | protected function getRandomSubset($index) |
@@ -6,10 +6,8 @@ |
||
| 6 | 6 | |
| 7 | 7 | use Phpml\Helper\Predictable; |
| 8 | 8 | use Phpml\Helper\Trainable; |
| 9 | -use Phpml\Math\Statistic\Mean; |
|
| 10 | 9 | use Phpml\Classification\Classifier; |
| 11 | 10 | use Phpml\Classification\DecisionTree; |
| 12 | -use Phpml\Classification\NaiveBayes; |
|
| 13 | 11 | |
| 14 | 12 | class Bagging implements Classifier |
| 15 | 13 | { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\Ensemble; |
| 6 | 6 | |
@@ -118,13 +118,13 @@ discard block |
||
| 118 | 118 | */ |
| 119 | 119 | protected function getRandomSubset($index) |
| 120 | 120 | { |
| 121 | - $subsetLength = (int)ceil(sqrt($this->numSamples)); |
|
| 121 | + $subsetLength = (int) ceil(sqrt($this->numSamples)); |
|
| 122 | 122 | $denom = $this->subsetRatio / 2; |
| 123 | 123 | $subsetLength = $this->numSamples / (1 / $denom); |
| 124 | 124 | $index = $index * $subsetLength % $this->numSamples; |
| 125 | 125 | $samples = []; |
| 126 | 126 | $targets = []; |
| 127 | - for ($i=0; $i<$subsetLength * 2; $i++) { |
|
| 127 | + for ($i = 0; $i < $subsetLength * 2; $i++) { |
|
| 128 | 128 | $rand = rand($index, $this->numSamples - 1); |
| 129 | 129 | $samples[] = $this->samples[$rand]; |
| 130 | 130 | $targets[] = $this->targets[$rand]; |
@@ -138,7 +138,7 @@ discard block |
||
| 138 | 138 | protected function initClassifiers() |
| 139 | 139 | { |
| 140 | 140 | $classifiers = []; |
| 141 | - for ($i=0; $i<$this->numClassifier; $i++) { |
|
| 141 | + for ($i = 0; $i < $this->numClassifier; $i++) { |
|
| 142 | 142 | $ref = new \ReflectionClass($this->classifier); |
| 143 | 143 | if ($this->classifierOptions) { |
| 144 | 144 | $obj = $ref->newInstanceArgs($this->classifierOptions); |
@@ -4,8 +4,6 @@ |
||
| 4 | 4 | namespace Phpml\Classification\Ensemble; |
| 5 | 5 | |
| 6 | 6 | use Phpml\Classification\Ensemble\Bagging; |
| 7 | -use Phpml\Classification\DecisionTree; |
|
| 8 | -use Phpml\Classification\NaiveBayes; |
|
| 9 | 7 | |
| 10 | 8 | class RandomForest extends Bagging |
| 11 | 9 | { |
@@ -1,5 +1,5 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | -declare(strict_types=1); |
|
| 2 | +declare(strict_types = 1); |
|
| 3 | 3 | |
| 4 | 4 | namespace Phpml\Classification\Ensemble; |
| 5 | 5 | |
@@ -35,7 +35,7 @@ discard block |
||
| 35 | 35 | { |
| 36 | 36 | list($subset, $targets) = parent::getRandomSubset($index); |
| 37 | 37 | |
| 38 | - $featureCount = (int)ceil($this->featureSubsetRatio * $this->featureCount); |
|
| 38 | + $featureCount = (int) ceil($this->featureSubsetRatio * $this->featureCount); |
|
| 39 | 39 | if ($featureCount >= $this->featureCount) { |
| 40 | 40 | $featureCount = $this->featureCount; |
| 41 | 41 | } |
@@ -49,7 +49,7 @@ discard block |
||
| 49 | 49 | foreach ($features as $colIndex) { |
| 50 | 50 | $columns[] = array_column($subset, $colIndex); |
| 51 | 51 | } |
| 52 | - $subset= array_map(null, ...$columns); |
|
| 52 | + $subset = array_map(null, ...$columns); |
|
| 53 | 53 | |
| 54 | 54 | return [$subset, $targets]; |
| 55 | 55 | } |
@@ -61,7 +61,7 @@ discard block |
||
| 61 | 61 | protected function predictSample(array $sample) |
| 62 | 62 | { |
| 63 | 63 | $predictions = []; |
| 64 | - for ($i=0; $i<count($this->classifiers); $i++) { |
|
| 64 | + for ($i = 0; $i < count($this->classifiers); $i++) { |
|
| 65 | 65 | $samplePiece = []; |
| 66 | 66 | foreach ($this->classifierColumns[$i] as $colIndex) { |
| 67 | 67 | $samplePiece[] = $sample[$colIndex]; |
@@ -136,7 +136,7 @@ |
||
| 136 | 136 | |
| 137 | 137 | /** |
| 138 | 138 | * @param array $records |
| 139 | - * @return DecisionTreeLeaf[] |
|
| 139 | + * @return null|DecisionTreeLeaf |
|
| 140 | 140 | */ |
| 141 | 141 | protected function getBestSplit($records) |
| 142 | 142 | { |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification; |
| 6 | 6 | |
@@ -76,7 +76,7 @@ discard block |
||
| 76 | 76 | protected function getColumnTypes(array $samples) |
| 77 | 77 | { |
| 78 | 78 | $types = []; |
| 79 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 79 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 80 | 80 | $values = array_column($samples, $i); |
| 81 | 81 | $isCategorical = $this->isCategoricalColumn($values); |
| 82 | 82 | $types[] = $isCategorical ? self::NOMINAL : self::CONTINUOS; |
@@ -96,7 +96,7 @@ discard block |
||
| 96 | 96 | $this->actualDepth = $depth; |
| 97 | 97 | } |
| 98 | 98 | $leftRecords = []; |
| 99 | - $rightRecords= []; |
|
| 99 | + $rightRecords = []; |
|
| 100 | 100 | $remainingTargets = []; |
| 101 | 101 | $prevRecord = null; |
| 102 | 102 | $allSame = true; |
@@ -109,10 +109,10 @@ discard block |
||
| 109 | 109 | if ($split->evaluate($record)) { |
| 110 | 110 | $leftRecords[] = $recordNo; |
| 111 | 111 | } else { |
| 112 | - $rightRecords[]= $recordNo; |
|
| 112 | + $rightRecords[] = $recordNo; |
|
| 113 | 113 | } |
| 114 | 114 | $target = $this->targets[$recordNo]; |
| 115 | - if (! in_array($target, $remainingTargets)) { |
|
| 115 | + if (!in_array($target, $remainingTargets)) { |
|
| 116 | 116 | $remainingTargets[] = $target; |
| 117 | 117 | } |
| 118 | 118 | } |
@@ -127,7 +127,7 @@ discard block |
||
| 127 | 127 | $split->leftLeaf = $this->getSplitLeaf($leftRecords, $depth + 1); |
| 128 | 128 | } |
| 129 | 129 | if ($rightRecords) { |
| 130 | - $split->rightLeaf= $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 130 | + $split->rightLeaf = $this->getSplitLeaf($rightRecords, $depth + 1); |
|
| 131 | 131 | } |
| 132 | 132 | } |
| 133 | 133 | return $split; |
@@ -144,7 +144,7 @@ discard block |
||
| 144 | 144 | $samples = array_combine($records, $this->preprocess($samples)); |
| 145 | 145 | $bestGiniVal = 1; |
| 146 | 146 | $bestSplit = null; |
| 147 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 147 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 148 | 148 | $colValues = []; |
| 149 | 149 | foreach ($samples as $index => $row) { |
| 150 | 150 | $colValues[$index] = $row[$i]; |
@@ -183,7 +183,7 @@ discard block |
||
| 183 | 183 | $countMatrix[$label][$rowIndex]++; |
| 184 | 184 | } |
| 185 | 185 | $giniParts = [0, 0]; |
| 186 | - for ($i=0; $i<=1; $i++) { |
|
| 186 | + for ($i = 0; $i <= 1; $i++) { |
|
| 187 | 187 | $part = 0; |
| 188 | 188 | $sum = array_sum(array_column($countMatrix, $i)); |
| 189 | 189 | if ($sum > 0) { |
@@ -205,7 +205,7 @@ discard block |
||
| 205 | 205 | // Detect and convert continuous data column values into |
| 206 | 206 | // discrete values by using the median as a threshold value |
| 207 | 207 | $columns = []; |
| 208 | - for ($i=0; $i<$this->featureCount; $i++) { |
|
| 208 | + for ($i = 0; $i < $this->featureCount; $i++) { |
|
| 209 | 209 | $values = array_column($samples, $i); |
| 210 | 210 | if ($this->columnTypes[$i] == self::CONTINUOS) { |
| 211 | 211 | $median = Mean::median($values); |
@@ -1,6 +1,6 @@ discard block |
||
| 1 | 1 | <?php |
| 2 | 2 | |
| 3 | -declare(strict_types=1); |
|
| 3 | +declare(strict_types = 1); |
|
| 4 | 4 | |
| 5 | 5 | namespace Phpml\Classification\DecisionTree; |
| 6 | 6 | |
@@ -25,7 +25,7 @@ discard block |
||
| 25 | 25 | /** |
| 26 | 26 | * @var DecisionTreeLeaf |
| 27 | 27 | */ |
| 28 | - public $rightLeaf= null; |
|
| 28 | + public $rightLeaf = null; |
|
| 29 | 29 | |
| 30 | 30 | /** |
| 31 | 31 | * @var array |
@@ -64,7 +64,7 @@ discard block |
||
| 64 | 64 | $recordField = $record[$this->columnIndex]; |
| 65 | 65 | if (is_string($this->value) && preg_match("/^([<>=]{1,2})\s*(.*)/", $this->value, $matches)) { |
| 66 | 66 | $op = $matches[1]; |
| 67 | - $value= floatval($matches[2]); |
|
| 67 | + $value = floatval($matches[2]); |
|
| 68 | 68 | $recordField = strval($recordField); |
| 69 | 69 | eval("\$result = $recordField $op $value;"); |
| 70 | 70 | return $result; |
@@ -79,25 +79,25 @@ discard block |
||
| 79 | 79 | } else { |
| 80 | 80 | $value = $this->value; |
| 81 | 81 | $col = "col_$this->columnIndex"; |
| 82 | - if (! preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 82 | + if (!preg_match("/^[<>=]{1,2}/", $value)) { |
|
| 83 | 83 | $value = "=$value"; |
| 84 | 84 | } |
| 85 | - $value = "<b>$col $value</b><br>Gini: ". number_format($this->giniIndex, 2); |
|
| 85 | + $value = "<b>$col $value</b><br>Gini: ".number_format($this->giniIndex, 2); |
|
| 86 | 86 | } |
| 87 | 87 | $str = "<table ><tr><td colspan=3 align=center style='border:1px solid;'> |
| 88 | 88 | $value</td></tr>"; |
| 89 | 89 | if ($this->leftLeaf || $this->rightLeaf) { |
| 90 | - $str .='<tr>'; |
|
| 90 | + $str .= '<tr>'; |
|
| 91 | 91 | if ($this->leftLeaf) { |
| 92 | - $str .="<td valign=top><b>| Yes</b><br>$this->leftLeaf</td>"; |
|
| 92 | + $str .= "<td valign=top><b>| Yes</b><br>$this->leftLeaf</td>"; |
|
| 93 | 93 | } else { |
| 94 | - $str .='<td></td>'; |
|
| 94 | + $str .= '<td></td>'; |
|
| 95 | 95 | } |
| 96 | - $str .='<td> </td>'; |
|
| 96 | + $str .= '<td> </td>'; |
|
| 97 | 97 | if ($this->rightLeaf) { |
| 98 | - $str .="<td valign=top align=right><b>No |</b><br>$this->rightLeaf</td>"; |
|
| 98 | + $str .= "<td valign=top align=right><b>No |</b><br>$this->rightLeaf</td>"; |
|
| 99 | 99 | } else { |
| 100 | - $str .='<td></td>'; |
|
| 100 | + $str .= '<td></td>'; |
|
| 101 | 101 | } |
| 102 | 102 | $str .= '</tr>'; |
| 103 | 103 | } |