Completed
Pull Request — master (#36)
by
unknown
05:04
created

RandomForest::setFeatureSubsetRatio()   A

Complexity

Conditions 3
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 5
nc 2
nop 1
1
<?php
2
declare(strict_types=1);
3
4
namespace Phpml\Classification\Ensemble;
5
6
use Phpml\Classification\Ensemble\Bagging;
7
use Phpml\Classification\DecisionTree;
8
use Phpml\Classification\NaiveBayes;
9
10
/**
 * Bagging variant in which every classifier is trained on, and later queried
 * with, only a random subset of the feature columns.
 *
 * The columns chosen for each classifier are remembered in
 * $classifierColumns so that predictSample() can cut the incoming sample
 * down to the same columns before delegating to that classifier.
 */
class RandomForest extends Bagging
{
    /**
     * Sorted feature (column) indices selected for each classifier, keyed by
     * the index that was passed to getRandomSubset().
     *
     * @var array
     */
    protected $classifierColumns = [];

    /**
     * Fraction of the available features that each classifier receives.
     *
     * @var float
     */
    protected $featureSubsetRatio = 0.7;

    /**
     * Sets the fraction of features handed to each classifier.
     *
     * @param float $ratio value in the inclusive range 0.1 .. 0.9
     *
     * @return $this
     *
     * @throws \InvalidArgumentException when $ratio is outside 0.1 .. 0.9
     */
    public function setFeatureSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 0.9) {
            // Fully qualified on purpose: an unqualified `Exception` would be
            // resolved inside this namespace, where no such class exists.
            throw new \InvalidArgumentException("Feature subset ratio should be between 0.1 and 0.9");
        }

        $this->featureSubsetRatio = $ratio;

        return $this;
    }

    /**
     * Takes the random sample subset produced by the parent and reduces every
     * row to a random selection of feature columns. The selected column
     * indices are stored under $index for later use by predictSample().
     *
     * @param int $index index of the classifier the subset is built for
     *
     * @return array two-element array: [reduced samples, targets]
     */
    protected function getRandomSubset($index)
    {
        list($subset, $targets) = parent::getRandomSubset($index);

        $available = (int) $this->featureCount;
        // Never request more distinct columns than exist; otherwise the
        // rejection-sampling loop below could not terminate.
        $wanted = min($available, (int) ceil($this->featureSubsetRatio * $available));

        $features = [];
        while (count($features) < $wanted) {
            $candidate = rand(0, $available - 1);
            if (!in_array($candidate, $features, true)) {
                $features[] = $candidate;
            }
        }
        sort($features);
        $this->classifierColumns[$index] = $features;

        // Build each reduced row explicitly. Transposing the columns with
        // array_map(null, ...$columns) returns a flat list of scalars when
        // only one column is selected, and fails when none is selected.
        $rows = [];
        foreach ($subset as $row) {
            $picked = [];
            foreach ($features as $colIndex) {
                $picked[] = $row[$colIndex];
            }
            $rows[] = $picked;
        }

        return [$rows, $targets];
    }

    /**
     * Predicts a single sample by majority vote: each classifier predicts
     * from the columns it was trained on, and the most frequent prediction
     * is returned.
     *
     * The votes are tallied with array_count_values(), so the result is the
     * array key of the winning prediction (int or string), or null when
     * there are no classifiers.
     *
     * @param array $sample
     *
     * @return mixed
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        // The classifier count is computed once up front instead of being
        // re-evaluated in the loop condition on every iteration.
        for ($i = 0, $total = count($this->classifiers); $i < $total; ++$i) {
            // Reduce the sample to the columns this classifier was given.
            $samplePiece = [];
            foreach ($this->classifierColumns[$i] as $colIndex) {
                $samplePiece[] = $sample[$colIndex];
            }

            $predictions[] = $this->classifiers[$i]->predict($samplePiece);
        }

        // Tally the votes and move the most frequent prediction to the front.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
80