Completed
Pull Request — master (#36)
by
unknown
02:45
created

RandomForest   A

Complexity

Total Complexity 15

Size/Duplication

Total Lines 100
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 15
lcom 1
cbo 1
dl 0
loc 100
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
B setFeatureSubsetRatio() 0 11 7
B getRandomSubset() 0 30 5
A predictSample() 0 17 3
1
<?php
2
declare(strict_types=1);
3
4
namespace Phpml\Classification\Ensemble;
5
6
use Phpml\Classification\Ensemble\Bagging;
7
use Phpml\Classification\DecisionTree;
8
use Phpml\Classification\NaiveBayes;
9
10
/**
 * Random forest classifier: a bagging ensemble in which every base classifier
 * is trained on a randomly chosen subset of the feature columns.
 *
 * NOTE(review): $this->featureCount and $this->classifiers are read here but
 * not declared in this class; presumably they are inherited from Bagging —
 * confirm against the parent class.
 */
class RandomForest extends Bagging
{
    /**
     * Sorted feature column indices assigned to each base classifier,
     * keyed by the classifier index passed to getRandomSubset().
     *
     * @var array
     */
    protected $classifierColumns = [];

    /**
     * @var float
     */
    protected $subsetRatio = 1.0;

    /**
     * Either a fraction of the feature columns (float) or the name of a
     * strategy ('sqrt' or 'log') used to derive the number of columns.
     *
     * @var float|string
     */
    protected $featureSubsetRatio = 0.7;

    /**
     * Determines how many of the original columns (features) are used to
     * construct the subsets on which the base classifiers are trained.
     *
     * Allowed values: 'sqrt', 'log' or any number between 0.1 and 1.0.
     * Integers are accepted and treated as the equivalent float.
     *
     * If there are many features that diminish classification performance,
     * small values should be preferred; otherwise, with a low number of
     * features, the default value (0.7) gives satisfactory performance.
     *
     * @param mixed $ratio string or float should be given
     * @return $this
     * @throws \Exception when the value is out of range, is an unknown
     *                    strategy name, or is neither a number nor a string
     */
    public function setFeatureSubsetRatio($ratio)
    {
        // An integer such as 1 would otherwise skip validation and later be
        // treated as the 'log' strategy, so normalise it to a float first.
        if (is_int($ratio)) {
            $ratio = (float) $ratio;
        }

        if (is_float($ratio)) {
            // Written as a negated "in range" test so that NAN is rejected too.
            if (!($ratio >= 0.1 && $ratio <= 1.0)) {
                throw new \Exception("When a float given, feature subset ratio should be between 0.1 and 1.0");
            }
        } elseif (is_string($ratio)) {
            if ($ratio !== 'sqrt' && $ratio !== 'log') {
                throw new \Exception("When a string given, feature subset ratio can only be 'sqrt' or 'log' ");
            }
        } else {
            throw new \Exception("Feature subset ratio should be a float or one of the strings 'sqrt' and 'log'");
        }

        $this->featureSubsetRatio = $ratio;

        return $this;
    }

    /**
     * Takes the random sample subset produced by the parent class and keeps
     * only a random selection of its feature columns. The chosen column
     * indices are remembered per classifier so that predictSample() can cut
     * the same columns out of the samples it receives.
     *
     * @param int $index index of the base classifier the subset is built for
     * @return array two-element array: [samples restricted to the chosen columns, targets]
     */
    protected function getRandomSubset($index)
    {
        list($subset, $targets) = parent::getRandomSubset($index);

        // Translate the configured ratio/strategy into a number of columns.
        if (is_float($this->featureSubsetRatio)) {
            $featureCount = (int) ceil($this->featureSubsetRatio * $this->featureCount);
        } elseif ($this->featureSubsetRatio === 'sqrt') {
            $featureCount = (int) ceil(sqrt($this->featureCount));
        } else {
            $featureCount = (int) log($this->featureCount + 1, 2);
        }

        // Never ask for more columns than are available.
        $featureCount = min($featureCount, $this->featureCount);

        // Pick the columns at random, then sort them so the column order of
        // the reduced samples follows the order of the original columns.
        $features = range(0, $this->featureCount - 1);
        shuffle($features);
        $features = array_slice($features, 0, $featureCount, false);
        sort($features);
        $this->classifierColumns[$index] = $features;

        // Project every row onto the chosen columns. Building the rows
        // directly keeps each sample an array even when only one column is
        // selected (array_map(null, ...) would return a flat list there).
        $reduced = [];
        foreach ($subset as $row) {
            $piece = [];
            foreach ($features as $colIndex) {
                $piece[] = $row[$colIndex];
            }
            $reduced[] = $piece;
        }

        return [$reduced, $targets];
    }

    /**
     * Predicts the label of one sample by majority vote: every base
     * classifier predicts from the columns it was trained on and the label
     * with the highest number of votes is returned.
     *
     * @param array $sample full feature vector of the sample
     * @return mixed the most frequently predicted label
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        // Computed once; the collection does not change inside the loop.
        $classifierCount = count($this->classifiers);
        for ($i = 0; $i < $classifierCount; ++$i) {
            // Give each classifier only the columns it saw during training.
            $samplePiece = [];
            foreach ($this->classifierColumns[$i] as $colIndex) {
                $samplePiece[] = $sample[$colIndex];
            }
            $predictions[] = $this->classifiers[$i]->predict($samplePiece);
        }

        // Count the votes and return the label that occurs most often.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
110