Completed
Pull Request — master (#36)
by
unknown
03:10
created

RandomForest   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 70
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 10
lcom 1
cbo 1
dl 0
loc 70
rs 10
c 0
b 0
f 0

3 Methods

Rating   Name   Duplication   Size   Complexity  
A setFeatureSubsetRatio() 0 8 3
B getRandomSubset() 0 23 4
A predictSample() 0 17 3
1
<?php
2
declare(strict_types=1);
3
4
namespace Phpml\Classification\Ensemble;
5
6
use Phpml\Classification\Ensemble\Bagging;
7
use Phpml\Classification\DecisionTree;
8
use Phpml\Classification\NaiveBayes;
9
10
/**
 * Bagging ensemble in which every classifier is trained on a random subset
 * of the feature columns and predictions are combined by majority vote.
 */
class RandomForest extends Bagging
{
    /**
     * Sorted feature (column) indices chosen for each classifier, keyed by
     * the index passed to getRandomSubset().
     *
     * @var array
     */
    protected $classifierColumns = [];

    /**
     * Fraction of the available features handed to each classifier.
     *
     * @var float
     */
    protected $featureSubsetRatio = 0.7;

    /**
     * Sets the fraction of features each classifier is trained on.
     *
     * @param float $ratio value between 0.1 and 0.9 (both inclusive)
     *
     * @return $this
     *
     * @throws \Exception when the ratio is outside the accepted range
     */
    public function setFeatureSubsetRatio(float $ratio)
    {
        // Written as a negated "in range" test so that NAN is rejected too
        // (every comparison against NAN evaluates to false).
        if (!($ratio >= 0.1 && $ratio <= 0.9)) {
            // Fully qualified on purpose: an unqualified "Exception" would be
            // resolved relative to the current namespace.
            throw new \Exception("Feature subset ratio should be between 0.1 and 0.9");
        }

        $this->featureSubsetRatio = $ratio;

        return $this;
    }

    /**
     * Takes the random sample subset produced by the parent class and keeps
     * only a randomly chosen set of feature columns, remembering which
     * columns were chosen for the given index.
     *
     * @param int $index key under which the chosen columns are stored
     *
     * @return array two-element array: [projected samples, targets]
     */
    protected function getRandomSubset($index)
    {
        list($subset, $targets) = parent::getRandomSubset($index);

        // NOTE(review): $this->featureCount is not declared in this class;
        // presumably it is maintained by the parent - confirm.
        $wanted = (int) ceil($this->featureSubsetRatio * $this->featureCount);

        // Draw distinct column indices until enough have been collected.
        $features = [];
        while (count($features) < $wanted) {
            $rand = rand(0, $this->featureCount - 1);
            if (!in_array($rand, $features, true)) {
                $features[] = $rand;
            }
        }
        sort($features);
        $this->classifierColumns[$index] = $features;

        // Project every row onto the chosen columns. Building the rows
        // directly keeps each sample an array even when a single column is
        // selected, and keeps rows aligned with $targets.
        $projected = [];
        foreach ($subset as $row) {
            $piece = [];
            foreach ($features as $colIndex) {
                $piece[] = $row[$colIndex];
            }
            $projected[] = $piece;
        }

        return [$projected, $targets];
    }

    /**
     * Predicts the label of one sample by majority vote: each classifier
     * receives only the columns recorded for it in $classifierColumns.
     *
     * @param array $sample
     *
     * @return mixed the most frequent prediction, or null when there are no
     *               classifiers to ask
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];

        // The classifier count is computed once instead of on every iteration.
        for ($i = 0, $total = count($this->classifiers); $i < $total; $i++) {
            $samplePiece = [];
            foreach ($this->classifierColumns[$i] as $colIndex) {
                $samplePiece[] = $sample[$colIndex];
            }
            $predictions[] = $this->classifiers[$i]->predict($samplePiece);
        }

        // Tally the votes and return the label with the highest count.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
80