Completed
Pull Request — master (#36)
by
unknown
05:04
created

Bagging::train()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 16
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 16
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 11
nc 2
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Classification\Ensemble;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
use Phpml\Math\Statistic\Mean;
10
use Phpml\Classification\Classifier;
11
use Phpml\Classification\DecisionTree;
12
use Phpml\Classification\NaiveBayes;
13
14
class Bagging implements Classifier
15
{
16
    use Trainable, Predictable;
17
18
    /**
     * Full training set handed to train(); random subsets are drawn from it.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Number of training samples (count of $samples), set by train().
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Training labels; read with the same index as $samples when a subset
     * is drawn.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Number of features, taken from the first training sample in train().
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * How many base classifiers the ensemble builds.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Base classifier: a class name by default, or the instance passed to
     * setClassifer(). Both forms are accepted by \ReflectionClass, which is
     * how the ensemble members are instantiated.
     *
     * @var Classifier|string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments used when instantiating each base classifier.
     *
     * @var array
     */
    protected $classifierOptions = [];

    /**
     * Base classifier instances created by train().
     *
     * @var Classifier[]
     */
    protected $classifiers;

    /**
     * Fraction of the training set drawn for each base classifier.
     *
     * @var float
     */
    protected $subsetRatio = 0.5;
62
63
64
    /**
     * @param int $numClassifier number of base classifiers the ensemble
     *                           builds when train() is called
     */
    public function __construct($numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }
68
69
    /**
     * Sets the fraction of the training set that each base classifier is
     * trained on.
     *
     * @param float $ratio value in the inclusive range 0.1 to 1.0
     *
     * @return $this
     *
     * @throws \Exception when the ratio is outside the accepted range
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            // The leading backslash matters: an unqualified class name inside
            // a namespace is resolved relative to that namespace and does not
            // fall back to the global \Exception.
            throw new \Exception("Subset ratio should be between 0.1 and 1.0");
        }

        $this->subsetRatio = $ratio;

        return $this;
    }
82
83
    /**
     * Replaces the base classifier and the constructor arguments used to
     * instantiate each ensemble member.
     *
     * The method name keeps its historical spelling so existing callers
     * continue to work.
     *
     * @param Classifier $classifier        classifier whose class is instantiated for every member
     * @param array      $classifierOptions constructor arguments for each new instance
     *
     * @return $this
     */
    public function setClassifer(Classifier $classifier, array $classifierOptions = [])
    {
        $this->classifierOptions = $classifierOptions;
        $this->classifier = $classifier;

        return $this;
    }
94
95
    /**
     * Stores the training data, builds the base classifiers and trains each
     * one on its own random subset of the data.
     *
     * @param array $samples list of feature vectors; the first one determines the feature count
     * @param array $targets labels for the samples
     */
    public function train(array $samples, array $targets)
    {
        $this->featureCount = count($samples[0]);
        $this->samples = $samples;
        $this->targets = $targets;
        $this->numSamples = count($samples);

        $this->classifiers = $this->initClassifiers();

        // Every member receives its running position so that the subset
        // selection can vary from one classifier to the next.
        $position = 0;
        foreach ($this->classifiers as $member) {
            list($subsetSamples, $subsetTargets) = $this->getRandomSubset($position++);
            $member->train($subsetSamples, $subsetTargets);
        }
    }
115
116
    /**
     * Draws a random subset of the training data, with replacement, for one
     * base classifier.
     *
     * @param int $index zero-based position of the classifier being trained
     *
     * @return array two-element list [samples, targets]; both lists are
     *               filled from the same randomly chosen positions
     */
    protected function getRandomSubset($index)
    {
        // Number of draws; a fractional value is effectively rounded up by
        // the loop condition below.
        $subsetSize = $this->numSamples * $this->subsetRatio;

        // Lower bound of the sampling window. It advances by half a subset
        // per classifier and wraps around the sample count. The explicit cast
        // keeps the truncation the modulo operator used to apply implicitly,
        // which is deprecated for fractional floats since PHP 8.1.
        // NOTE(review): draws only come from [$offset, numSamples - 1], so
        // early samples are under-represented for later classifiers; confirm
        // whether a uniform draw over the whole set is intended.
        $offset = ((int) ($index * $subsetSize / 2)) % $this->numSamples;

        $samples = [];
        $targets = [];
        for ($i = 0; $i < $subsetSize; ++$i) {
            $rand = rand($offset, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }
134
135
    /**
     * Creates the untrained base classifiers of the ensemble.
     *
     * The configured classifier (class name or instance) is instantiated
     * through reflection, passing the classifier options as constructor
     * arguments when any are set.
     *
     * @return Classifier[]|array
     */
    protected function initClassifiers()
    {
        // Neither the reflected class nor the options change between
        // iterations, so both are resolved once up front.
        $ref = new \ReflectionClass($this->classifier);
        $hasOptions = !empty($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $classifiers[] = $hasOptions
                ? $ref->newInstanceArgs($this->classifierOptions)
                : $ref->newInstance();
        }

        return $classifiers;
    }
152
153
    /**
     * Predicts the label of one sample by majority vote over all base
     * classifiers.
     *
     * @param array $sample feature vector to classify
     *
     * @return mixed the label predicted by the largest number of classifiers
     */
    protected function predictSample(array $sample)
    {
        $votes = [];
        foreach ($this->classifiers as $member) {
            /* @var $member Classifier */
            $votes[] = $member->predict($sample);
        }

        // Tally the votes and order them so the most frequent label is first.
        $tally = array_count_values($votes);
        arsort($tally);
        reset($tally);

        return key($tally);
    }
170
}
171