Completed
Push — master ( 72b25f...1d7350 )
by Arkadiusz
04:38
created

Bagging::predictSample()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 13
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 8
nc 2
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Classification\Ensemble;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
use Phpml\Math\Statistic\Mean;
10
use Phpml\Classification\Classifier;
11
use Phpml\Classification\DecisionTree;
12
use Phpml\Classification\NaiveBayes;
13
14
class Bagging implements Classifier
15
{
16
    use Trainable, Predictable;
17
18
    /**
19
     * @var int
20
     */
21
    protected $numSamples;
22
23
    /**
24
     * @var array
25
     */
26
    private $targets = [];
27
28
    /**
29
     * @var int
30
     */
31
    protected $featureCount = 0;
32
33
    /**
34
     * @var int
35
     */
36
    protected $numClassifier;
37
38
    /**
39
     * @var string Class name of the base classifier to instantiate
40
     */
41
    protected $classifier = DecisionTree::class;
42
43
    /**
44
     * @var array
45
     */
46
    protected $classifierOptions = ['depth' => 20];
47
48
    /**
49
     * @var array
50
     */
51
    protected $classifiers;
52
53
    /**
54
     * @var float
55
     */
56
    protected $subsetRatio = 0.5;
57
58
    /**
59
     * @var array
60
     */
61
    private $samples = [];
62
63
    /**
     * Creates an ensemble classifier made of the given number of base classifiers.
     *
     * The default number of base classifiers is 50. The more base classifiers,
     * the better the performance, but at the cost of processing time.
     *
     * @param int $numClassifier Number of base classifiers built when training
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }
74
75
    /**
     * Sets the fraction of the training set used to build each 'bootstrap' subset,
     * i.e. the random samples drawn from the original dataset with replacement
     * (repeats allowed) on which every base classifier is trained.
     *
     * @param float $ratio Fraction of the dataset, from 0.1 to 1.0
     *
     * @return $this
     *
     * @throws \Exception When the ratio is below 0.1 or above 1.0
     */
    public function setSubsetRatio(float $ratio)
    {
        $belowMinimum = $ratio < 0.1;
        $aboveMaximum = $ratio > 1.0;

        if ($belowMinimum || $aboveMaximum) {
            throw new \Exception("Subset ratio should be between 0.1 and 1.0");
        }

        $this->subsetRatio = $ratio;

        return $this;
    }
92
93
    /**
     * Chooses the base classifier that the ensemble is built from.
     *
     * DecisionTree::class is the default, but any class implementing the
     * <i>Classifier</i> interface may be given.<br>
     * The classifier's parameters must be listed in the order in which its
     * constructor declares them; parameter names are neglected.
     *
     * @param string $classifier        Class name of the base classifier
     * @param array  $classifierOptions Constructor arguments, in constructor order
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifierOptions = $classifierOptions;
        $this->classifier = $classifier;

        return $this;
    }
111
112
    /**
     * Appends the given data to the stored training set and rebuilds the ensemble.
     *
     * A fresh set of base classifiers is created on every call, and each one is
     * trained on its own random subset of all samples stored so far.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        $this->featureCount = count($samples[0]);
        $this->numSamples = count($this->samples);

        // Create the base classifiers, then fit each one on a random sub-sample
        $this->classifiers = $this->initClassifiers();

        $position = 0;
        foreach ($this->classifiers as $baseClassifier) {
            // $subset is a [samples, targets] pair
            $subset = $this->getRandomSubset($position++);
            $baseClassifier->train($subset[0], $subset[1]);
        }
    }
132
133
    /**
     * Draws a bootstrap subset of the stored training data.
     *
     * Entries are picked at random, with replacement, from the whole stored
     * training set; the number of draws is subsetRatio * numSamples (rounded up
     * when that product is fractional).
     *
     * @param int $index Position of the base classifier the subset is drawn for.
     *                   Not used by this implementation; kept so the method
     *                   signature stays unchanged for callers and overrides.
     *
     * @return array Two-element array: [samples, targets]
     */
    protected function getRandomSubset($index)
    {
        // Number of draws: the configured share of all stored samples
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        $lastOffset = $this->numSamples - 1;

        $samples = [];
        $targets = [];
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            // Draw from the full range so that every stored sample can be
            // selected for every base classifier
            $rand = rand(0, $lastOffset);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }
152
153
    /**
     * Builds the configured number of base classifier instances.
     *
     * Each instance is created from the configured classifier class with the
     * configured options as positional constructor arguments, then passed
     * through initSingleClassifier().
     *
     * @return array List of base classifiers
     *
     * @throws \ReflectionException When the configured classifier class cannot be reflected
     */
    protected function initClassifiers()
    {
        // The reflector and the constructor arguments are identical for every
        // instance, so they are prepared once outside the loop
        $ref = new \ReflectionClass($this->classifier);

        // Options are positional: drop any string keys so that PHP 8 does not
        // interpret them as named constructor arguments
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $obj = $ref->newInstanceArgs($arguments);
            $classifiers[] = $this->initSingleClassifier($obj, $i);
        }

        return $classifiers;
    }
170
171
    /**
     * Called by initClassifiers() for every newly created base classifier.
     *
     * This implementation returns the classifier unchanged and does not use
     * the index.
     *
     * @param Classifier $classifier Newly instantiated base classifier
     * @param int        $index      Position of the classifier within the ensemble
     *
     * @return Classifier
     */
    protected function initSingleClassifier($classifier, $index)
    {
        return $classifier;
    }
180
181
    /**
     * Predicts the label of a single sample by a vote of the base classifiers.
     *
     * @param array $sample
     *
     * @return mixed Label at the front of the vote tally after sorting by count, descending
     */
    protected function predictSample(array $sample)
    {
        $votes = [];
        foreach ($this->classifiers as $member) {
            /* @var $member Classifier */
            $votes[] = $member->predict($sample);
        }

        // Count the votes per label and move the highest count to the front
        $tally = array_count_values($votes);
        arsort($tally);
        reset($tally);

        return key($tally);
    }
198
}
199