Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Classification/Ensemble/Bagging.php (1 issue)

Check for implicit conversion of array to boolean.

Best Practice Bug Minor

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Classification\Ensemble;
6
7
use Exception;
8
use Phpml\Classification\Classifier;
9
use Phpml\Classification\DecisionTree;
10
use Phpml\Helper\Predictable;
11
use Phpml\Helper\Trainable;
12
use ReflectionClass;
13
14
class Bagging implements Classifier
15
{
16
    use Trainable, Predictable;
17
18
    /**
19
     * @var int
20
     */
21
    protected $numSamples;
22
23
    /**
24
     * @var int
25
     */
26
    protected $featureCount = 0;
27
28
    /**
29
     * @var int
30
     */
31
    protected $numClassifier;
32
33
    /**
34
     * @var string
35
     */
36
    protected $classifier = DecisionTree::class;
37
38
    /**
39
     * @var array
40
     */
41
    protected $classifierOptions = ['depth' => 20];
42
43
    /**
44
     * @var array
45
     */
46
    protected $classifiers = [];
47
48
    /**
49
     * @var float
50
     */
51
    protected $subsetRatio = 0.7;
52
53
    /**
54
     * @var array
55
     */
56
    private $targets = [];
57
58
    /**
59
     * @var array
60
     */
61
    private $samples = [];
62
63
    /**
64
     * Creates an ensemble classifier with given number of base classifiers
65
     * Default number of base classifiers is 50.
66
     * A larger number of base classifiers generally gives better performance, at the cost of processing time.
67
     */
68
    public function __construct(int $numClassifier = 50)
69
    {
70
        // Stored without validation: initClassifiers() loops while $i < $this->numClassifier,
        // so a value of 0 or less produces an ensemble with no base classifiers.
        $this->numClassifier = $numClassifier;
71
    }
72
73
    /**
74
     * This method determines the ratio of samples used to create the 'bootstrap' subset,
75
     * i.e., random samples drawn from the original dataset with replacement (allowing repeats),
76
     * to train each base classifier.
77
     *
78
     * @return $this
79
     *
80
     * @throws \Exception
81
     */
82
    public function setSubsetRatio(float $ratio)
    {
        // Accept only values inside [0.1, 1.0]. The range test is phrased positively and then
        // negated so that NAN, which fails every comparison, is rejected as well; with a
        // "< 0.1 || > 1.0" test NAN would pass and later yield empty bootstrap subsets,
        // because the sampling loop condition "$i < NAN" is never true.
        if (!($ratio >= 0.1 && $ratio <= 1.0)) {
            throw new Exception('Subset ratio should be between 0.1 and 1.0');
        }

        $this->subsetRatio = $ratio;

        // Returned to allow fluent chaining.
        return $this;
    }
92
93
    /**
94
     * This method is used to set the base classifier. Default value is
95
     * DecisionTree::class, but any class that implements the <i>Classifier</i>
96
     * can be used. <br>
97
     * While giving the parameters of the classifier, the values should be
98
     * given in the order they are in the constructor of the classifier and parameter
99
     * names are neglected.
100
     *
101
     * @return $this
102
     */
103
    public function setClassifer(string $classifier, array $classifierOptions = [])
104
    {
105
        // Class name of the base classifier; initClassifiers() instantiates it through ReflectionClass.
        $this->classifier = $classifier;
106
        // Constructor arguments for that class, passed to ReflectionClass::newInstanceArgs();
        // when the array is empty the class is instantiated without arguments.
        $this->classifierOptions = $classifierOptions;
107
108
        return $this;
109
    }
110
111
    public function train(array $samples, array $targets): void
    {
        // Training is cumulative: new data is appended to whatever earlier calls stored.
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);

        // Feature count is taken from the first sample of this call,
        // sample count from the accumulated store.
        $this->featureCount = count($samples[0]);
        $this->numSamples = count($this->samples);

        // Build a fresh set of base classifiers and fit each one on its own bootstrap subset.
        // array_values() numbers the classifiers 0..n-1 in iteration order; that position is
        // the index handed to getRandomSubset().
        $this->classifiers = $this->initClassifiers();
        foreach (array_values($this->classifiers) as $position => $baseClassifier) {
            [$subsetSamples, $subsetTargets] = $this->getRandomSubset($position);
            $baseClassifier->train($subsetSamples, $subsetTargets);
        }
    }
127
128
    protected function getRandomSubset(int $index): array
    {
        $samples = [];
        $targets = [];

        // Seed the Mersenne Twister with the classifier index so that each base classifier
        // gets a reproducible subset. The draw below must therefore use mt_rand():
        // random_int() reads from the system CSPRNG and ignores this seed entirely.
        mt_srand($index);

        // May be fractional; the loop then draws ceil(subsetRatio * numSamples) items.
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        $lastPosition = $this->numSamples - 1;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            // Sampling with replacement: the same position may be drawn more than once.
            $rand = mt_rand(0, $lastPosition);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        // Two parallel lists: the drawn samples and their matching targets.
        return [$samples, $targets];
    }
142
143
    protected function initClassifiers(): array
    {
        // Constructor arguments are positional (setClassifer() documents that parameter names
        // are neglected). Dropping the keys keeps that contract on PHP 8, where
        // ReflectionClass::newInstanceArgs() interprets string keys such as 'depth'
        // as named parameters.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $ref = new ReflectionClass($this->classifier);

            // Explicit emptiness comparison instead of an implicit array-to-boolean conversion.
            $obj = $arguments === []
                ? $ref->newInstance()
                : $ref->newInstanceArgs($arguments);

            // Each new instance passes through initSingleClassifier() before being collected.
            $classifiers[] = $this->initSingleClassifier($obj);
        }

        return $classifiers;
    }
159
160
    protected function initSingleClassifier(Classifier $classifier): Classifier
161
    {
162
        // Called by initClassifiers() for every new instance; this implementation returns it unchanged.
        // NOTE(review): presumably a hook for subclasses to configure each base classifier — confirm.
        return $classifier;
163
    }
164
165
    /**
166
     * @return mixed
167
     */
168
    protected function predictSample(array $sample)
    {
        // Collect one vote per base classifier for this sample.
        $votes = array_map(
            function ($baseClassifier) use ($sample) {
                return $baseClassifier->predict($sample);
            },
            $this->classifiers
        );

        // Tally the votes and order the tally from the most to the least frequent label.
        $tally = array_count_values($votes);
        arsort($tally);

        // After sorting, the first key is the majority label (null when there are no votes).
        reset($tally);

        return key($tally);
    }
182
}
183