Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Classification/Ensemble/Bagging.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Classification\Ensemble;
6
7
use Exception;
8
use Phpml\Classification\Classifier;
9
use Phpml\Classification\DecisionTree;
10
use Phpml\Helper\Predictable;
11
use Phpml\Helper\Trainable;
12
use ReflectionClass;
13
14
/**
 * Bootstrap-aggregating ("bagging") ensemble classifier.
 *
 * Builds a configurable number of base classifiers, trains each one on a
 * random subset of the training data drawn with replacement, and predicts
 * by majority vote over the base classifiers' predictions.
 */
class Bagging implements Classifier
{
    use Trainable, Predictable;

    /**
     * Total number of samples accumulated over all train() calls.
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Number of features in a sample, taken from the first sample of the
     * most recent non-empty training batch.
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers in the ensemble.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Class name of the base classifier.
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments for the base classifier. They are passed
     * positionally, in array order; the keys are ignored.
     *
     * @var array
     */
    protected $classifierOptions = ['depth' => 20];

    /**
     * Base classifier instances created by the last train() call.
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * Bootstrap subset size as a fraction of the number of samples.
     *
     * @var float
     */
    protected $subsetRatio = 0.7;

    /**
     * Targets accumulated over all train() calls.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Samples accumulated over all train() calls.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Creates an ensemble classifier with given number of base classifiers
     * Default number of base classifiers is 50.
     * The more number of base classifiers, the better performance but at the cost of processing time
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * This method determines the ratio of samples used to create the 'bootstrap' subset,
     * e.g., random samples drawn from the original dataset with replacement (allow repeats),
     * to train each base classifier.
     *
     * @param float $ratio Fraction of the sample count, within [0.1, 1.0]
     *
     * @return $this
     *
     * @throws \Exception When $ratio is outside [0.1, 1.0]
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            throw new Exception('Subset ratio should be between 0.1 and 1.0');
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * This method is used to set the base classifier. Default value is
     * DecisionTree::class, but any class that implements the <i>Classifier</i>
     * can be used. <br>
     * While giving the parameters of the classifier, the values should be
     * given in the order they are in the constructor of the classifier and parameter
     * names are neglected.
     *
     * @param string $classifier        Class name of the base classifier
     * @param array  $classifierOptions Constructor arguments, in constructor order
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Adds the given samples/targets to the accumulated training set, then
     * re-creates every base classifier and trains each one on its own
     * bootstrap subset of the accumulated data.
     *
     * @param array $samples List of samples; each sample is an array of feature values
     * @param array $targets Target of each sample, in the same order as $samples
     */
    public function train(array $samples, array $targets): void
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);

        // Use the first element rather than index 0 so an empty batch, or a
        // batch whose keys do not start at 0, does not raise an error. An
        // empty batch leaves the previously recorded feature count untouched.
        if ($samples !== []) {
            $this->featureCount = count(reset($samples));
        }

        $this->numSamples = count($this->samples);

        // Init classifiers and train them with bootstrap samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            [$subsetSamples, $subsetTargets] = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a bootstrap subset (sampling with replacement) from the
     * accumulated training data.
     *
     * The generator is seeded with $index, so the classifier at a given
     * position receives the same subset on every run over the same data.
     * Note that this reseeds PHP's global Mt19937 generator.
     *
     * @param int $index Position of the base classifier; used as the random seed
     *
     * @return array Two-element list: [samples, targets]
     */
    protected function getRandomSubset(int $index): array
    {
        $samples = [];
        $targets = [];

        // mt_rand() honours the mt_srand() seed. random_int() is a CSPRNG and
        // cannot be seeded, so pairing it with srand() silently discarded the
        // per-classifier seed.
        mt_srand($index);

        // Float on purpose: the loop below runs ceil(subsetRatio * numSamples) times.
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            $rand = mt_rand(0, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Instantiates $numClassifier base classifiers of the configured class,
     * passing each one through initSingleClassifier().
     *
     * @return array List of base classifier instances
     */
    protected function initClassifiers(): array
    {
        // The reflected class is the same for every instance; build it once.
        $ref = new ReflectionClass($this->classifier);

        // Options are documented as positional. Dropping the keys keeps them
        // positional on PHP 8, where string keys would otherwise be treated
        // as named arguments. An empty list is equivalent to calling the
        // constructor with no arguments, so no separate branch is needed.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $obj = $ref->newInstanceArgs($arguments);

            $classifiers[] = $this->initSingleClassifier($obj);
        }

        return $classifiers;
    }

    /**
     * Hook applied to every freshly created base classifier. This
     * implementation returns the classifier unchanged.
     */
    protected function initSingleClassifier(Classifier $classifier): Classifier
    {
        return $classifier;
    }

    /**
     * Predicts the label of one sample by majority vote: every base
     * classifier predicts, and the most frequent prediction is returned.
     *
     * @param array $sample Feature values of a single sample
     *
     * @return mixed The most frequent prediction; null when there are no base classifiers
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $predictions[] = $classifier->predict($sample);
        }

        // Tally the votes and sort descending so the winner comes first.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
183