These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Classification\Ensemble; |
||
6 | |||
7 | use Exception; |
||
8 | use Phpml\Classification\Classifier; |
||
9 | use Phpml\Classification\DecisionTree; |
||
10 | use Phpml\Helper\Predictable; |
||
11 | use Phpml\Helper\Trainable; |
||
12 | use ReflectionClass; |
||
13 | |||
/**
 * Bagging (bootstrap aggregating) ensemble classifier.
 *
 * Trains a configurable number of base classifiers, each on its own random
 * subset of the training data drawn with replacement, and predicts by taking
 * the label that receives the most votes from the trained base classifiers.
 */
class Bagging implements Classifier
{
    use Trainable, Predictable;

    /**
     * Total number of samples accumulated over all train() calls.
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Number of features, taken from the first sample of the latest train() call.
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers in the ensemble.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Fully-qualified class name of the base classifier.
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments for the base classifier. They are applied by
     * position; array keys are informational only.
     *
     * @var array
     */
    protected $classifierOptions = ['depth' => 20];

    /**
     * Base classifier instances created by the latest train() call.
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * Size of each bootstrap subset as a fraction of the accumulated sample count.
     *
     * @var float
     */
    protected $subsetRatio = 0.7;

    /**
     * Targets accumulated over all train() calls, aligned with $samples.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Samples accumulated over all train() calls.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Creates an ensemble classifier with the given number of base classifiers.
     * The default number of base classifiers is 50.
     * More base classifiers generally give better performance, at the cost of processing time.
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * Sets the ratio of samples used to create the 'bootstrap' subset,
     * i.e. random samples drawn from the original dataset with replacement
     * (repeats allowed), on which each base classifier is trained.
     *
     * @return $this
     *
     * @throws \Exception when the ratio is outside the range [0.1, 1.0]
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            throw new Exception('Subset ratio should be between 0.1 and 1.0');
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * Sets the base classifier. The default is DecisionTree::class, but any
     * class that implements the Classifier interface can be used.
     *
     * The classifier options must be given in the order of the base
     * classifier's constructor parameters; parameter names (array keys) are
     * ignored.
     *
     * The misspelt method name is kept as-is because it is part of the public API.
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Adds the given samples/targets to the accumulated training set and
     * (re)builds every base classifier, training each one on its own
     * bootstrap subset of the accumulated data.
     *
     * @throws \Exception when $samples is empty
     */
    public function train(array $samples, array $targets): void
    {
        // Fail early with a clear message instead of an undefined-offset
        // notice followed by count(null) further down.
        if ($samples === []) {
            throw new Exception('Cannot train on an empty sample set');
        }

        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        // reset() yields the first sample whatever its key is, so input that
        // does not start at index 0 (e.g. after array_filter) is accepted too.
        $this->featureCount = count(reset($samples));
        $this->numSamples = count($this->samples);

        // Init classifiers and train them with bootstrap samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            // Separate local names so the method parameters are not overwritten.
            [$subsetSamples, $subsetTargets] = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a bootstrap subset (with replacement) from the accumulated
     * training data.
     *
     * @param int $index position of the base classifier the subset is for;
     *                   used as the seed passed to srand()
     *
     * @return array two-element list: [samples, targets]
     */
    protected function getRandomSubset(int $index): array
    {
        $samples = [];
        $targets = [];
        // NOTE(review): srand() reseeds PHP's global Mt19937 generator, but
        // random_int() below draws from the system CSPRNG and ignores that
        // seed, so the subset itself is not reproducible. The call is kept
        // because code running afterwards (e.g. base classifiers that use
        // rand()/mt_rand()/shuffle()) may rely on the seed - confirm before
        // removing it or switching the draw to mt_rand().
        srand($index);
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            $rand = random_int(0, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Instantiates $numClassifier base classifiers of the configured class.
     *
     * @return array list of untrained base classifier instances
     */
    protected function initClassifiers(): array
    {
        // The reflector and the argument list do not change between
        // iterations, so build them once.
        $reflection = new ReflectionClass($this->classifier);

        // Options are positional by contract. Dropping the keys matters on
        // PHP 8+, where newInstanceArgs() interprets string keys as named
        // parameters: the default ['depth' => 20] would otherwise be passed
        // as a named argument "depth" instead of the first positional one.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            $instance = $arguments === []
                ? $reflection->newInstance()
                : $reflection->newInstanceArgs($arguments);

            $classifiers[] = $this->initSingleClassifier($instance);
        }

        return $classifiers;
    }

    /**
     * Hook for per-instance setup of a freshly created base classifier.
     * This implementation returns the instance unchanged.
     */
    protected function initSingleClassifier(Classifier $classifier): Classifier
    {
        return $classifier;
    }

    /**
     * Predicts the label of a single sample by majority vote over all base
     * classifiers.
     *
     * @return mixed the most frequently predicted label, or null when there
     *               are no base classifiers to vote
     */
    protected function predictSample(array $sample)
    {
        $votes = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $votes[] = $classifier->predict($sample);
        }

        // Tally the votes and sort descending so the winner comes first.
        $tally = array_count_values($votes);
        arsort($tally);
        reset($tally);

        return key($tally);
    }
}
||
183 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.