These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Classification\Ensemble; |
||
6 | |||
7 | use Phpml\Helper\Predictable; |
||
8 | use Phpml\Helper\Trainable; |
||
9 | use Phpml\Classification\Classifier; |
||
10 | use Phpml\Classification\DecisionTree; |
||
11 | |||
class Bagging implements Classifier
{
    // Bootstrap-aggregating ensemble: train() fits $numClassifier instances of
    // a base classifier on random subsets (drawn with replacement) of the
    // accumulated training data, and predictSample() returns the label
    // predicted most often by those base classifiers.
    // NOTE(review): the public train()/predict() plumbing presumably comes
    // from the Trainable/Predictable traits, which are not visible here.
    use Trainable, Predictable;

    /**
     * Total number of training samples accumulated so far.
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Training labels accumulated over all train() calls.
     *
     * @var array
     */
    private $targets = [];

    /**
     * Number of features in the first sample of the most recent non-empty batch.
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers in the ensemble.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Fully-qualified class name of the base classifier (not an instance).
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments for the base classifier. Only the values are
     * used, in order; the keys are purely descriptive.
     *
     * @var array
     */
    protected $classifierOptions = ['depth' => 20];

    /**
     * Base classifier instances created by the last train() call.
     * Empty until train() has been called.
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * Size of each bootstrap subset as a fraction of the training set.
     *
     * @var float
     */
    protected $subsetRatio = 0.7;

    /**
     * Training samples accumulated over all train() calls.
     *
     * @var array
     */
    private $samples = [];

    /**
     * Creates an ensemble classifier with given number of base classifiers
     * Default number of base classifiers is 50.
     * The more number of base classifiers, the better performance but at the cost of processing time
     *
     * @param int $numClassifier
     */
    public function __construct(int $numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * This method determines the ratio of samples used to create the 'bootstrap' subset,
     * i.e., random samples drawn from the original dataset with replacement (allow repeats),
     * to train each base classifier.
     *
     * @param float $ratio Value in the closed range [0.1, 1.0]
     *
     * @return $this
     *
     * @throws \Exception If the ratio is outside [0.1, 1.0] (including NAN)
     */
    public function setSubsetRatio(float $ratio)
    {
        // Written as a negated "in range" test so that NAN, for which every
        // comparison is false, is rejected instead of silently accepted.
        if (!($ratio >= 0.1 && $ratio <= 1.0)) {
            throw new \Exception("Subset ratio should be between 0.1 and 1.0");
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * This method is used to set the base classifier. Default value is
     * DecisionTree::class, but any class that implements the <i>Classifier</i>
     * can be used. <br>
     * While giving the parameters of the classifier, the values should be
     * given in the order they are in the constructor of the classifier and parameter
     * names are neglected.
     *
     * The method name keeps its historical spelling ("Classifer") so that
     * existing callers continue to work.
     *
     * @param string $classifier        Fully-qualified class name of the base classifier
     * @param array  $classifierOptions Constructor arguments, in constructor order
     *
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Appends the given batch to the stored training data, then rebuilds
     * every base classifier and trains each one on its own bootstrap subset
     * of all data accumulated so far.
     *
     * @param array $samples List of samples; each sample is an array of feature values
     * @param array $targets Labels, one per sample
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        $this->numSamples = count($this->samples);

        // Read the feature count from the first element whatever its key is,
        // and leave the previous value untouched for an empty batch
        // (the old $samples[0] lookup failed in both situations).
        if ($samples !== []) {
            $this->featureCount = count(reset($samples));
        }

        // Init classifiers and train them with bootstrap samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            list($subsetSamples, $subsetTargets) = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a bootstrap subset (sampling with replacement) from the stored
     * training data.
     *
     * The random generator is re-seeded with $index through srand(), so the
     * same index yields the same subset for the same data; note that this
     * re-seeds the process-wide generator as a side effect.
     *
     * @param int $index Position of the base classifier the subset is for; used as the seed
     *
     * @return array Two-element list: [samples, targets]
     */
    protected function getRandomSubset(int $index)
    {
        $samples = [];
        $targets = [];
        srand($index);
        // May be fractional; the loop below then effectively rounds it up.
        $bootstrapSize = $this->subsetRatio * $this->numSamples;
        for ($i = 0; $i < $bootstrapSize; ++$i) {
            $rand = rand(0, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Instantiates $numClassifier base classifiers via reflection and passes
     * each one through initSingleClassifier().
     *
     * @return array List of base classifier instances
     */
    protected function initClassifiers()
    {
        // The reflector is loop-invariant, so build it once.
        $ref = new \ReflectionClass($this->classifier);

        // Options are documented as positional. Since PHP 8.0
        // newInstanceArgs() treats string keys as named parameters, so keys
        // such as 'depth' must be stripped before the call.
        $arguments = array_values($this->classifierOptions);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; ++$i) {
            if ($arguments !== []) {
                $obj = $ref->newInstanceArgs($arguments);
            } else {
                $obj = $ref->newInstance();
            }

            $classifiers[] = $this->initSingleClassifier($obj);
        }

        return $classifiers;
    }

    /**
     * Hook applied to every freshly created base classifier. This
     * implementation returns the instance unchanged.
     *
     * @param Classifier $classifier
     *
     * @return Classifier
     */
    protected function initSingleClassifier($classifier)
    {
        return $classifier;
    }

    /**
     * Predicts the label of a single sample by majority vote over the base
     * classifiers.
     *
     * Votes are tallied with array_count_values(), which only counts int and
     * string predictions. Because the winning label is read back as an array
     * key, a numeric-string label is returned as an int.
     *
     * @param array $sample Feature values of one sample
     *
     * @return mixed The most frequent prediction, or null when there are no base classifiers
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $predictions[] = $classifier->predict($sample);
        }

        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
||
202 |
Our type inference engine has found an assignment to a property that is incompatible with the declared type of that property.
Either this assignment is in error, or the assigned type should be added to the documentation/type hint for that property.