|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Phpml\Classification\Ensemble; |
|
6
|
|
|
|
|
7
|
|
|
use Phpml\Helper\Predictable; |
|
8
|
|
|
use Phpml\Helper\Trainable; |
|
9
|
|
|
use Phpml\Math\Statistic\Mean; |
|
10
|
|
|
use Phpml\Classification\Classifier; |
|
11
|
|
|
use Phpml\Classification\DecisionTree; |
|
12
|
|
|
use Phpml\Classification\NaiveBayes; |
|
13
|
|
|
|
|
14
|
|
|
/**
 * Bagging ensemble classifier.
 *
 * Builds a fixed number of base classifiers, trains each of them on a random
 * sub-sample (drawn with replacement) of the accumulated training data, and
 * predicts by majority vote over the base classifiers' predictions.
 */
class Bagging implements Classifier
{
    use Trainable, Predictable;

    /**
     * Training samples accumulated over every call to train().
     *
     * @var array
     */
    private $samples = [];

    /**
     * Number of accumulated training samples (count of $samples).
     *
     * @var int
     */
    protected $numSamples;

    /**
     * Training targets accumulated over every call to train().
     *
     * @var array
     */
    private $targets = [];

    /**
     * Number of elements in the first sample of the most recent non-empty
     * training batch.
     *
     * @var int
     */
    protected $featureCount = 0;

    /**
     * Number of base classifiers to build.
     *
     * @var int
     */
    protected $numClassifier;

    /**
     * Class name of the base classifier that is instantiated via reflection.
     *
     * @var string
     */
    protected $classifier = DecisionTree::class;

    /**
     * Constructor arguments passed to every base classifier instance.
     *
     * @var array
     */
    protected $classifierOptions = [];

    /**
     * Base classifier instances created by the last call to train().
     *
     * @var array
     */
    protected $classifiers = [];

    /**
     * Ratio used to size the random sub-sample given to each base classifier.
     *
     * @var float
     */
    protected $subsetRatio = 0.5;

    /**
     * @param int $numClassifier Number of base classifiers in the ensemble
     */
    public function __construct($numClassifier = 50)
    {
        $this->numClassifier = $numClassifier;
    }

    /**
     * Sets the ratio that sizes each base classifier's random sub-sample.
     *
     * @param float $ratio Value between 0.1 and 1.0 (inclusive)
     * @return $this
     * @throws \Exception When the ratio is outside [0.1, 1.0]
     */
    public function setSubsetRatio(float $ratio)
    {
        if ($ratio < 0.1 || $ratio > 1.0) {
            // Fully qualified: an unqualified "Exception" would resolve to a
            // (non-existent) class inside the current namespace.
            throw new \Exception("Subset ratio should be between 0.1 and 1.0");
        }

        $this->subsetRatio = $ratio;

        return $this;
    }

    /**
     * Selects the base classifier class and its constructor arguments.
     *
     * The method name keeps its historical spelling so existing callers
     * continue to work.
     *
     * @param string $classifier        Class name of the base classifier
     * @param array  $classifierOptions Constructor arguments for each instance
     * @return $this
     */
    public function setClassifer(string $classifier, array $classifierOptions = [])
    {
        $this->classifier = $classifier;
        $this->classifierOptions = $classifierOptions;

        return $this;
    }

    /**
     * Adds the given data to the stored training set, then rebuilds and
     * retrains every base classifier on a random sub-sample of the full set.
     *
     * @param array $samples
     * @param array $targets
     * @throws \InvalidArgumentException When there are no samples to train on
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);
        $this->numSamples = count($this->samples);

        // Without samples the sub-sampling below would divide by zero.
        if ($this->numSamples === 0) {
            throw new \InvalidArgumentException('Cannot train a Bagging classifier without any samples');
        }

        // Only refresh the feature count when this batch actually has a sample.
        if ($samples !== []) {
            $this->featureCount = count(reset($samples));
        }

        // Init classifiers and train them with random sub-samples
        $this->classifiers = $this->initClassifiers();
        $index = 0;
        foreach ($this->classifiers as $classifier) {
            list($subsetSamples, $subsetTargets) = $this->getRandomSubset($index);
            $classifier->train($subsetSamples, $subsetTargets);
            ++$index;
        }
    }

    /**
     * Draws a random sub-sample (with replacement) of the stored training data.
     *
     * NOTE(review): the lower bound of the random range grows with $index, so
     * classifiers after the first draw only from the tail of the data set.
     * This looks unintended for bagging — confirm before changing it.
     *
     * @param int $index Position of the classifier the subset is drawn for
     * @return array Two-element array: [samples, targets]
     */
    protected function getRandomSubset($index)
    {
        $denom = $this->subsetRatio / 2;
        $subsetLength = $this->numSamples / (1 / $denom);

        // Explicit cast: "%" works on integers and the product is a float.
        $index = (int) ($index * $subsetLength) % $this->numSamples;

        $samples = [];
        $targets = [];
        for ($i = 0; $i < $subsetLength * 2; $i++) {
            $rand = rand($index, $this->numSamples - 1);
            $samples[] = $this->samples[$rand];
            $targets[] = $this->targets[$rand];
        }

        return [$samples, $targets];
    }

    /**
     * Creates the configured number of base classifier instances.
     *
     * @return Classifier[]|array
     */
    protected function initClassifiers()
    {
        // The reflected class is the same for every instance, so build it once.
        $ref = new \ReflectionClass($this->classifier);

        $classifiers = [];
        for ($i = 0; $i < $this->numClassifier; $i++) {
            if ($this->classifierOptions) {
                $obj = $ref->newInstanceArgs($this->classifierOptions);
            } else {
                $obj = $ref->newInstance();
            }
            $classifiers[] = $obj;
        }

        return $classifiers;
    }

    /**
     * Predicts a single sample by majority vote of the base classifiers.
     *
     * @param array $sample
     * @return mixed The most frequently predicted label
     */
    protected function predictSample(array $sample)
    {
        $predictions = [];
        foreach ($this->classifiers as $classifier) {
            /* @var $classifier Classifier */
            $predictions[] = $classifier->predict($sample);
        }

        // Count votes per label and take the label with the highest count.
        $counts = array_count_values($predictions);
        arsort($counts);
        reset($counts);

        return key($counts);
    }
}
|
171
|
|
|
|
Our type inference engine has found a suspicious assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type-hinted more strictly.
For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false. Either this assignment is in error or a type check should be added for that assignment.