1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare (strict_types = 1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\Clustering\KMeans; |
6
|
|
|
|
7
|
|
|
use Phpml\Clustering\KMeans; |
8
|
|
|
use SplObjectStorage; |
9
|
|
|
use LogicException; |
10
|
|
|
use InvalidArgumentException; |
11
|
|
|
|
12
|
|
|
class Space extends SplObjectStorage |
13
|
|
|
{ |
14
|
|
|
/** |
15
|
|
|
* @var int |
16
|
|
|
*/ |
17
|
|
|
protected $dimension; |
18
|
|
|
|
19
|
|
|
/** |
20
|
|
|
* @param $dimension |
21
|
|
|
*/ |
22
|
|
|
public function __construct($dimension) |
23
|
|
|
{ |
24
|
|
|
if ($dimension < 1) { |
25
|
|
|
throw new LogicException('a space dimension cannot be null or negative'); |
26
|
|
|
} |
27
|
|
|
|
28
|
|
|
$this->dimension = $dimension; |
29
|
|
|
} |
30
|
|
|
|
31
|
|
|
/** |
32
|
|
|
* @return array |
33
|
|
|
*/ |
34
|
|
|
public function toArray() |
35
|
|
|
{ |
36
|
|
|
$points = []; |
37
|
|
|
foreach ($this as $point) { |
38
|
|
|
$points[] = $point->toArray(); |
39
|
|
|
} |
40
|
|
|
|
41
|
|
|
return ['points' => $points]; |
42
|
|
|
} |
43
|
|
|
|
44
|
|
|
/** |
45
|
|
|
* @param array $coordinates |
46
|
|
|
* |
47
|
|
|
* @return Point |
48
|
|
|
*/ |
49
|
|
|
public function newPoint(array $coordinates) |
50
|
|
|
{ |
51
|
|
|
if (count($coordinates) != $this->dimension) { |
52
|
|
|
throw new LogicException('('.implode(',', $coordinates).') is not a point of this space'); |
53
|
|
|
} |
54
|
|
|
|
55
|
|
|
return new Point($coordinates); |
56
|
|
|
} |
57
|
|
|
|
58
|
|
|
/** |
59
|
|
|
* @param array $coordinates |
60
|
|
|
* @param null $data |
61
|
|
|
*/ |
62
|
|
|
public function addPoint(array $coordinates, $data = null) |
63
|
|
|
{ |
64
|
|
|
$this->attach($this->newPoint($coordinates), $data); |
65
|
|
|
} |
66
|
|
|
|
67
|
|
|
/** |
68
|
|
|
* @param object $point |
69
|
|
|
* @param null $data |
70
|
|
|
*/ |
71
|
|
|
public function attach($point, $data = null) |
72
|
|
|
{ |
73
|
|
|
if (!$point instanceof Point) { |
74
|
|
|
throw new InvalidArgumentException('can only attach points to spaces'); |
75
|
|
|
} |
76
|
|
|
|
77
|
|
|
parent::attach($point, $data); |
78
|
|
|
} |
79
|
|
|
|
80
|
|
|
/** |
81
|
|
|
* @return int |
82
|
|
|
*/ |
83
|
|
|
public function getDimension() |
84
|
|
|
{ |
85
|
|
|
return $this->dimension; |
86
|
|
|
} |
87
|
|
|
|
88
|
|
|
/** |
89
|
|
|
* @return array|bool |
90
|
|
|
*/ |
91
|
|
|
public function getBoundaries() |
92
|
|
|
{ |
93
|
|
|
if (!count($this)) { |
94
|
|
|
return false; |
95
|
|
|
} |
96
|
|
|
|
97
|
|
|
$min = $this->newPoint(array_fill(0, $this->dimension, null)); |
98
|
|
|
$max = $this->newPoint(array_fill(0, $this->dimension, null)); |
99
|
|
|
|
100
|
|
|
foreach ($this as $point) { |
101
|
|
|
for ($n = 0; $n < $this->dimension; ++$n) { |
102
|
|
|
($min[$n] > $point[$n] || $min[$n] === null) && $min[$n] = $point[$n]; |
103
|
|
|
($max[$n] < $point[$n] || $max[$n] === null) && $max[$n] = $point[$n]; |
104
|
|
|
} |
105
|
|
|
} |
106
|
|
|
|
107
|
|
|
return array($min, $max); |
108
|
|
|
} |
109
|
|
|
|
110
|
|
|
/** |
111
|
|
|
* @param Point $min |
112
|
|
|
* @param Point $max |
113
|
|
|
* |
114
|
|
|
* @return Point |
115
|
|
|
*/ |
116
|
|
|
public function getRandomPoint(Point $min, Point $max) |
117
|
|
|
{ |
118
|
|
|
$point = $this->newPoint(array_fill(0, $this->dimension, null)); |
119
|
|
|
|
120
|
|
|
for ($n = 0; $n < $this->dimension; ++$n) { |
121
|
|
|
$point[$n] = rand($min[$n], $max[$n]); |
122
|
|
|
} |
123
|
|
|
|
124
|
|
|
return $point; |
125
|
|
|
} |
126
|
|
|
|
127
|
|
|
/** |
128
|
|
|
* @param int $clustersNumber |
129
|
|
|
* @param int $initMethod |
130
|
|
|
* |
131
|
|
|
* @return array|Cluster[] |
132
|
|
|
*/ |
133
|
|
|
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) |
134
|
|
|
{ |
135
|
|
|
$clusters = $this->initializeClusters($clustersNumber, $initMethod); |
136
|
|
|
|
137
|
|
|
do { |
|
|
|
|
138
|
|
|
} while (!$this->iterate($clusters)); |
139
|
|
|
|
140
|
|
|
return $clusters; |
141
|
|
|
} |
142
|
|
|
|
143
|
|
|
/** |
144
|
|
|
* @param $clustersNumber |
145
|
|
|
* @param $initMethod |
146
|
|
|
* |
147
|
|
|
* @return array|Cluster[] |
148
|
|
|
*/ |
149
|
|
|
protected function initializeClusters(int $clustersNumber, int $initMethod) |
150
|
|
|
{ |
151
|
|
|
switch ($initMethod) { |
152
|
|
|
case KMeans::INIT_RANDOM: |
153
|
|
|
$clusters = $this->initializeRandomClusters($clustersNumber); |
154
|
|
|
break; |
155
|
|
|
|
156
|
|
|
case KMeans::INIT_KMEANS_PLUS_PLUS: |
157
|
|
|
$clusters = $this->initializeKMPPClusters($clustersNumber); |
158
|
|
|
break; |
159
|
|
|
} |
160
|
|
|
$clusters[0]->attachAll($this); |
|
|
|
|
161
|
|
|
|
162
|
|
|
return $clusters; |
163
|
|
|
} |
164
|
|
|
|
165
|
|
|
/** |
166
|
|
|
* @param $clusters |
167
|
|
|
* |
168
|
|
|
* @return bool |
169
|
|
|
*/ |
170
|
|
|
protected function iterate($clusters) |
171
|
|
|
{ |
172
|
|
|
$convergence = true; |
173
|
|
|
|
174
|
|
|
$attach = new SplObjectStorage(); |
175
|
|
|
$detach = new SplObjectStorage(); |
176
|
|
|
|
177
|
|
|
foreach ($clusters as $cluster) { |
178
|
|
|
foreach ($cluster as $point) { |
179
|
|
|
$closest = $point->getClosest($clusters); |
180
|
|
|
|
181
|
|
|
if ($closest !== $cluster) { |
182
|
|
|
isset($attach[$closest]) || $attach[$closest] = new SplObjectStorage(); |
183
|
|
|
isset($detach[$cluster]) || $detach[$cluster] = new SplObjectStorage(); |
184
|
|
|
|
185
|
|
|
$attach[$closest]->attach($point); |
186
|
|
|
$detach[$cluster]->attach($point); |
187
|
|
|
|
188
|
|
|
$convergence = false; |
189
|
|
|
} |
190
|
|
|
} |
191
|
|
|
} |
192
|
|
|
|
193
|
|
|
foreach ($attach as $cluster) { |
194
|
|
|
$cluster->attachAll($attach[$cluster]); |
195
|
|
|
} |
196
|
|
|
|
197
|
|
|
foreach ($detach as $cluster) { |
198
|
|
|
$cluster->detachAll($detach[$cluster]); |
199
|
|
|
} |
200
|
|
|
|
201
|
|
|
foreach ($clusters as $cluster) { |
202
|
|
|
$cluster->updateCentroid(); |
203
|
|
|
} |
204
|
|
|
|
205
|
|
|
return $convergence; |
206
|
|
|
} |
207
|
|
|
|
208
|
|
|
/** |
209
|
|
|
* @param int $clustersNumber |
210
|
|
|
* |
211
|
|
|
* @return array |
212
|
|
|
*/ |
213
|
|
|
private function initializeRandomClusters(int $clustersNumber) |
214
|
|
|
{ |
215
|
|
|
$clusters = []; |
216
|
|
|
list($min, $max) = $this->getBoundaries(); |
217
|
|
|
|
218
|
|
|
for ($n = 0; $n < $clustersNumber; ++$n) { |
219
|
|
|
$clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates()); |
220
|
|
|
} |
221
|
|
|
|
222
|
|
|
return $clusters; |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
/** |
226
|
|
|
* @param int $clustersNumber |
227
|
|
|
* |
228
|
|
|
* @return array |
229
|
|
|
*/ |
230
|
|
|
protected function initializeKMPPClusters(int $clustersNumber) |
231
|
|
|
{ |
232
|
|
|
$clusters = []; |
233
|
|
|
$this->rewind(); |
234
|
|
|
|
235
|
|
|
$clusters[] = new Cluster($this, $this->current()->getCoordinates()); |
236
|
|
|
|
237
|
|
|
$distances = new SplObjectStorage(); |
238
|
|
|
|
239
|
|
|
for ($i = 1; $i < $clustersNumber; ++$i) { |
240
|
|
|
$sum = 0; |
241
|
|
|
foreach ($this as $point) { |
242
|
|
|
$distance = $point->getDistanceWith($point->getClosest($clusters)); |
243
|
|
|
$sum += $distances[$point] = $distance; |
244
|
|
|
} |
245
|
|
|
|
246
|
|
|
$sum = rand(0, (int) $sum); |
247
|
|
|
foreach ($this as $point) { |
248
|
|
|
if (($sum -= $distances[$point]) > 0) { |
249
|
|
|
continue; |
250
|
|
|
} |
251
|
|
|
|
252
|
|
|
$clusters[] = new Cluster($this, $point->getCoordinates()); |
253
|
|
|
break; |
254
|
|
|
} |
255
|
|
|
} |
256
|
|
|
|
257
|
|
|
return $clusters; |
258
|
|
|
} |
259
|
|
|
} |
260
|
|
|
|
This check looks for
do
loops that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.Consider removing the loop.