These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Clustering\KMeans; |
||
6 | |||
7 | use InvalidArgumentException; |
||
8 | use LogicException; |
||
9 | use Phpml\Clustering\KMeans; |
||
10 | use SplObjectStorage; |
||
11 | |||
/**
 * A fixed-dimension collection of points that can be partitioned into clusters.
 *
 * The space is an SplObjectStorage whose attached objects are Point instances;
 * the data stored alongside each point is whatever the caller passed to
 * addPoint()/attach().
 */
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point created by this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @param int $dimension number of coordinates per point, at least 1
     *
     * @throws LogicException if $dimension is lower than 1
     */
    public function __construct($dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports every attached point through its own toArray() method.
     *
     * @return array an array with a single 'points' key holding one entry per point
     */
    public function toArray(): array
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Creates a point for this space without attaching it.
     *
     * @param array $coordinates exactly as many values as the space dimension
     *
     * @throws LogicException if the number of coordinates differs from the dimension
     */
    public function newPoint(array $coordinates): Point
    {
        if (count($coordinates) != $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates);
    }

    /**
     * Creates a point from the given coordinates and attaches it to the space.
     *
     * @param array $coordinates exactly as many values as the space dimension
     * @param mixed $data        optional data stored alongside the point
     *
     * @throws LogicException if the number of coordinates differs from the dimension
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Attaches a point to the space; anything that is not a Point is rejected.
     *
     * @param Point $point
     * @param mixed $data  optional data stored alongside the point
     *
     * @throws InvalidArgumentException if $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-coordinate minimum and maximum over all attached points.
     *
     * @return Point[]|false [$min, $max], or false when the space holds no point
     */
    public function getBoundaries()
    {
        if (!count($this)) {
            return false;
        }

        // Both corners start with null coordinates, meaning "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Builds a point whose coordinates are drawn between those of $min and $max.
     *
     * Integer bounds are drawn with random_int(), bounds included. Any other
     * numeric bounds (e.g. floats, which random_int() rejects under strict
     * types) are drawn by linear interpolation between the two bounds.
     *
     * @param Point $min lower corner, one coordinate per dimension
     * @param Point $max upper corner, one coordinate per dimension
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $point[$n] = $this->randomBetween($min[$n], $max[$n]);
        }

        return $point;
    }

    /**
     * Partitions the points of the space into clusters.
     *
     * @param int $clustersNumber number of clusters to create
     * @param int $initMethod     one of the KMeans::INIT_* constants
     *
     * @return array|Cluster[] empty when $clustersNumber is lower than 1 or
     *                         $initMethod is not supported
     *
     * @throws LogicException if clusters are requested for a space without points
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        // iterate() does all the work and returns true once no point moved.
        while (!$this->iterate($clusters)) {
            // Nothing to do between two passes.
        }

        return $clusters;
    }

    /**
     * Creates the initial clusters and puts every point into the first one.
     *
     * @return array|Cluster[] empty when $clustersNumber is lower than 1 or
     *                         $initMethod is not supported
     *
     * @throws LogicException if the space holds no point
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        // Without this guard $clusters[0] below would not exist.
        if ($clustersNumber < 1) {
            return [];
        }

        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass: every point moves to its closest cluster,
     * then every cluster updates its centroid.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters): bool
    {
        $convergence = true;

        // Moves are collected first and applied afterwards, so clusters are
        // not modified while they are being traversed.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Seeds clusters on existing points: the first point of the space, then
     * points drawn with a probability weighted by their distance to the
     * closest cluster created so far.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException if the space holds no point
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        $this->assertHasPoints();

        $this->rewind();
        $clusters = [new Cluster($this, $this->current()->getCoordinates())];

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0;
            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $distances[$point] = $distance;
                $sum += $distance;
            }

            // A float threshold keeps the draw weighted even when the summed
            // distances are below 1; truncating the sum to an int would make
            // the threshold 0 and always select the first point.
            $threshold = $sum * $this->randomFraction();

            $chosen = null;
            foreach ($this as $point) {
                // Remember the last visited point so that rounding errors in
                // the subtraction can never leave this round without a pick.
                $chosen = $point;
                $threshold -= $distances[$point];

                if ($threshold <= 0) {
                    break;
                }
            }

            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Seeds clusters on random positions inside the boundaries of the space.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException if the space holds no point
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        $boundaries = $this->getBoundaries();
        if ($boundaries === false) {
            throw new LogicException('cannot initialize clusters in a space without points');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }

    /**
     * @throws LogicException if the space holds no point
     */
    private function assertHasPoints(): void
    {
        if (count($this) === 0) {
            throw new LogicException('cannot initialize clusters in a space without points');
        }
    }

    /**
     * Draws a value between two bounds, bounds included.
     *
     * @param int|float $low
     * @param int|float $high
     *
     * @return int|float an int when both bounds are ints, a float otherwise
     */
    private function randomBetween($low, $high)
    {
        if (is_int($low) && is_int($high)) {
            return random_int($low, $high);
        }

        return $low + ($high - $low) * $this->randomFraction();
    }

    /**
     * Draws a float between 0 and 1, both included.
     */
    private function randomFraction(): float
    {
        return random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
    }
}
||
225 |
This check looks for `do` loops that have no statements or where all statements have been commented out. This may be the result of changes for debugging, or the code may simply be obsolete. Consider removing the loop.