These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Clustering\KMeans; |
||
6 | |||
7 | use InvalidArgumentException; |
||
8 | use LogicException; |
||
9 | use Phpml\Clustering\KMeans; |
||
10 | use SplObjectStorage; |
||
11 | |||
/**
 * A fixed-dimension collection of points that can be partitioned into clusters.
 *
 * The space is an SplObjectStorage whose attached objects are Point instances;
 * the optional data attached to each point is stored as the storage "info".
 */
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @param int $dimension number of coordinates per point, at least 1
     *
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct($dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports every attached point through its own toArray() method.
     *
     * @return array a single-key array: ['points' => [...]]
     */
    public function toArray(): array
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Builds a point for this space without attaching it.
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates): Point
    {
        if (count($coordinates) != $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates);
    }

    /**
     * Builds a point from the given coordinates and attaches it to the space.
     *
     * @param mixed $data optional payload stored alongside the point
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Attaches a point to the space, rejecting anything that is not a Point.
     *
     * @param Point $point
     * @param mixed $data  optional payload stored alongside the point
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-axis minimum and maximum over all attached points.
     *
     * @return array|bool [$min, $max] as two Point instances, or false when the space is empty
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both corners start with null coordinates; null means "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Draws a random point inside the box delimited by $min and $max.
     *
     * Integer bounds yield an integer coordinate (both bounds inclusive). Any
     * other numeric bounds yield a float coordinate, because random_int() only
     * accepts integers and this file runs with strict types enabled.
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_int($low) && is_int($high)) {
                $point[$n] = random_int($low, $high);
            } else {
                $point[$n] = $this->randomFloat($low, $high);
            }
        }

        return $point;
    }

    /**
     * Partitions the points of this space into clusters.
     *
     * Clusters are initialised with the requested method, then iterate() is
     * repeated until it reports that no point changed cluster.
     *
     * @return array|Cluster[] empty when the init method is unknown or no cluster could be created
     *
     * @throws LogicException when the space has no points to build clusters from
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        while (!$this->iterate($clusters)) {
            // iterate() performs the reassignment; nothing else to do per pass.
        }

        return $clusters;
    }

    /**
     * Creates the initial clusters and puts every point into the first one.
     *
     * @return array|Cluster[] empty when $initMethod is unknown or no cluster was created
     *
     * @throws LogicException when the space has no points to build clusters from
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        // Nothing to seed (e.g. a non-positive cluster count): avoid touching $clusters[0].
        if ($clusters === []) {
            return [];
        }

        // All points start in the first cluster; iterate() redistributes them.
        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass over the given clusters.
     *
     * Each point is compared with the cluster returned by its getClosest()
     * method; moves are collected first and applied afterwards so the clusters
     * are not modified while they are being traversed. Every cluster then has
     * updateCentroid() called on it.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters): bool
    {
        $convergence = true;

        // Pending moves, keyed by cluster: the points to add to / remove from it.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Seeds clusters from existing points, favouring points far from the seeds chosen so far.
     *
     * The first seed is the first point of the space. Each further seed is
     * drawn with a probability proportional to the value returned by the
     * point's getDistanceWith() for its closest existing seed.
     *
     * @return array|Cluster[] always contains at least the first seed
     *
     * @throws LogicException when the space has no points
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        if (count($this) === 0) {
            throw new LogicException('cannot initialize clusters of an empty space');
        }

        $this->rewind();
        $clusters = [new Cluster($this, $this->current()->getCoordinates())];

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0;
            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $distances[$point] = $distance;
                $sum += $distance;
            }

            // Threshold in (0, $sum]. Truncating the sum to an integer would
            // collapse to 0 whenever all distances are below 1, and a threshold
            // of 0 always selects the first point, which is already a seed.
            $threshold = $sum * mt_rand(1, mt_getrandmax()) / mt_getrandmax();

            // Falls back to the last visited point if rounding keeps the
            // threshold positive, so every round adds exactly one seed.
            $chosen = null;
            foreach ($this as $point) {
                $chosen = $point;
                $threshold -= $distances[$point];

                if ($threshold <= 0) {
                    break;
                }
            }

            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Seeds clusters at random positions inside the bounding box of the space.
     *
     * @return array|Cluster[] empty when $clustersNumber is lower than 1
     *
     * @throws LogicException when the space has no points, hence no bounding box
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        if ($clustersNumber < 1) {
            return [];
        }

        $boundaries = $this->getBoundaries();
        if ($boundaries === false) {
            throw new LogicException('cannot initialize clusters of an empty space');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Returns a uniformly distributed float between $min and $max (both inclusive).
     */
    private function randomFloat(float $min, float $max): float
    {
        return $min + (mt_rand() / mt_getrandmax()) * ($max - $min);
    }
}
||
225 |
This check marks access to variables or properties that have not been declared yet. While PHP has no explicit notion of declaring a variable, accessing it before a value is assigned to it is most likely a bug.