Test Setup Failed
Push — master ( 3baf15...4590d5 )
by Arkadiusz
02:24
created

src/Clustering/KMeans/Space.php (2 issues)

Labels
Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering\KMeans;
6
7
use InvalidArgumentException;
8
use LogicException;
9
use Phpml\Clustering\KMeans;
10
use SplObjectStorage;
11
12
class Space extends SplObjectStorage
13
{
14
    /**
     * Number of coordinates that newPoint() requires for a point of this space.
     *
     * @var int
     */
    protected $dimension;

    /**
     * Creates an empty space with a fixed number of dimensions.
     *
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct(int $dimension)
    {
        $isValidDimension = $dimension >= 1;

        if (!$isValidDimension) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }
27
28
    /**
     * Exports the stored points as plain arrays.
     *
     * @return array a single 'points' key holding the toArray() result of each stored point, in iteration order
     */
    public function toArray(): array
    {
        $export = ['points' => []];

        /** @var Point $member */
        foreach ($this as $member) {
            $export['points'][] = $member->toArray();
        }

        return $export;
    }
39
40
    /**
     * Builds a Point for this space without attaching it.
     *
     * @param mixed $label value forwarded unchanged to the Point constructor
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates, $label = null): Point
    {
        if (count($coordinates) === $this->dimension) {
            return new Point($coordinates, $label);
        }

        throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
    }
51
52
    /**
     * Creates a point through newPoint() and attaches it to this space.
     *
     * @param mixed $label value forwarded to newPoint()
     * @param mixed $data  value stored alongside the point by attach()
     */
    public function addPoint(array $coordinates, $label = null, $data = null): void
    {
        $created = $this->newPoint($coordinates, $label);

        $this->attach($created, $data);
    }
60
61
    /**
     * Attaches a point to the space, rejecting anything that is not a Point.
     *
     * @param object $point
     * @param mixed  $data  value associated with the point in the underlying storage
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if ($point instanceof Point) {
            parent::attach($point, $data);

            return;
        }

        throw new InvalidArgumentException('can only attach points to spaces');
    }
73
74
    /**
     * Returns the dimension this space was constructed with.
     */
    public function getDimension(): int
    {
        return $this->dimension;
    }
78
79
    /**
     * Computes the per-axis minimum and maximum over all stored points.
     *
     * @return array|bool false when the space is empty, otherwise [$min, $max] as two points
     *                    created through newPoint()
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both bounds start with null on every axis; null means "no value seen yet".
        $blank = array_fill(0, $this->dimension, null);
        $lowest = $this->newPoint($blank);
        $highest = $this->newPoint($blank);

        /** @var Point $candidate */
        foreach ($this as $candidate) {
            for ($axis = 0; $axis < $this->dimension; ++$axis) {
                $value = $candidate[$axis];

                if ($lowest[$axis] === null || $value < $lowest[$axis]) {
                    $lowest[$axis] = $value;
                }

                if ($highest[$axis] === null || $value > $highest[$axis]) {
                    $highest[$axis] = $value;
                }
            }
        }

        return [$lowest, $highest];
    }
106
107
    /**
     * Creates a point whose coordinates are drawn at random between two bounds, axis by axis.
     *
     * Integer bounds are sampled with random_int(). When either bound of an axis is a float,
     * random_int() cannot be used (it only accepts integers and this file runs with
     * strict_types=1), so a float is interpolated between the two bounds instead.
     *
     * @param Point $min lower bound of every axis
     * @param Point $max upper bound of every axis
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_float($low) || is_float($high)) {
                // Uniform ratio in [0, 1] scaled onto [$low, $high].
                $ratio = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
                $point[$n] = $low + $ratio * ($high - $low);

                continue;
            }

            $point[$n] = random_int($low, $high);
        }

        return $point;
    }
117
118
    /**
     * Partitions the points of this space into clusters.
     *
     * Clusters are created by initializeClusters(), then iterate() is called
     * repeatedly until it returns true.
     *
     * @return Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        // All the work happens inside iterate(); keep calling it until it reports convergence.
        while (!$this->iterate($clusters)) {
        }

        return $clusters;
    }
130
131
    /**
     * Creates the initial clusters with the requested strategy and assigns
     * every point of the space to the first one.
     *
     * @param int $clustersNumber number of clusters requested from the strategy
     * @param int $initMethod     KMeans::INIT_RANDOM or KMeans::INIT_KMEANS_PLUS_PLUS
     *
     * @return Cluster[] empty for an unknown $initMethod or when the strategy produced no cluster
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        // Without this guard, $clusters[0] would be null and the attachAll() call below would fail.
        if (count($clusters) === 0) {
            return [];
        }

        // Every point starts in the first cluster.
        $clusters[0]->attachAll($this);

        return $clusters;
    }
155
156
    /**
     * Runs one reassignment pass over the given clusters.
     *
     * Each point is compared with the cluster returned by its getClosest() call;
     * points that belong elsewhere are moved, then every centroid is updated.
     *
     * @param Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate(array $clusters): bool
    {
        $moved = false;

        // Moves are buffered per cluster and only applied once the scan is over.
        $incoming = new SplObjectStorage();
        $outgoing = new SplObjectStorage();

        foreach ($clusters as $current) {
            foreach ($current as $point) {
                $nearest = $point->getClosest($clusters);

                if ($nearest === null || $nearest === $current) {
                    continue;
                }

                if (!isset($incoming[$nearest])) {
                    $incoming[$nearest] = new SplObjectStorage();
                }

                if (!isset($outgoing[$current])) {
                    $outgoing[$current] = new SplObjectStorage();
                }

                $incoming[$nearest]->attach($point);
                $outgoing[$current]->attach($point);

                $moved = true;
            }
        }

        /** @var Cluster $target */
        foreach ($incoming as $target) {
            $target->attachAll($incoming[$target]);
        }

        /** @var Cluster $source */
        foreach ($outgoing as $source) {
            $source->detachAll($outgoing[$source]);
        }

        foreach ($clusters as $updated) {
            $updated->updateCentroid();
        }

        return !$moved;
    }
202
203
    /**
     * Creates clusters using a k-means++ style seeding.
     *
     * The first cluster is centred on the first point of the space. Each further
     * cluster is centred on a point picked at random, with a weight equal to its
     * distance to the closest cluster created so far.
     *
     * @param int $clustersNumber number of clusters wanted; at least one cluster is always created
     *
     * @return Cluster[]
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        $clusters = [];
        $this->rewind();

        /** @var Point $current */
        $current = $this->current();

        $clusters[] = new Cluster($this, $current->getCoordinates());

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0;
            /** @var Point $point */
            foreach ($this as $point) {
                $closest = $point->getClosest($clusters);
                if ($closest === null) {
                    continue;
                }

                $distance = $point->getDistanceWith($closest);
                $sum += $distances[$point] = $distance;
            }

            // Draw the threshold as a float in (0, $sum]. Truncating $sum to an int made
            // the threshold 0 whenever the total distance was below 1, and a threshold of 0
            // selects the first point, which sits on an existing centroid.
            $threshold = $sum * (random_int(1, PHP_INT_MAX) / PHP_INT_MAX);

            $selected = null;
            /** @var Point $point */
            foreach ($this as $point) {
                $distance = $distances[$point];

                // Remember the latest point with a positive distance so that float
                // rounding cannot leave the round without a selection.
                if ($selected === null || $distance > 0) {
                    $selected = $point;
                }

                $threshold -= $distance;

                if ($threshold <= 0) {
                    break;
                }
            }

            if ($selected !== null) {
                $clusters[] = new Cluster($this, $selected->getCoordinates());
            }
        }

        return $clusters;
    }
248
249
    /**
     * Creates clusters centred on random points drawn inside the boundaries of the space.
     *
     * @param int $clustersNumber number of clusters to create; values below 1 yield an empty array
     *
     * @return Cluster[]
     *
     * @throws LogicException when clusters are requested but the space contains no point
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        if ($clustersNumber < 1) {
            return [];
        }

        // getBoundaries() returns false for an empty space. Destructuring that value
        // directly would leave $min and $max undefined, which static analysis flags here.
        $boundaries = $this->getBoundaries();
        if (!is_array($boundaries)) {
            throw new LogicException('cannot initialize random clusters in an empty space');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }
263
}
264