Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Clustering/KMeans/Space.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering\KMeans;
6
7
use InvalidArgumentException;
8
use LogicException;
9
use Phpml\Clustering\KMeans;
10
use SplObjectStorage;
11
12
class Space extends SplObjectStorage
13
{
14
    /**
15
     * @var int
16
     */
17
    protected $dimension;
18
19
    /**
     * Creates an empty space whose points all share the same number of coordinates.
     *
     * @param int $dimension number of coordinates per point; must be at least 1
     *
     * @throws LogicException when the dimension is lower than 1
     */
    public function __construct($dimension)
    {
        $tooSmall = $dimension < 1;

        if ($tooSmall) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }
27
28
    /**
     * Exports every attached point as a plain array.
     *
     * @return array single-key array: 'points' maps to the list of each point's toArray() result
     */
    public function toArray(): array
    {
        $exported = array_map(
            static function ($point) {
                return $point->toArray();
            },
            // Keys are discarded so the result is a plain zero-based list.
            iterator_to_array($this, false)
        );

        return ['points' => $exported];
    }
37
38
    /**
     * Builds a Point from raw coordinates; the point is returned, not attached.
     *
     * @param array $coordinates one value per dimension of this space
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates): Point
    {
        if (count($coordinates) == $this->dimension) {
            return new Point($coordinates);
        }

        throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
    }
46
47
    /**
     * Creates a point from the given coordinates and attaches it to this space.
     *
     * @param array $coordinates one value per dimension of this space
     * @param mixed $data        optional payload stored alongside the point
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $point = $this->newPoint($coordinates);

        $this->attach($point, $data);
    }
54
55
    /**
     * Attaches a point to the space, rejecting anything that is not a Point.
     *
     * @param Point $point
     * @param mixed $data  optional payload forwarded to SplObjectStorage::attach()
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if ($point instanceof Point) {
            parent::attach($point, $data);

            return;
        }

        throw new InvalidArgumentException('can only attach points to spaces');
    }
67
68
    /**
     * Returns the number of coordinates each point of this space must have.
     */
    public function getDimension(): int
    {
        $dimension = $this->dimension;

        return $dimension;
    }
72
73
    /**
     * Computes the per-axis minimum and maximum over all attached points.
     *
     * @return array|bool false when the space is empty, otherwise [Point $min, Point $max]
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both corners start with null on every axis, meaning "no value seen yet".
        $blank = array_fill(0, $this->dimension, null);
        $lower = $this->newPoint($blank);
        $upper = $this->newPoint($blank);

        foreach ($this as $current) {
            for ($axis = 0; $axis < $this->dimension; ++$axis) {
                if ($lower[$axis] === null || $lower[$axis] > $current[$axis]) {
                    $lower[$axis] = $current[$axis];
                }

                if ($upper[$axis] === null || $upper[$axis] < $current[$axis]) {
                    $upper[$axis] = $current[$axis];
                }
            }
        }

        return [$lower, $upper];
    }
94
95
    /**
     * Draws a random point inside the box delimited by two corner points.
     *
     * When both bounds of an axis are integers the coordinate is an integer drawn
     * with random_int(), exactly as before. When at least one bound is a float the
     * coordinate is interpolated between the bounds instead: random_int() only
     * accepts integers and, because this file declares strict_types, passing a
     * float to it raises a TypeError.
     *
     * @param Point $min corner holding the lower bound of every axis
     * @param Point $max corner holding the upper bound of every axis
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            $hasFloatBound = is_float($low) || is_float($high);
            $numericBounds = (is_int($low) || is_float($low)) && (is_int($high) || is_float($high));

            if ($hasFloatBound && $numericBounds) {
                // Uniform fraction in [0, 1] scaled onto [$low, $high].
                $fraction = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
                $point[$n] = $low + $fraction * ($high - $low);

                continue;
            }

            // Integer bounds, or unsupported values that keep failing as they did before.
            $point[$n] = random_int($low, $high);
        }

        return $point;
    }
105
106
    /**
     * Partitions the attached points into clusters.
     *
     * Clusters are seeded with the requested strategy, then refined by calling
     * iterate() repeatedly until it reports convergence.
     *
     * @param int $clustersNumber number of clusters requested
     * @param int $initMethod     one of the KMeans::INIT_* constants
     *
     * @return array|Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        $converged = false;
        while (!$converged) {
            $converged = $this->iterate($clusters);
        }

        return $clusters;
    }
118
119
    /**
     * Seeds the clusters with the requested strategy and puts every point of the
     * space into the first cluster.
     *
     * @param int $clustersNumber number of clusters to create
     * @param int $initMethod     one of the KMeans::INIT_* constants
     *
     * @return array|Cluster[] empty array when the init method is not recognised
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $seeded = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $seeded = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                // Unknown strategy: nothing gets seeded.
                return [];
        }

        // All points start in the first cluster.
        $seeded[0]->attachAll($this);

        return $seeded;
    }
143
144
    /**
     * Runs one reassignment pass over the given clusters.
     *
     * Each point is asked for its closest cluster; points whose closest cluster is
     * not the one currently holding them are moved, then every centroid is updated.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters): bool
    {
        $stable = true;

        // Moves are recorded during the scan and applied only once it has finished.
        $incoming = new SplObjectStorage();
        $outgoing = new SplObjectStorage();

        foreach ($clusters as $current) {
            foreach ($current as $point) {
                $nearest = $point->getClosest($clusters);

                if ($nearest === $current) {
                    continue;
                }

                if (!isset($incoming[$nearest])) {
                    $incoming[$nearest] = new SplObjectStorage();
                }

                if (!isset($outgoing[$current])) {
                    $outgoing[$current] = new SplObjectStorage();
                }

                $incoming[$nearest]->attach($point);
                $outgoing[$current]->attach($point);

                $stable = false;
            }
        }

        foreach ($incoming as $target) {
            $target->attachAll($incoming[$target]);
        }

        foreach ($outgoing as $source) {
            $source->detachAll($outgoing[$source]);
        }

        foreach ($clusters as $each) {
            $each->updateCentroid();
        }

        return $stable;
    }
181
182
    /**
     * Seeds clusters with a distance-weighted random selection of existing points.
     *
     * The first centroid is the first point of the storage. Each further centroid is
     * picked by drawing a random threshold between 0 and the (truncated) sum of all
     * points' distances to their closest existing cluster, then walking the points
     * and subtracting each distance until the threshold is used up.
     *
     * @param int $clustersNumber number of clusters to create
     *
     * @return array|Cluster[]
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        $this->rewind();
        $seeds = [new Cluster($this, $this->current()->getCoordinates())];

        $weights = new SplObjectStorage();

        for ($created = 1; $created < $clustersNumber; ++$created) {
            $total = 0;
            foreach ($this as $candidate) {
                $weight = $candidate->getDistanceWith($candidate->getClosest($seeds));
                $weights[$candidate] = $weight;
                $total += $weight;
            }

            $remaining = random_int(0, (int) $total);
            foreach ($this as $candidate) {
                $remaining -= $weights[$candidate];
                if ($remaining > 0) {
                    continue;
                }

                $seeds[] = new Cluster($this, $candidate->getCoordinates());

                break;
            }
        }

        return $seeds;
    }
212
213
    /**
     * Seeds clusters with centroids drawn at random inside the boundaries of the space.
     *
     * @param int $clustersNumber number of clusters to create
     *
     * @return array|Cluster[]
     *
     * @throws LogicException when the space holds no point, so no boundaries exist
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        $boundaries = $this->getBoundaries();

        if (!is_array($boundaries)) {
            // getBoundaries() returns false for an empty space. Destructuring false
            // would silently yield nulls and fail later with an unrelated TypeError.
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }
224
}
225