Passed
Push — master ( 331d4b...653c7c )
by Arkadiusz
02:19
created

src/Phpml/Clustering/KMeans/Space.php (2 issues)

Labels
Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering\KMeans;
6
7
use InvalidArgumentException;
8
use LogicException;
9
use Phpml\Clustering\KMeans;
10
use SplObjectStorage;
11
12
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @param int $dimension number of coordinates per point; must be at least 1
     *
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct($dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports the space as ['points' => [...]], one entry per attached point
     * (each entry is whatever that point's own toArray() returns).
     */
    public function toArray() : array
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Builds a point from raw coordinates without attaching it to the space.
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates) : Point
    {
        // Loose comparison on purpose: the constructor does not enforce an int dimension.
        if (count($coordinates) != $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates);
    }

    /**
     * Creates a point from the coordinates and attaches it to the space.
     *
     * @param mixed $data optional payload stored alongside the point
     *
     * @throws LogicException when the coordinates do not match the space dimension
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Attaches an existing point; anything that is not a Point is rejected.
     *
     * @param Point $point
     * @param mixed $data  optional payload stored alongside the point
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension() : int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-axis minimum and maximum over all attached points.
     *
     * @return array|bool [Point $min, Point $max], or false when the space holds no point
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both corners start with null on every axis, meaning "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                if ($min[$n] === null || $min[$n] > $point[$n]) {
                    $min[$n] = $point[$n];
                }

                if ($max[$n] === null || $max[$n] < $point[$n]) {
                    $max[$n] = $point[$n];
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Draws a random point whose coordinates lie between $min and $max on every axis.
     *
     * Integer boundaries yield an integer coordinate (inclusive range). Any other
     * numeric boundary yields a float in the same range, because random_int()
     * only accepts integers under strict_types.
     */
    public function getRandomPoint(Point $min, Point $max) : Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_int($low) && is_int($high)) {
                $point[$n] = random_int($low, $high);

                continue;
            }

            // Uniform ratio in [0, 1] scaled onto the [low, high] interval.
            $ratio = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
            $point[$n] = $low + $ratio * ($high - $low);
        }

        return $point;
    }

    /**
     * Partitions the attached points into clusters, iterating until no point changes cluster.
     *
     * @return array|Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM) : array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        do {
            $converged = $this->iterate($clusters);
        } while (!$converged);

        return $clusters;
    }

    /**
     * Creates the initial clusters with the requested strategy and puts every
     * point of the space into the first one.
     *
     * @return array|Cluster[] empty when the init method is unknown or no cluster was created
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod) : array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);
                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);
                break;

            default:
                return [];
        }

        // Nothing to seed (e.g. fewer than one cluster requested): avoid dereferencing $clusters[0].
        if (count($clusters) === 0) {
            return [];
        }

        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass: every point moves to the cluster returned by
     * its getClosest() call, then every cluster updates its centroid.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point changed cluster during this pass
     */
    protected function iterate($clusters) : bool
    {
        $convergence = true;

        // Moves are collected first and applied afterwards, so clusters are not
        // modified while they are being traversed.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!isset($attach[$closest])) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!isset($detach[$cluster])) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Creates $clustersNumber clusters centred on random points drawn inside
     * the bounding box of the space.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException when the space holds no point
     */
    private function initializeRandomClusters(int $clustersNumber) : array
    {
        $boundaries = $this->getBoundaries();
        if ($boundaries === false) {
            // getBoundaries() signals an empty space with false; destructuring it
            // would silently produce null corners and fail later with a TypeError.
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        // Short list destructuring assigns both variables here; the legacy static
        // analyser's "variable $min/$max does not exist" report is a false positive.
        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Creates clusters with a k-means++ style seeding: the first centroid is the
     * first point of the space, each further centroid is drawn with a probability
     * weighted by the value of getDistanceWith() to its closest existing cluster.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException when the space holds no point
     */
    protected function initializeKMPPClusters(int $clustersNumber) : array
    {
        if (count($this) === 0) {
            // current() on an empty storage has no valid element to read coordinates from.
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        $clusters = [];
        $this->rewind();

        $clusters[] = new Cluster($this, $this->current()->getCoordinates());

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0;
            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $distances[$point] = $distance;
                $sum += $distance;
            }

            // Pick a threshold in [0, total] and walk the points until the
            // accumulated distances reach it.
            $sum = random_int(0, (int) $sum);
            foreach ($this as $point) {
                $sum -= $distances[$point];
                if ($sum > 0) {
                    continue;
                }

                $clusters[] = new Cluster($this, $point->getCoordinates());
                break;
            }
        }

        return $clusters;
    }
}
222