Passed
Pull Request — master (#262)
by
unknown
03:02
created

Space   B

Complexity

Total Complexity 40

Size/Duplication

Total Lines 221
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
wmc 40
lcom 1
cbo 2
dl 0
loc 221
rs 8.2608
c 0
b 0
f 0

14 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 2
A toArray() 0 9 2
A addPoint() 0 4 1
A newPoint() 0 8 2
A addPointWithLabels() 0 4 1
A attach() 0 8 2
A getDimension() 0 4 1
B getBoundaries() 0 18 8
A getRandomPoint() 0 10 2
A cluster() 0 9 2
A initializeClusters() 0 21 3
C iterate() 0 37 7
B initializeKMPPClusters() 0 30 5
A initializeRandomClusters() 0 11 2

How to fix   Complexity   

Complex Class

Complex classes like Space often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Space, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering\KMeans;
6
7
use InvalidArgumentException;
8
use LogicException;
9
use Phpml\Clustering\KMeans;
10
use SplObjectStorage;
11
12
/**
 * A fixed-dimension collection of Point objects that can be split into clusters.
 *
 * The points are the objects held by the underlying SplObjectStorage; the
 * optional data passed to addPoint()/attach() is stored alongside each point.
 */
class Space extends SplObjectStorage
{
    /**
     * Number of coordinates every point of this space must have.
     *
     * @var int
     */
    protected $dimension;

    /**
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct(int $dimension)
    {
        if ($dimension < 1) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }

    /**
     * Exports every point through its own toArray() method.
     *
     * @return array an array with a single 'points' key
     */
    public function toArray(): array
    {
        $points = [];
        foreach ($this as $point) {
            $points[] = $point->toArray();
        }

        return ['points' => $points];
    }

    /**
     * Builds a Point without attaching it to the space.
     *
     * @param mixed $label forwarded untouched to the Point constructor
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates, $label = null): Point
    {
        if (count($coordinates) !== $this->dimension) {
            throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
        }

        return new Point($coordinates, $label);
    }

    /**
     * Creates a point from the coordinates and attaches it with the given data.
     *
     * @param mixed $data stored next to the point in the underlying storage
     */
    public function addPoint(array $coordinates, $data = null): void
    {
        $this->attach($this->newPoint($coordinates), $data);
    }

    /**
     * Creates a labelled point from the coordinates and attaches it without data.
     *
     * @param mixed $label forwarded to newPoint()
     */
    public function addPointWithLabels(array $coordinates, $label = null): void
    {
        $this->attach($this->newPoint($coordinates, $label));
    }

    /**
     * Attaches a point to the space; anything that is not a Point is rejected.
     *
     * @param Point $point
     * @param mixed $data
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null): void
    {
        if (!$point instanceof Point) {
            throw new InvalidArgumentException('can only attach points to spaces');
        }

        parent::attach($point, $data);
    }

    public function getDimension(): int
    {
        return $this->dimension;
    }

    /**
     * Computes the per-axis minimum and maximum over all attached points.
     *
     * @return Point[]|bool [min, max] as two points, or false when the space is empty
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // Both corners start with null on every axis, meaning "no value seen yet".
        $min = $this->newPoint(array_fill(0, $this->dimension, null));
        $max = $this->newPoint(array_fill(0, $this->dimension, null));

        foreach ($this as $point) {
            for ($n = 0; $n < $this->dimension; ++$n) {
                $value = $point[$n];

                if ($min[$n] === null || $min[$n] > $value) {
                    $min[$n] = $value;
                }

                if ($max[$n] === null || $max[$n] < $value) {
                    $max[$n] = $value;
                }
            }
        }

        return [$min, $max];
    }

    /**
     * Builds a point whose coordinates are drawn at random between the
     * matching coordinates of $min and $max.
     */
    public function getRandomPoint(Point $min, Point $max): Point
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $point[$n] = $this->randomCoordinate($min[$n], $max[$n]);
        }

        return $point;
    }

    /**
     * Splits the points of the space into clusters.
     *
     * @return array|Cluster[]
     */
    public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
    {
        $clusters = $this->initializeClusters($clustersNumber, $initMethod);

        // The loop body is intentionally empty (static analysers flag it as
        // unused code): iterate() performs one reassignment pass per call and
        // returns true once no point changed cluster.
        do {
        } while (!$this->iterate($clusters));

        return $clusters;
    }

    /**
     * Creates the initial clusters and places every point in the first one.
     *
     * @return array|Cluster[] empty when the init method is unknown or no cluster was created
     */
    protected function initializeClusters(int $clustersNumber, int $initMethod): array
    {
        switch ($initMethod) {
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($clustersNumber);

                break;

            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMPPClusters($clustersNumber);

                break;

            default:
                return [];
        }

        // Nothing to seed: avoid dereferencing a missing first cluster.
        if (!isset($clusters[0])) {
            return [];
        }

        // All points start in the first cluster; iterate() redistributes them.
        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Runs one reassignment pass: each point moves to its closest cluster,
     * then every cluster updates its centroid.
     *
     * @param array|Cluster[] $clusters
     *
     * @return bool true when no point had to change cluster
     */
    protected function iterate($clusters): bool
    {
        $convergence = true;

        // Moves are collected first and applied afterwards so that clusters
        // are not modified while they are being traversed.
        $attach = new SplObjectStorage();
        $detach = new SplObjectStorage();

        foreach ($clusters as $cluster) {
            foreach ($cluster as $point) {
                $closest = $point->getClosest($clusters);

                if ($closest === $cluster) {
                    continue;
                }

                if (!$attach->contains($closest)) {
                    $attach[$closest] = new SplObjectStorage();
                }

                if (!$detach->contains($cluster)) {
                    $detach[$cluster] = new SplObjectStorage();
                }

                $attach[$closest]->attach($point);
                $detach[$cluster]->attach($point);

                $convergence = false;
            }
        }

        foreach ($attach as $cluster) {
            $cluster->attachAll($attach[$cluster]);
        }

        foreach ($detach as $cluster) {
            $cluster->detachAll($detach[$cluster]);
        }

        foreach ($clusters as $cluster) {
            $cluster->updateCentroid();
        }

        return $convergence;
    }

    /**
     * Seeds the clusters one at a time: the first one sits on the first point
     * of the space, each following one on a point drawn with a weight equal to
     * its distance to the closest cluster created so far.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException when the space holds no point
     */
    protected function initializeKMPPClusters(int $clustersNumber): array
    {
        if (count($this) === 0) {
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        $clusters = [];
        $this->rewind();

        $clusters[] = new Cluster($this, $this->current()->getCoordinates());

        $distances = new SplObjectStorage();

        for ($i = 1; $i < $clustersNumber; ++$i) {
            $sum = 0;
            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $sum += $distances[$point] = $distance;
            }

            // The threshold stays a float: truncating the total to an integer
            // would turn any sum below 1 into 0 and always select the first point.
            $threshold = $sum * (random_int(0, PHP_INT_MAX) / PHP_INT_MAX);

            $chosen = null;
            foreach ($this as $point) {
                // Remember the last point visited so that rounding errors can
                // never leave this round without a new cluster.
                $chosen = $point;
                $threshold -= $distances[$point];

                if ($threshold <= 0) {
                    break;
                }
            }

            $clusters[] = new Cluster($this, $chosen->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Seeds each cluster on a random position inside the boundaries of the space.
     *
     * @return array|Cluster[]
     *
     * @throws LogicException when clusters are requested from an empty space
     */
    private function initializeRandomClusters(int $clustersNumber): array
    {
        if ($clustersNumber < 1) {
            return [];
        }

        // getBoundaries() returns false for an empty space; destructuring that
        // value would leave $min and $max undefined (as static analysers report).
        $boundaries = $this->getBoundaries();
        if ($boundaries === false) {
            throw new LogicException('cannot initialize clusters in an empty space');
        }

        [$min, $max] = $boundaries;

        $clusters = [];
        for ($n = 0; $n < $clustersNumber; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Draws one random coordinate between two bounds.
     *
     * Integer bounds keep using random_int() directly. Under strict types
     * random_int() rejects floats, so numeric bounds with at least one float
     * are interpolated instead. Any other input still reaches random_int()
     * and fails there.
     *
     * @param mixed $low
     * @param mixed $high
     *
     * @return int|float
     */
    private function randomCoordinate($low, $high)
    {
        $bothNumeric = (is_int($low) || is_float($low)) && (is_int($high) || is_float($high));

        if ($bothNumeric && (is_float($low) || is_float($high))) {
            return $low + ($high - $low) * (random_int(0, PHP_INT_MAX) / PHP_INT_MAX);
        }

        return random_int($low, $high);
    }
}
233