Completed
Push — develop ( 01a249...bb9e1a )
by Arkadiusz
02:44
created

Space::initializeClusters()   C

Complexity

Conditions 11
Paths 4

Size

Total Lines 59
Code Lines 28

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
c 2
b 0
f 0
dl 0
loc 59
rs 6.3545
cc 11
eloc 28
nc 4
nop 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare (strict_types = 1);
4
5
namespace Phpml\Clustering\KMeans;
6
7
use Phpml\Clustering\KMeans;
8
use SplObjectStorage;
9
use LogicException;
10
use InvalidArgumentException;
11
12
class Space extends SplObjectStorage
13
{
14
    /**
15
     * @var int
16
     */
17
    protected $dimension;
18
19
    /**
     * Creates an empty space with a fixed number of dimensions.
     *
     * @param int $dimension number of coordinates every point created by this space must have
     *
     * @throws LogicException when $dimension is lower than 1
     */
    public function __construct($dimension)
    {
        // a space needs at least one axis
        if (1 > $dimension) {
            throw new LogicException('a space dimension cannot be null or negative');
        }

        $this->dimension = $dimension;
    }
30
31
    /**
     * Exports the space as a plain array.
     *
     * @return array a single 'points' key holding the toArray() result of every attached point
     */
    public function toArray()
    {
        $export = ['points' => []];

        foreach ($this as $member) {
            $export['points'][] = $member->toArray();
        }

        return $export;
    }
43
44
    /**
     * Builds a Point from the given coordinates without attaching it to the space.
     *
     * @param array $coordinates one value per dimension of this space
     *
     * @return Point
     *
     * @throws LogicException when the number of coordinates differs from the space dimension
     */
    public function newPoint(array $coordinates)
    {
        $fitsThisSpace = count($coordinates) == $this->dimension;

        if ($fitsThisSpace) {
            return new Point($coordinates);
        }

        throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
    }
57
58
    /**
     * Creates a point from raw coordinates and attaches it to the space.
     *
     * @param array $coordinates one value per dimension of this space
     * @param mixed $data        optional data stored alongside the point
     *
     * @return mixed whatever attach() returns
     */
    public function addPoint(array $coordinates, $data = null)
    {
        $point = $this->newPoint($coordinates);

        return $this->attach($point, $data);
    }
66
67
    /**
     * Attaches a point to the space, rejecting any object that is not a Point.
     *
     * @param object $point the Point to store
     * @param mixed  $data  optional data associated with the point
     *
     * @return mixed the result of SplObjectStorage::attach()
     *
     * @throws InvalidArgumentException when $point is not a Point instance
     */
    public function attach($point, $data = null)
    {
        if ($point instanceof Point) {
            return parent::attach($point, $data);
        }

        throw new InvalidArgumentException('can only attach points to spaces');
    }
79
80
    /**
     * Returns the dimension value this space was constructed with.
     *
     * @return int
     */
    public function getDimension()
    {
        return $this->dimension;
    }
87
88
    /**
     * Computes the per-axis minimum and maximum over every point of the space.
     *
     * @return array|bool false when the space holds no point, otherwise a two-element
     *                    array: the point of lowest coordinates, then the point of
     *                    highest coordinates
     */
    public function getBoundaries()
    {
        if (count($this) === 0) {
            return false;
        }

        // both corners start with null on every axis, meaning "no value seen yet"
        $blank = array_fill(0, $this->dimension, null);
        $lowest = $this->newPoint($blank);
        $highest = $this->newPoint($blank);

        foreach ($this as $point) {
            for ($axis = 0; $axis < $this->dimension; ++$axis) {
                // assumes reading a Point by index has no side effect
                $value = $point[$axis];

                if ($lowest[$axis] === null || $lowest[$axis] > $value) {
                    $lowest[$axis] = $value;
                }

                if ($highest[$axis] === null || $highest[$axis] < $value) {
                    $highest[$axis] = $value;
                }
            }
        }

        return [$lowest, $highest];
    }
109
110
    /**
     * Builds a point whose coordinates are drawn at random between two corner points.
     *
     * This file declares strict_types, so handing a float to rand() raises a TypeError.
     * Float bounds are therefore interpolated with a random fraction instead; any other
     * bounds keep going through rand() exactly as before.
     *
     * @param Point $min lower corner, read by index for each dimension
     * @param Point $max upper corner, read by index for each dimension
     *
     * @return Point a new point of this space (not attached to it)
     */
    public function getRandomPoint(Point $min, Point $max)
    {
        $point = $this->newPoint(array_fill(0, $this->dimension, null));

        for ($n = 0; $n < $this->dimension; ++$n) {
            $low = $min[$n];
            $high = $max[$n];

            if (is_float($low) || is_float($high)) {
                // random fraction in [0, 1] scaled onto the [$low, $high] interval
                $fraction = mt_rand() / mt_getrandmax();
                $point[$n] = $low + $fraction * ($high - $low);
            } else {
                $point[$n] = rand($low, $high);
            }
        }

        return $point;
    }
126
127
    /**
     * Partitions the points of the space into clusters.
     *
     * Clusters are seeded through initializeClusters(), then iterate() is called
     * repeatedly until it reports that no point changed cluster.
     *
     * @param int           $nbClusters        number of clusters to build
     * @param int           $seed              seeding method, one of the KMeans::INIT_* constants
     * @param callable|null $iterationCallback optional callable invoked before every iteration
     *                                         with this space and the current clusters
     *
     * @return array|Cluster[] the clusters; always an array, including when there is only one
     *
     * @throws InvalidArgumentException when $iterationCallback is truthy but not callable
     */
    public function solve($nbClusters, $seed = KMeans::INIT_RANDOM, $iterationCallback = null)
    {
        if ($iterationCallback && !is_callable($iterationCallback)) {
            throw new InvalidArgumentException('invalid iteration callback');
        }

        // initialize K clusters
        $clusters = $this->initializeClusters($nbClusters, $seed);

        // there's only one cluster, clusterization has no meaning; it is still
        // returned inside the array so the return shape matches the documented
        // Cluster[] whatever the number of clusters
        if (count($clusters) === 1) {
            return $clusters;
        }

        // until convergence is reached
        do {
            if ($iterationCallback) {
                $iterationCallback($this, $clusters);
            }
        } while ($this->iterate($clusters));

        // clustering is done.
        return $clusters;
    }
156
157
    /**
     * Creates the initial clusters and assigns every point of the space to the first one.
     *
     * @param int $nbClusters number of clusters to create, must be greater than 0
     * @param int $seed       seeding method: KMeans::INIT_RANDOM or KMeans::INIT_KMEANS_PLUS_PLUS
     *
     * @return array|Cluster[]
     *
     * @throws InvalidArgumentException when $nbClusters is not positive or $seed is not a
     *                                  supported seeding method
     * @throws LogicException           when the space holds no point
     */
    protected function initializeClusters($nbClusters, $seed)
    {
        if ($nbClusters <= 0) {
            throw new InvalidArgumentException('invalid clusters number');
        }

        // without points there are no boundaries and no point to seed from
        if (count($this) === 0) {
            throw new LogicException('cannot initialize clusters on an empty space');
        }

        // every branch either assigns $clusters or throws, so the variable is
        // defined on all execution paths reaching the code below
        switch ($seed) {
            // the default seeding method chooses completely random centroid
            case KMeans::INIT_RANDOM:
                $clusters = $this->initializeRandomClusters($nbClusters);
                break;

            // seeds from existing points, favouring points far from the centroids already chosen
            case KMeans::INIT_KMEANS_PLUS_PLUS:
                $clusters = $this->initializeKMeansPlusPlusClusters($nbClusters);
                break;

            default:
                throw new InvalidArgumentException('invalid seeding method');
        }

        // assign all points to the first cluster
        $clusters[0]->attachAll($this);

        return $clusters;
    }

    /**
     * Seeds clusters on random coordinates taken within the boundaries of the space.
     *
     * @param int $nbClusters number of clusters to create
     *
     * @return Cluster[]
     */
    private function initializeRandomClusters($nbClusters)
    {
        // get the space boundaries to avoid placing clusters centroid too far from points
        list($min, $max) = $this->getBoundaries();

        // explicit initialization: the array exists before the first append
        $clusters = [];

        for ($n = 0; $n < $nbClusters; ++$n) {
            $clusters[] = new Cluster($this, $this->getRandomPoint($min, $max)->getCoordinates());
        }

        return $clusters;
    }

    /**
     * Seeds clusters on existing points of the space.
     *
     * The first centroid is a randomly chosen point. Each further centroid is picked with
     * a probability weighted by the distance between a point and its closest centroid
     * among those already chosen.
     *
     * @param int $nbClusters number of clusters to create
     *
     * @return Cluster[]
     */
    private function initializeKMeansPlusPlusClusters($nbClusters)
    {
        // find a random point: walk the storage up to a random 1-based position
        $position = rand(1, count($this));
        $this->rewind();
        for ($i = 1; $i < $position && $this->valid(); ++$i) {
            $this->next();
        }

        $clusters = [new Cluster($this, $this->current()->getCoordinates())];

        // retains the distances between points and their closest clusters
        $distances = new SplObjectStorage();

        // create the remaining clusters
        for ($i = 1; $i < $nbClusters; ++$i) {
            $sum = 0;

            // for each point, get the distance with the closest centroid already chosen
            foreach ($this as $point) {
                $distance = $point->getDistanceWith($point->getClosest($clusters));
                $sum += $distances[$point] = $distance;
            }

            // choose a new random point using a weighted probability distribution:
            // subtract each point's distance from a random threshold until it is used up
            $threshold = rand(0, (int) $sum);
            foreach ($this as $point) {
                if (($threshold -= $distances[$point]) > 0) {
                    continue;
                }

                $clusters[] = new Cluster($this, $point->getCoordinates());
                break;
            }
        }

        return $clusters;
    }
222
223
    /**
     * Runs one reassignment pass: moves every point to its closest cluster, then
     * asks each cluster to update its centroid.
     *
     * @param array|Cluster[] $clusters the clusters to rebalance
     *
     * @return bool true when at least one point changed cluster during this pass
     */
    protected function iterate($clusters)
    {
        $moved = false;

        // pending migrations, keyed by cluster; applied only after the scan so the
        // clusters are not modified while they are being iterated
        $incoming = new SplObjectStorage();
        $outgoing = new SplObjectStorage();

        // calculate proximity amongst points and clusters
        foreach ($clusters as $current) {
            foreach ($current as $point) {
                $nearest = $point->getClosest($clusters);

                // the point already sits in its closest cluster
                if ($nearest === $current) {
                    continue;
                }

                if (!isset($incoming[$nearest])) {
                    $incoming[$nearest] = new SplObjectStorage();
                }

                if (!isset($outgoing[$current])) {
                    $outgoing[$current] = new SplObjectStorage();
                }

                // schedule the move from the old cluster to the closest one
                $incoming[$nearest]->attach($point);
                $outgoing[$current]->attach($point);

                $moved = true;
            }
        }

        // perform points migrations
        foreach ($incoming as $target) {
            $target->attachAll($incoming[$target]);
        }

        foreach ($outgoing as $source) {
            $source->detachAll($outgoing[$source]);
        }

        // update all cluster's centroids
        foreach ($clusters as $each) {
            $each->updateCentroid();
        }

        return $moved;
    }
271
}
272