Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Clustering/FuzzyCMeans.php (1 issue)

Check for implicit conversion of array to boolean.

Best Practice Bug Minor

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering;
6
7
use Phpml\Clustering\KMeans\Cluster;
8
use Phpml\Clustering\KMeans\Point;
9
use Phpml\Clustering\KMeans\Space;
10
use Phpml\Exception\InvalidArgumentException;
11
use Phpml\Math\Distance\Euclidean;
12
13
class FuzzyCMeans implements Clusterer
14
{
15
    /**
16
     * @var int
17
     */
18
    private $clustersNumber;
19
20
    /**
21
     * @var array|Cluster[]
22
     */
23
    private $clusters = null;
24
25
    /**
26
     * @var Space
27
     */
28
    private $space;
29
30
    /**
31
     * @var array|float[][]
32
     */
33
    private $membership = [];
34
35
    /**
36
     * @var float
37
     */
38
    private $fuzziness;
39
40
    /**
41
     * @var float
42
     */
43
    private $epsilon;
44
45
    /**
46
     * @var int
47
     */
48
    private $maxIterations;
49
50
    /**
51
     * @var int
52
     */
53
    private $sampleCount;
54
55
    /**
56
     * @var array
57
     */
58
    private $samples = [];
59
60
    /**
61
     * @throws InvalidArgumentException
62
     */
63
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
64
    {
65
        if ($clustersNumber <= 0) {
66
            throw InvalidArgumentException::invalidClustersNumber();
67
        }
68
69
        $this->clustersNumber = $clustersNumber;
70
        $this->fuzziness = $fuzziness;
71
        $this->epsilon = $epsilon;
72
        $this->maxIterations = $maxIterations;
73
    }
74
75
    public function getMembershipMatrix(): array
76
    {
77
        return $this->membership;
78
    }
79
80
    /**
81
     * @param Point[]|int[][] $samples
82
     */
83
    public function cluster(array $samples): array
84
    {
85
        // Initialize variables, clusters and membership matrix
86
        $this->sampleCount = count($samples);
87
        $this->samples = &$samples;
88
        $this->space = new Space(count($samples[0]));
89
        $this->initClusters();
90
91
        // Our goal is minimizing the objective value while
92
        // executing the clustering steps at a maximum number of iterations
93
        $lastObjective = 0.0;
94
        $iterations = 0;
95
        do {
96
            // Update the membership matrix and cluster centers, respectively
97
            $this->updateMembershipMatrix();
98
            $this->updateClusters();
99
100
            // Calculate the new value of the objective function
101
            $objectiveVal = $this->getObjective();
102
            $difference = abs($lastObjective - $objectiveVal);
103
            $lastObjective = $objectiveVal;
104
        } while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations);
105
106
        // Attach (hard cluster) each data point to the nearest cluster
107
        for ($k = 0; $k < $this->sampleCount; ++$k) {
108
            $column = array_column($this->membership, $k);
109
            arsort($column);
110
            reset($column);
111
            $i = key($column);
112
            $cluster = $this->clusters[$i];
113
            $cluster->attach(new Point($this->samples[$k]));
114
        }
115
116
        // Return grouped samples
117
        $grouped = [];
118
        foreach ($this->clusters as $cluster) {
119
            $grouped[] = $cluster->getPoints();
120
        }
121
122
        return $grouped;
123
    }
124
125
    protected function initClusters(): void
126
    {
127
        // Membership array is a matrix of cluster number by sample counts
128
        // We initilize the membership array with random values
129
        $dim = $this->space->getDimension();
130
        $this->generateRandomMembership($dim, $this->sampleCount);
131
        $this->updateClusters();
132
    }
133
134
    protected function generateRandomMembership(int $rows, int $cols): void
135
    {
136
        $this->membership = [];
137
        for ($i = 0; $i < $rows; ++$i) {
138
            $row = [];
139
            $total = 0.0;
140
            for ($k = 0; $k < $cols; ++$k) {
141
                $val = random_int(1, 5) / 10.0;
142
                $row[] = $val;
143
                $total += $val;
144
            }
145
146
            $this->membership[] = array_map(function ($val) use ($total) {
147
                return $val / $total;
148
            }, $row);
149
        }
150
    }
151
152
    protected function updateClusters(): void
153
    {
154
        $dim = $this->space->getDimension();
155
        if (!$this->clusters) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->clusters of type array is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
156
            $this->clusters = [];
157
            for ($i = 0; $i < $this->clustersNumber; ++$i) {
158
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
159
            }
160
        }
161
162
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
163
            $cluster = $this->clusters[$i];
164
            $center = $cluster->getCoordinates();
165
            for ($k = 0; $k < $dim; ++$k) {
166
                $a = $this->getMembershipRowTotal($i, $k, true);
167
                $b = $this->getMembershipRowTotal($i, $k, false);
168
                $center[$k] = $a / $b;
169
            }
170
171
            $cluster->setCoordinates($center);
172
        }
173
    }
174
175
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply)
176
    {
177
        $sum = 0.0;
178
        for ($k = 0; $k < $this->sampleCount; ++$k) {
179
            $val = pow($this->membership[$row][$k], $this->fuzziness);
180
            if ($multiply) {
181
                $val *= $this->samples[$k][$col];
182
            }
183
184
            $sum += $val;
185
        }
186
187
        return $sum;
188
    }
189
190
    protected function updateMembershipMatrix(): void
191
    {
192
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
193
            for ($k = 0; $k < $this->sampleCount; ++$k) {
194
                $distCalc = $this->getDistanceCalc($i, $k);
195
                $this->membership[$i][$k] = 1.0 / $distCalc;
196
            }
197
        }
198
    }
199
200
    protected function getDistanceCalc(int $row, int $col): float
201
    {
202
        $sum = 0.0;
203
        $distance = new Euclidean();
204
        $dist1 = $distance->distance(
205
            $this->clusters[$row]->getCoordinates(),
206
            $this->samples[$col]
207
        );
208
209
        for ($j = 0; $j < $this->clustersNumber; ++$j) {
210
            $dist2 = $distance->distance(
211
                $this->clusters[$j]->getCoordinates(),
212
                $this->samples[$col]
213
            );
214
215
            $val = pow($dist1 / $dist2, 2.0 / ($this->fuzziness - 1));
216
            $sum += $val;
217
        }
218
219
        return $sum;
220
    }
221
222
    /**
223
     * The objective is to minimize the distance between all data points
224
     * and all cluster centers. This method returns the summation of all
225
     * these distances
226
     */
227
    protected function getObjective()
228
    {
229
        $sum = 0.0;
230
        $distance = new Euclidean();
231
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
232
            $clust = $this->clusters[$i]->getCoordinates();
233
            for ($k = 0; $k < $this->sampleCount; ++$k) {
234
                $point = $this->samples[$k];
235
                $sum += $distance->distance($clust, $point);
236
            }
237
        }
238
239
        return $sum;
240
    }
241
}
242