Passed
Push — master ( f7537c...ff80af )
by Arkadiusz
02:31
created

src/Phpml/Clustering/FuzzyCMeans.php (1 issue)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering;
6
7
use Phpml\Clustering\KMeans\Cluster;
8
use Phpml\Clustering\KMeans\Point;
9
use Phpml\Clustering\KMeans\Space;
10
use Phpml\Exception\InvalidArgumentException;
11
use Phpml\Math\Distance\Euclidean;
12
13
/**
 * Fuzzy C-Means clusterer.
 *
 * Every sample receives a degree of membership in every cluster. The
 * algorithm alternates between recomputing the membership matrix from the
 * current cluster centers and recomputing the centers from the memberships,
 * until the objective value stabilises or the iteration limit is reached.
 * Finally each sample is attached to the cluster in which it has the highest
 * membership.
 */
class FuzzyCMeans implements Clusterer
{
    /**
     * Number of clusters to build.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * Clusters of the current run; rebuilt on every call to cluster().
     *
     * @var array|Cluster[]
     */
    private $clusters = [];

    /**
     * @var Space
     */
    private $space;

    /**
     * Membership matrix indexed as [cluster][sample].
     *
     * @var array|float[][]
     */
    private $membership = [];

    /**
     * Fuzzifier exponent. The membership update divides by
     * (fuzziness - 1), so a value of exactly 1 is not usable.
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Iteration stops once the objective changes by no more than this amount.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Upper bound on the number of update iterations.
     *
     * @var int
     */
    private $maxIterations;

    /**
     * @var int
     */
    private $sampleCount;

    /**
     * Samples of the current run, indexed 0..sampleCount-1.
     *
     * @var array
     */
    private $samples;

    /**
     * @throws InvalidArgumentException when $clustersNumber is not positive
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw InvalidArgumentException::invalidClustersNumber();
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Builds a random membership matrix and derives the initial cluster
     * centers from it.
     */
    protected function initClusters(): void
    {
        // The membership matrix has one row per cluster and one column per
        // sample. It must be sized by the cluster count (not by the space
        // dimension), because every consumer indexes its rows by cluster.
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }

    /**
     * Fills the membership matrix with random values, each row scaled so
     * that it sums to 1.
     *
     * @param int $rows number of rows to generate
     * @param int $cols number of columns per row
     */
    protected function generateRandomMembership(int $rows, int $cols): void
    {
        $this->membership = [];
        for ($i = 0; $i < $rows; ++$i) {
            $row = [];
            $total = 0.0;
            for ($k = 0; $k < $cols; ++$k) {
                $val = rand(1, 5) / 10.0;
                $row[] = $val;
                $total += $val;
            }

            $this->membership[] = array_map(static function ($val) use ($total) {
                return $val / $total;
            }, $row);
        }
    }

    /**
     * Recomputes every cluster center as the membership-weighted mean of
     * the samples, creating the cluster objects first if none exist yet.
     */
    protected function updateClusters(): void
    {
        $dim = $this->space->getDimension();
        if (empty($this->clusters)) {
            $this->clusters = [];
            for ($i = 0; $i < $this->clustersNumber; ++$i) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            // The denominator does not depend on the coordinate, so it is
            // computed once per cluster (the column argument is ignored when
            // $multiply is false).
            $b = $this->getMembershipRowTotal($i, 0, false);
            if ($b == 0.0) {
                // No sample has any membership in this cluster: keep its
                // current center instead of dividing by zero.
                continue;
            }

            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();
            for ($k = 0; $k < $dim; ++$k) {
                $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $b;
            }

            $cluster->setCoordinates($center);
        }
    }

    /**
     * Sums membership[row][k] ^ fuzziness over all samples k.
     *
     * @param int  $row      cluster index
     * @param int  $col      coordinate index; only read when $multiply is true
     * @param bool $multiply when true, each term is multiplied by the
     *                       sample's coordinate at $col
     *
     * @return float
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply)
    {
        $sum = 0.0;
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $val = pow($this->membership[$row][$k], $this->fuzziness);
            if ($multiply) {
                $val *= $this->samples[$k][$col];
            }

            $sum += $val;
        }

        return $sum;
    }

    /**
     * Recomputes the membership of every sample in every cluster from the
     * current cluster centers.
     */
    protected function updateMembershipMatrix(): void
    {
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                // getDistanceCalc() returns 1.0, a finite sum, or INF;
                // 1.0 / INF evaluates to 0.0.
                $distCalc = $this->getDistanceCalc($i, $k);
                $this->membership[$i][$k] = 1.0 / $distCalc;
            }
        }
    }

    /**
     * Computes the denominator of the membership update for one
     * (cluster, sample) pair: the sum over all clusters j of
     * (d(row, sample) / d(j, sample)) ^ (2 / (fuzziness - 1)).
     *
     * Zero distances are handled explicitly instead of dividing by zero:
     * a sample lying exactly on the center of $row yields 1.0 (full
     * membership), and a sample lying on another cluster's center yields
     * INF (zero membership in $row).
     *
     * @param int $row cluster index
     * @param int $col sample index
     */
    protected function getDistanceCalc(int $row, int $col) : float
    {
        $distance = new Euclidean();
        $sample = $this->samples[$col];
        $dist1 = $distance->distance($this->clusters[$row]->getCoordinates(), $sample);

        if ($dist1 <= 0.0) {
            return 1.0;
        }

        // Loop-invariant exponent
        $exponent = 2.0 / ($this->fuzziness - 1);

        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; ++$j) {
            $dist2 = $distance->distance($this->clusters[$j]->getCoordinates(), $sample);
            if ($dist2 <= 0.0) {
                return INF;
            }

            $sum += pow($dist1 / $dist2, $exponent);
        }

        return $sum;
    }

    /**
     * Returns the sum of the Euclidean distances between every cluster
     * center and every sample. cluster() stops iterating once this value
     * changes by no more than epsilon between two passes.
     *
     * @return float
     */
    protected function getObjective()
    {
        $sum = 0.0;
        $distance = new Euclidean();
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $clust = $this->clusters[$i]->getCoordinates();
            for ($k = 0; $k < $this->sampleCount; ++$k) {
                $sum += $distance->distance($clust, $this->samples[$k]);
            }
        }

        return $sum;
    }

    /**
     * Returns the membership matrix of the last run, indexed as
     * [cluster][sample]; empty before cluster() has been called.
     */
    public function getMembershipMatrix() : array
    {
        return $this->membership;
    }

    /**
     * Clusters the given samples and returns them grouped per cluster.
     *
     * @param array|Point[] $samples
     *
     * @return array one entry per cluster holding that cluster's points;
     *               an empty array when no samples are given
     */
    public function cluster(array $samples) : array
    {
        // Samples are addressed by position below, so drop the caller's keys
        $samples = array_values($samples);

        // Reset all per-run state so that repeated calls do not accumulate
        // points or reuse centers from a previous data set
        $this->samples = $samples;
        $this->sampleCount = count($samples);
        $this->clusters = [];
        $this->membership = [];

        if ($this->sampleCount === 0) {
            return [];
        }

        $this->space = new Space(count($samples[0]));
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;
            // Pre-increment so that at most maxIterations passes are executed
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster in which it
        // has the highest membership; ties go to the lowest cluster index
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            $best = 0;
            for ($i = 1; $i < $this->clustersNumber; ++$i) {
                if ($this->membership[$i][$k] > $this->membership[$best][$k]) {
                    $best = $i;
                }
            }

            $this->clusters[$best]->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }
}
241