Test Setup Failed
Push — master ( 5e02b8...d3888e )
by Arkadiusz
11:41
created

src/Clustering/FuzzyCMeans.php (1 issue)

Labels
Severity

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis. Consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Clustering;
6
7
use Phpml\Clustering\KMeans\Cluster;
8
use Phpml\Clustering\KMeans\Point;
9
use Phpml\Clustering\KMeans\Space;
10
use Phpml\Exception\InvalidArgumentException;
11
use Phpml\Math\Distance\Euclidean;
12
13
class FuzzyCMeans implements Clusterer
14
{
15
    /**
16
     * @var int Number of clusters to build; the constructor rejects values <= 0.
17
     */
18
    private $clustersNumber;
19
20
    /**
21
     * @var Cluster[] Cluster objects, created by updateClusters() when this list is empty.
22
     */
23
    private $clusters = [];
24
25
    /**
26
     * @var Space Space constructed with the element count of the first sample passed to cluster().
27
     */
28
    private $space;
29
30
    /**
31
     * @var float[][] Membership values; updateMembershipMatrix() addresses them as [cluster index][sample index].
32
     */
33
    private $membership = [];
34
35
    /**
36
     * @var float Exponent applied to membership values when they are used as weights.
37
     */
38
    private $fuzziness;
39
40
    /**
41
     * @var float Clustering stops once the objective changes by no more than this between iterations.
42
     */
43
    private $epsilon;
44
45
    /**
46
     * @var int Limit used by cluster() to bound the number of refinement iterations.
47
     */
48
    private $maxIterations;
49
50
    /**
51
     * @var int Number of samples passed to the current cluster() call.
52
     */
53
    private $sampleCount;
54
55
    /**
56
     * @var array Samples passed to the current cluster() call.
57
     */
58
    private $samples = [];
59
60
    /**
     * @param int   $clustersNumber number of clusters to build; must be positive
     * @param float $fuzziness      exponent applied to membership values; must be greater than 1
     *                              because the membership update divides by (fuzziness - 1)
     * @param float $epsilon        clustering stops once the objective changes by no more than this
     * @param int   $maxIterations  limit on the number of refinement iterations
     *
     * @throws InvalidArgumentException when $clustersNumber is not positive or $fuzziness is not greater than 1
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw new InvalidArgumentException('Invalid clusters number');
        }

        // Fail early: a fuzziness of 1 would otherwise surface later as a division by zero
        if ($fuzziness <= 1.0) {
            throw new InvalidArgumentException('Fuzziness must be greater than 1');
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }
74
75
    /**
     * Exposes the fuzzy membership matrix held by this instance.
     *
     * @return array the current membership values; an empty array until they have been populated
     */
    public function getMembershipMatrix(): array
    {
        $matrix = $this->membership;

        return $matrix;
    }
79
80
    /**
     * Runs fuzzy c-means on the samples and returns them grouped by their strongest cluster.
     *
     * The membership matrix and the cluster centers are refined alternately until
     * the objective value changes by no more than epsilon, or until maxIterations
     * refinement passes have run. Each sample is then attached to the cluster in
     * which it has the highest membership.
     *
     * @param Point[]|int[][] $samples
     *
     * @return array one entry per cluster, each being the result of that cluster's getPoints()
     *
     * @throws InvalidArgumentException when no samples are given
     */
    public function cluster(array $samples): array
    {
        if (count($samples) === 0) {
            throw new InvalidArgumentException('Cannot cluster an empty set of samples');
        }

        // Re-index so that the positional loops below work for any input keys
        $this->samples = array_values($samples);
        $this->sampleCount = count($this->samples);
        $this->space = new Space(count($this->samples[0]));

        // Start from a clean state so that a second call does not reuse the
        // clusters (and their attached points) of a previous run
        $this->clusters = [];
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;

            // Pre-increment so that the body runs at most maxIterations times
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster with the highest membership
        for ($k = 0; $k < $this->sampleCount; ++$k) {
            // Only rows that belong to an actual cluster are considered;
            // ties are resolved in favour of the lowest cluster index
            $best = 0;
            for ($i = 1; $i < $this->clustersNumber; ++$i) {
                if ($this->membership[$i][$k] > $this->membership[$best][$k]) {
                    $best = $i;
                }
            }

            // Static analysis flagged that a sample may already be a Point while
            // Point::__construct() only seems to accept an array, so an existing
            // Point is attached as-is instead of being wrapped again
            $sample = $this->samples[$k];
            $this->clusters[$best]->attach($sample instanceof Point ? $sample : new Point($sample));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }
123
124
    /**
     * Seeds the membership matrix with random values and derives the initial
     * cluster centers from it.
     */
    protected function initClusters(): void
    {
        // Membership array is a matrix of cluster number by sample counts.
        // We initialize the membership array with random values; the row count
        // must be the number of clusters (not the space dimension), because
        // updateClusters() reads one membership row per cluster
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }
132
133
    /**
     * Replaces the membership matrix with $rows rows of $cols random weights.
     *
     * Every weight is drawn as random_int(1, 5) / 10, and each row is then
     * scaled so that its entries add up to 1.
     */
    protected function generateRandomMembership(int $rows, int $cols): void
    {
        $matrix = [];
        for ($r = 0; $r < $rows; ++$r) {
            $weights = [];
            $rowSum = 0.0;
            for ($c = 0; $c < $cols; ++$c) {
                $weight = random_int(1, 5) / 10.0;
                $weights[] = $weight;
                $rowSum += $weight;
            }

            // Normalise the row by its own total
            foreach ($weights as $index => $weight) {
                $weights[$index] = $weight / $rowSum;
            }

            $matrix[] = $weights;
        }

        $this->membership = $matrix;
    }
150
151
    /**
     * Recomputes the coordinates of every cluster center from the membership matrix.
     *
     * The cluster objects are created with all-zero coordinates the first time
     * this runs. Each coordinate then becomes the ratio of two
     * getMembershipRowTotal() sums: the sample-weighted one over the plain one.
     */
    protected function updateClusters(): void
    {
        $dimension = $this->space->getDimension();

        // Lazily create the cluster objects
        if (count($this->clusters) === 0) {
            $origin = array_fill(0, $dimension, 0.0);
            for ($n = 0; $n < $this->clustersNumber; ++$n) {
                $this->clusters[] = new Cluster($this->space, $origin);
            }
        }

        for ($index = 0; $index < $this->clustersNumber; ++$index) {
            $target = $this->clusters[$index];
            $coordinates = $target->getCoordinates();
            for ($axis = 0; $axis < $dimension; ++$axis) {
                $weighted = $this->getMembershipRowTotal($index, $axis, true);
                $plain = $this->getMembershipRowTotal($index, $axis, false);
                $coordinates[$axis] = $weighted / $plain;
            }

            $target->setCoordinates($coordinates);
        }
    }
172
173
    /**
     * Sums one membership row after raising each entry to the fuzziness power.
     *
     * @param int  $row      index of the membership row to sum
     * @param int  $col      sample component used as a factor when $multiply is true
     * @param bool $multiply whether each term is multiplied by component $col of its sample
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply): float
    {
        $total = 0.0;
        $index = 0;
        while ($index < $this->sampleCount) {
            $term = $this->membership[$row][$index] ** $this->fuzziness;
            $total += $multiply ? $term * $this->samples[$index][$col] : $term;
            ++$index;
        }

        return $total;
    }
187
188
    /**
     * Rewrites the membership value of every (cluster, sample) pair as the
     * reciprocal of getDistanceCalc() for that pair.
     */
    protected function updateMembershipMatrix(): void
    {
        $clusterIndex = 0;
        while ($clusterIndex < $this->clustersNumber) {
            $sampleIndex = 0;
            while ($sampleIndex < $this->sampleCount) {
                $denominator = $this->getDistanceCalc($clusterIndex, $sampleIndex);
                $this->membership[$clusterIndex][$sampleIndex] = 1.0 / $denominator;
                ++$sampleIndex;
            }

            ++$clusterIndex;
        }
    }
197
198
    /**
     * Computes the denominator of the fuzzy c-means membership update for one
     * (cluster, sample) pair: the sum over all clusters j of
     * (d(row, col) / d(j, col)) ^ (2 / (fuzziness - 1)).
     *
     * Assumes fuzziness is not 1, since the exponent divides by (fuzziness - 1).
     *
     * @param int $row index of the cluster whose membership is being updated
     * @param int $col index of the sample
     *
     * @return float the denominator; 1.0 when the sample lies on this cluster's
     *               center, INF when it lies on another cluster's center (so the
     *               caller's reciprocal yields a membership of 1 or 0)
     */
    protected function getDistanceCalc(int $row, int $col): float
    {
        $distance = new Euclidean();
        $sample = $this->samples[$col];
        $dist1 = $distance->distance($this->clusters[$row]->getCoordinates(), $sample);

        // The sample coincides with this center: full membership, and no 0 / 0 below
        if ($dist1 <= 0.0) {
            return 1.0;
        }

        // The whole fraction is the exponent; "** 2.0 / (m - 1)" would square
        // first and divide afterwards because ** binds tighter than /
        $exponent = 2.0 / ($this->fuzziness - 1);

        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; ++$j) {
            $dist2 = $distance->distance($this->clusters[$j]->getCoordinates(), $sample);

            // The sample coincides with another center: this term is unbounded
            if ($dist2 <= 0.0) {
                return INF;
            }

            $sum += ($dist1 / $dist2) ** $exponent;
        }

        return $sum;
    }
219
220
    /**
     * Evaluates the value that the clustering loop tries to minimize: the sum
     * of the Euclidean distances between every cluster center and every sample.
     */
    protected function getObjective(): float
    {
        $metric = new Euclidean();
        $total = 0.0;
        for ($c = 0; $c < $this->clustersNumber; ++$c) {
            $center = $this->clusters[$c]->getCoordinates();
            for ($s = 0; $s < $this->sampleCount; ++$s) {
                $total += $metric->distance($center, $this->samples[$s]);
            }
        }

        return $total;
    }
239
}
240