Completed
Push — master ( 95fc13...87396e )
by Arkadiusz
02:45
created

FuzzyCMeans   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 232
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 0
Metric Value
wmc 28
lcom 1
cbo 4
dl 0
loc 232
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 2
A initClusters() 0 8 1
A generateRandomMembership() 0 16 3
B updateClusters() 0 21 5
A getMembershipRowTotal() 0 12 3
A updateMembershipMatrix() 0 9 3
A getDistanceCalc() 0 16 2
A getObjective() 0 13 3
A getMembershipMatrix() 0 4 1
B cluster() 0 41 5
1
<?php
2
declare(strict_types=1);
3
4
namespace Phpml\Clustering;
5
6
use Phpml\Clustering\KMeans\Point;
7
use Phpml\Clustering\KMeans\Cluster;
8
use Phpml\Clustering\KMeans\Space;
9
use Phpml\Math\Distance\Euclidean;
10
11
class FuzzyCMeans implements Clusterer
12
{
13
    /**
     * Number of clusters the samples are partitioned into.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * Cluster objects; stays unset until the first center update creates them.
     *
     * @var array|Cluster[]
     */
    private $clusters;

    /**
     * Space built from the feature count of the first sample.
     *
     * @var Space
     */
    private $space;

    /**
     * Membership degrees, indexed as [cluster][sample].
     *
     * @var array|float[][]
     */
    private $membership;

    /**
     * Exponent applied to membership degrees when weighting samples.
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Threshold on the change of the objective value between two iterations.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Limit on the number of update iterations.
     *
     * @var int
     */
    private $maxIterations;

    /**
     * Number of samples passed to the last cluster() call.
     *
     * @var int
     */
    private $sampleCount;

    /**
     * Samples passed to the last cluster() call.
     *
     * @var array
     */
    private $samples;
55
56
    /**
     * @param int   $clustersNumber Number of clusters to build; must be greater than zero
     * @param float $fuzziness      Exponent applied to membership degrees; a value of exactly 1
     *                              makes the membership exponent 2 / (fuzziness - 1) undefined
     * @param float $epsilon        Iteration stops once the objective value changes by no more than this
     * @param int   $maxIterations  Limit on the number of update iterations
     *
     * @throws \Phpml\Exception\InvalidArgumentException when $clustersNumber is not positive
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            // Fully qualified on purpose: the unqualified name would resolve to
            // Phpml\Clustering\InvalidArgumentException, which this file never imports.
            throw \Phpml\Exception\InvalidArgumentException::invalidClustersNumber();
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }
71
72
    /**
     * Seeds the membership matrix with random values and derives the
     * initial cluster centers from it.
     */
    protected function initClusters()
    {
        // The membership matrix has one row per cluster and one column per
        // sample, so it is sized by the cluster count. Sizing it by the space
        // dimension leaves rows missing whenever there are more clusters than
        // features, and leaves stray rows when there are fewer.
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }
80
81
    /**
     * Replaces the membership matrix with a $rows x $cols matrix of random
     * weights in which every row is scaled to sum to one.
     *
     * @param int $rows Number of rows to generate
     * @param int $cols Number of values per row
     */
    protected function generateRandomMembership(int $rows, int $cols)
    {
        $matrix = [];

        for ($r = 0; $r < $rows; ++$r) {
            $weights = [];
            $rowSum = 0.0;

            // Draw raw weights from {0.1, 0.2, ..., 0.5} and track their sum
            for ($c = 0; $c < $cols; ++$c) {
                $weights[$c] = rand(1, 5) / 10.0;
                $rowSum += $weights[$c];
            }

            // Scale the row so that its entries add up to one
            foreach ($weights as $c => $weight) {
                $weights[$c] = $weight / $rowSum;
            }

            $matrix[$r] = $weights;
        }

        $this->membership = $matrix;
    }
101
102
    /**
     * Recomputes every cluster center from the current membership matrix.
     *
     * Each coordinate of a center becomes the sum of the samples' values for
     * that coordinate weighted by membership ^ fuzziness, divided by the sum
     * of those weights. Cluster objects are created on first use.
     */
    protected function updateClusters()
    {
        $dim = $this->space->getDimension();

        // Explicit emptiness check: covers both the unset (null) state and an
        // empty array without relying on implicit array-to-boolean conversion.
        if (empty($this->clusters)) {
            $this->clusters = [];
            for ($i = 0; $i < $this->clustersNumber; $i++) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; $i++) {
            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();

            // The unweighted total ignores the column argument, so it is the
            // same for every coordinate and only needs computing once per cluster.
            $weightTotal = $this->getMembershipRowTotal($i, 0, false);

            for ($k = 0; $k < $dim; $k++) {
                $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $weightTotal;
            }

            $cluster->setCoordinates($center);
        }
    }
123
124
    /**
     * Sums membership ^ fuzziness over all samples for one cluster,
     * optionally multiplying each term by a sample coordinate.
     *
     * @param int  $row      Cluster index (row of the membership matrix)
     * @param int  $col      Sample coordinate to multiply by; only read when $multiply is true
     * @param bool $multiply Whether each term is multiplied by the sample's $col coordinate
     *
     * @return float
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply)
    {
        $total = 0.0;

        for ($sample = 0; $sample < $this->sampleCount; ++$sample) {
            $weight = pow($this->membership[$row][$sample], $this->fuzziness);
            $total += $multiply
                ? $weight * $this->samples[$sample][$col]
                : $weight;
        }

        return $total;
    }
136
137
    /**
     * Recomputes the membership degree of every sample in every cluster as
     * the reciprocal of the value returned by getDistanceCalc().
     */
    protected function updateMembershipMatrix()
    {
        $cluster = 0;
        while ($cluster < $this->clustersNumber) {
            $sample = 0;
            while ($sample < $this->sampleCount) {
                $ratioSum = $this->getDistanceCalc($cluster, $sample);
                $this->membership[$cluster][$sample] = 1.0 / $ratioSum;
                ++$sample;
            }
            ++$cluster;
        }
    }
146
147
    /**
     * Computes the denominator of the membership update for one
     * cluster/sample pair: the sum over all clusters j of
     * (d(row, col) / d(j, col)) ^ (2 / (fuzziness - 1)), where d is the
     * Euclidean distance between a cluster center and the sample.
     *
     * Zero distances are handled explicitly instead of dividing by zero:
     * - the sample sits on this cluster's center: returns 1.0, so the
     *   reciprocal taken by the caller yields full membership;
     * - the sample sits on another cluster's center: returns INF, so the
     *   reciprocal yields zero membership.
     *
     * @param int $row Cluster index
     * @param int $col Sample index
     *
     * @return float
     */
    protected function getDistanceCalc(int $row, int $col)
    {
        $distance = new Euclidean();
        $sample = $this->samples[$col];

        $dist1 = $distance->distance($this->clusters[$row]->getCoordinates(), $sample);
        if ((float) $dist1 === 0.0) {
            return 1.0;
        }

        // Loop-invariant exponent of the distance ratio
        $exponent = 2.0 / ($this->fuzziness - 1);

        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; $j++) {
            $dist2 = $distance->distance($this->clusters[$j]->getCoordinates(), $sample);
            if ((float) $dist2 === 0.0) {
                return INF;
            }

            $sum += pow($dist1 / $dist2, $exponent);
        }

        return $sum;
    }
169
170
    /**
     * Returns the objective value that the clustering tries to minimize:
     * the sum of the Euclidean distances between every cluster center and
     * every sample.
     *
     * @return float
     */
    protected function getObjective()
    {
        $metric = new Euclidean();
        $total = 0.0;

        for ($c = 0; $c < $this->clustersNumber; ++$c) {
            $center = $this->clusters[$c]->getCoordinates();

            for ($s = 0; $s < $this->sampleCount; ++$s) {
                $total += $metric->distance($center, $this->samples[$s]);
            }
        }

        return $total;
    }
188
189
    /**
     * Returns the current membership matrix, indexed as [cluster][sample].
     *
     * @return array
     */
    public function getMembershipMatrix()
    {
        $matrix = $this->membership;

        return $matrix;
    }
196
197
    /**
     * Runs fuzzy c-means on the given samples and returns them grouped by
     * the cluster in which each one has its highest membership degree.
     *
     * Membership matrix and cluster centers are updated alternately until
     * the objective value changes by no more than epsilon or the iteration
     * limit is reached.
     *
     * @param array|Point[] $samples Samples indexed from zero, all with the same number of features
     *
     * @return array One entry per cluster, each being the result of Cluster::getPoints();
     *               an empty array when no samples are given
     */
    public function cluster(array $samples)
    {
        // Nothing to cluster; also avoids reading $samples[0] on an empty array
        if (empty($samples)) {
            return [];
        }

        // Initialize variables, clusters and membership matrix
        $this->sampleCount = count($samples);
        $this->samples =& $samples;
        $this->space = new Space(count($samples[0]));
        // Discard clusters from a previous call: they belong to the old space
        // and still hold the points attached during that run.
        $this->clusters = null;
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;

            // Pre-increment with a strict bound so that the loop body runs at
            // most maxIterations times (always at least once).
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster in which it
        // has the highest membership degree
        for ($k = 0; $k < $this->sampleCount; $k++) {
            $column = array_column($this->membership, $k);
            arsort($column);
            reset($column);
            $i = key($column);
            $cluster = $this->clusters[$i];
            $cluster->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }
242
}
243