Completed
Pull Request — master (#36)
by
unknown
03:44 queued 01:01
created

FuzzyCMeans   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 232
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 0
Metric Value
wmc 28
lcom 1
cbo 4
dl 0
loc 232
rs 10
c 0
b 0
f 0
1
<?php
2
<<<<<<< HEAD
0 ignored issues
show
Bug introduced by
This code did not parse for me. Apparently, there is an error somewhere around this line:

Syntax error, unexpected T_SL
Loading history...
3
=======
4
5
>>>>>>> refs/remotes/php-ai/master
6
declare(strict_types=1);
7
8
namespace Phpml\Clustering;
9
10
use Phpml\Clustering\KMeans\Cluster;
use Phpml\Clustering\KMeans\Point;
use Phpml\Clustering\KMeans\Space;
use Phpml\Exception\InvalidArgumentException;
use Phpml\Math\Distance\Euclidean;
14
15
class FuzzyCMeans implements Clusterer
{
    /**
     * Number of clusters the samples are partitioned into.
     *
     * @var int
     */
    private $clustersNumber;

    /**
     * Cluster objects holding the current centers; rebuilt on every cluster() call.
     *
     * @var array|Cluster[]
     */
    private $clusters = null;

    /**
     * Space created from the dimension of the first sample.
     *
     * @var Space
     */
    private $space;

    /**
     * Membership matrix indexed as [cluster][sample].
     *
     * @var array|float[][]
     */
    private $membership;

    /**
     * Fuzziness exponent applied to the membership values.
     *
     * @var float
     */
    private $fuzziness;

    /**
     * Iteration stops once the objective changes by no more than this amount.
     *
     * @var float
     */
    private $epsilon;

    /**
     * Upper bound on the number of update iterations.
     *
     * @var int
     */
    private $maxIterations;

    /**
     * Number of samples passed to the last cluster() call.
     *
     * @var int
     */
    private $sampleCount;

    /**
     * Samples passed to the last cluster() call.
     *
     * @var array
     */
    private $samples;

    /**
     * @param int   $clustersNumber Number of clusters; must be positive
     * @param float $fuzziness      Fuzziness exponent; the membership update divides by
     *                              ($fuzziness - 1), so a value of exactly 1 is not usable
     * @param float $epsilon        Convergence threshold on the change of the objective value
     * @param int   $maxIterations  Maximum number of update iterations
     *
     * @throws InvalidArgumentException when $clustersNumber is not positive
     */
    public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
    {
        if ($clustersNumber <= 0) {
            throw InvalidArgumentException::invalidClustersNumber();
        }

        $this->clustersNumber = $clustersNumber;
        $this->fuzziness = $fuzziness;
        $this->epsilon = $epsilon;
        $this->maxIterations = $maxIterations;
    }

    /**
     * Creates a random membership matrix and derives the initial cluster centers from it.
     */
    protected function initClusters()
    {
        // The membership array is a matrix of cluster number by sample count,
        // so its row count must be the number of clusters (not the feature
        // dimension). We initialize it with random values.
        $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
        $this->updateClusters();
    }

    /**
     * Fills the membership matrix with random values, each row scaled to sum to 1.
     *
     * @param int $rows
     * @param int $cols
     */
    protected function generateRandomMembership(int $rows, int $cols)
    {
        $this->membership = [];
        for ($i = 0; $i < $rows; $i++) {
            $row = [];
            $total = 0.0;
            for ($k = 0; $k < $cols; $k++) {
                // Random value between 0.1 and 0.5 in steps of 0.1
                $val = rand(1, 5) / 10.0;
                $row[] = $val;
                $total += $val;
            }

            $this->membership[] = array_map(function ($val) use ($total) {
                return $val / $total;
            }, $row);
        }
    }

    /**
     * Recomputes every cluster center as the membership-weighted mean of the samples.
     *
     * Cluster objects are created on first use, with all coordinates set to zero.
     */
    protected function updateClusters()
    {
        $dim = $this->space->getDimension();
        if (! $this->clusters) {
            $this->clusters = [];
            for ($i = 0; $i < $this->clustersNumber; $i++) {
                $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
            }
        }

        for ($i = 0; $i < $this->clustersNumber; $i++) {
            // The denominator does not depend on the coordinate index,
            // so it is computed once per cluster.
            $denominator = $this->getMembershipRowTotal($i, 0, false);
            if ((float) $denominator === 0.0) {
                // No sample belongs to this cluster at all; keep its current
                // center rather than dividing by zero.
                continue;
            }

            $cluster = $this->clusters[$i];
            $center = $cluster->getCoordinates();
            for ($k = 0; $k < $dim; $k++) {
                $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $denominator;
            }
            $cluster->setCoordinates($center);
        }
    }

    /**
     * Sums membership[row][k] raised to the fuzziness exponent over all samples k.
     *
     * @param int  $row      Cluster index
     * @param int  $col      Coordinate index; only read when $multiply is true
     * @param bool $multiply When true, each term is multiplied by the sample's $col-th coordinate
     *
     * @return float
     */
    protected function getMembershipRowTotal(int $row, int $col, bool $multiply)
    {
        $sum = 0.0;
        for ($k = 0; $k < $this->sampleCount; $k++) {
            $val = pow($this->membership[$row][$k], $this->fuzziness);
            if ($multiply) {
                $val *= $this->samples[$k][$col];
            }
            $sum += $val;
        }

        return $sum;
    }

    /**
     * Recomputes every membership value from the current cluster centers.
     */
    protected function updateMembershipMatrix()
    {
        for ($i = 0; $i < $this->clustersNumber; $i++) {
            for ($k = 0; $k < $this->sampleCount; $k++) {
                $distCalc = $this->getDistanceCalc($i, $k);
                // getDistanceCalc() may return INF, in which case this yields 0.0
                $this->membership[$i][$k] = 1.0 / $distCalc;
            }
        }
    }

    /**
     * Computes the denominator of the membership update for one cluster/sample pair:
     * the sum over all clusters j of (d(row, col) / d(j, col)) ^ (2 / (fuzziness - 1)).
     *
     * @param int $row Cluster index
     * @param int $col Sample index
     *
     * @return float 1.0 when the sample lies exactly on this cluster's center,
     *               INF when it lies exactly on another cluster's center
     */
    protected function getDistanceCalc(int $row, int $col)
    {
        $distance = new Euclidean();
        $sample = $this->samples[$col];

        $dist1 = $distance->distance($this->clusters[$row]->getCoordinates(), $sample);
        if ((float) $dist1 === 0.0) {
            // The sample coincides with this center: full membership
            // (the caller computes 1 / 1.0), and no 0 / 0 division.
            return 1.0;
        }

        $exponent = 2.0 / ($this->fuzziness - 1);
        $sum = 0.0;
        for ($j = 0; $j < $this->clustersNumber; $j++) {
            $dist2 = $distance->distance($this->clusters[$j]->getCoordinates(), $sample);
            if ((float) $dist2 === 0.0) {
                // The sample coincides with another center: membership in
                // this cluster becomes 1 / INF = 0 instead of a division by zero.
                return INF;
            }
            $sum += pow($dist1 / $dist2, $exponent);
        }

        return $sum;
    }

    /**
     * The objective is to minimize the distance between all data points
     * and all cluster centers. This method returns the summation of all
     * these distances.
     *
     * @return float
     */
    protected function getObjective()
    {
        $sum = 0.0;
        $distance = new Euclidean();
        for ($i = 0; $i < $this->clustersNumber; $i++) {
            $clust = $this->clusters[$i]->getCoordinates();
            for ($k = 0; $k < $this->sampleCount; $k++) {
                $sum += $distance->distance($clust, $this->samples[$k]);
            }
        }

        return $sum;
    }

    /**
     * Returns the membership matrix computed by the last cluster() call,
     * indexed as [cluster][sample].
     *
     * @return array
     */
    public function getMembershipMatrix()
    {
        return $this->membership;
    }

    /**
     * Clusters the samples and returns them grouped by their strongest membership.
     *
     * @param array|Point[] $samples Samples indexed from 0; the first sample fixes the dimension
     *
     * @return array One list of points per cluster; an empty array when no samples are given
     */
    public function cluster(array $samples)
    {
        // Nothing to cluster; also avoids reading $samples[0] on an empty array
        if ($samples === []) {
            return [];
        }

        // Initialize variables, clusters and membership matrix
        $this->sampleCount = count($samples);
        $this->samples = $samples;
        $this->space = new Space(count($samples[0]));
        // Discard clusters from a previous call: they belong to the old space
        // and still have the previous run's points attached.
        $this->clusters = null;
        $this->initClusters();

        // Our goal is minimizing the objective value while
        // executing the clustering steps at a maximum number of iterations
        $lastObjective = 0.0;
        $iterations = 0;
        do {
            // Update the membership matrix and cluster centers, respectively
            $this->updateMembershipMatrix();
            $this->updateClusters();

            // Calculate the new value of the objective function
            $objectiveVal = $this->getObjective();
            $difference = abs($lastObjective - $objectiveVal);
            $lastObjective = $objectiveVal;

            // Pre-increment with a strict comparison caps the loop at exactly
            // maxIterations passes (at least one pass always runs).
        } while ($difference > $this->epsilon && ++$iterations < $this->maxIterations);

        // Attach (hard cluster) each data point to the cluster with the highest membership
        for ($k = 0; $k < $this->sampleCount; $k++) {
            $column = array_column($this->membership, $k);
            arsort($column);
            reset($column);
            $i = key($column);
            $this->clusters[$i]->attach(new Point($this->samples[$k]));
        }

        // Return grouped samples
        $grouped = [];
        foreach ($this->clusters as $cluster) {
            $grouped[] = $cluster->getPoints();
        }

        return $grouped;
    }
}
247