1 | <?php |
||
13 | class FuzzyCMeans implements Clusterer |
||
14 | { |
||
/**
 * Number of clusters to build; validated as positive by the constructor.
 *
 * @var int
 */
private $clustersNumber;

/**
 * Cluster objects, created lazily on the first call to updateClusters().
 * Stays null until then.
 *
 * @var array|Cluster[]
 */
private $clusters;

/**
 * Space the clusters live in; supplies the dimension of the data.
 *
 * @var Space
 */
private $space;

/**
 * Membership matrix, indexed as [cluster index][sample index].
 *
 * @var array|float[][]
 */
private $membership;

/**
 * Exponent applied to every membership value when weighting samples.
 *
 * @var float
 */
private $fuzziness;

/**
 * NOTE(review): presumably the convergence tolerance of the clustering
 * loop; the code that reads it is not visible here - confirm.
 *
 * @var float
 */
private $epsilon;

/**
 * NOTE(review): presumably the iteration cap of the clustering loop;
 * the code that reads it is not visible here - confirm.
 *
 * @var int
 */
private $maxIterations;

/**
 * Number of samples; upper bound of every per-sample loop.
 *
 * @var int
 */
private $sampleCount;

/**
 * Samples being clustered, indexed as [sample index][coordinate index].
 *
 * @var array
 */
private $samples;
||
59 | |||
/**
 * Stores the clustering parameters; no clustering work is done here.
 *
 * @param int   $clustersNumber Number of clusters to build; must be positive
 * @param float $fuzziness      Exponent applied to membership values
 * @param float $epsilon        Stored as-is; presumably the convergence
 *                              tolerance (its use is outside this method)
 * @param int   $maxIterations  Stored as-is; presumably the iteration cap
 *                              (its use is outside this method)
 *
 * @throws InvalidArgumentException when $clustersNumber is zero or negative
 */
public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100)
{
    // Reject a non-positive cluster count before touching any state.
    if ($clustersNumber < 1) {
        throw InvalidArgumentException::invalidClustersNumber();
    }

    $this->maxIterations = $maxIterations;
    $this->epsilon = $epsilon;
    $this->fuzziness = $fuzziness;
    $this->clustersNumber = $clustersNumber;
}
||
73 | |||
/**
 * Seeds the membership matrix with random values and derives the initial
 * cluster centers from it.
 *
 * The membership matrix has one row per cluster and one column per sample.
 */
protected function initClusters(): void
{
    // The row count must be the number of clusters, not the dimension of
    // the space: updateClusters() reads one membership row for every
    // cluster index, so sizing by dimension leaves rows missing whenever
    // there are more clusters than dimensions.
    $this->generateRandomMembership($this->clustersNumber, $this->sampleCount);
    $this->updateClusters();
}
||
82 | |||
/**
 * Replaces the membership matrix with random, row-normalised weights.
 *
 * Every entry is first drawn from {0.1, 0.2, 0.3, 0.4, 0.5}; each row is
 * then divided by its own sum so that the entries of a row add up to 1.
 *
 * @param int $rows Number of rows to generate
 * @param int $cols Number of entries per row
 */
protected function generateRandomMembership(int $rows, int $cols): void
{
    $matrix = [];

    for ($r = 0; $r < $rows; ++$r) {
        // Draw the raw weights for this row.
        $weights = [];
        for ($c = 0; $c < $cols; ++$c) {
            $weights[] = rand(1, 5) / 10.0;
        }

        // Normalise the row by its total.
        $rowSum = array_sum($weights);
        foreach ($weights as $idx => $weight) {
            $weights[$idx] = $weight / $rowSum;
        }

        $matrix[] = $weights;
    }

    $this->membership = $matrix;
}
||
100 | |||
/**
 * Recomputes the coordinates of every cluster center from the current
 * membership matrix.
 *
 * On the first call the cluster list is created, one Cluster per
 * configured cluster, each starting at all-zero coordinates. Every
 * coordinate of a center is then set to the weighted mean of that
 * coordinate over all samples, where a sample's weight is its membership
 * in the cluster raised to the fuzziness exponent.
 */
protected function updateClusters(): void
{
    $dim = $this->space->getDimension();

    // Explicit emptiness test (covers both null and []) rather than
    // relying on the implicit array-to-boolean conversion.
    if (empty($this->clusters)) {
        $this->clusters = [];
        for ($i = 0; $i < $this->clustersNumber; ++$i) {
            $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0));
        }
    }

    for ($i = 0; $i < $this->clustersNumber; ++$i) {
        $cluster = $this->clusters[$i];
        $center = $cluster->getCoordinates();

        // The denominator (sum of fuzzified memberships) does not depend
        // on the coordinate, so it is computed once per cluster. The
        // column argument is ignored when $multiply is false.
        $weightTotal = $this->getMembershipRowTotal($i, 0, false);

        for ($k = 0; $k < $dim; ++$k) {
            $center[$k] = $this->getMembershipRowTotal($i, $k, true) / $weightTotal;
        }

        $cluster->setCoordinates($center);
    }
}
||
123 | |||
/**
 * Sums the fuzzified memberships of one membership row over all samples.
 *
 * Each membership value is raised to the fuzziness exponent. When
 * $multiply is true, every term is additionally multiplied by the
 * corresponding sample's coordinate at index $col.
 *
 * @param int  $row      Row of the membership matrix to sum
 * @param int  $col      Coordinate index within each sample; only read
 *                       when $multiply is true
 * @param bool $multiply Whether to weight each term by the sample coordinate
 *
 * @return float
 */
protected function getMembershipRowTotal(int $row, int $col, bool $multiply)
{
    $total = 0.0;

    for ($sample = 0; $sample < $this->sampleCount; ++$sample) {
        $weight = $this->membership[$row][$sample] ** $this->fuzziness;
        $total += $multiply ? $weight * $this->samples[$sample][$col] : $weight;
    }

    return $total;
}
||
138 | |||
139 | protected function updateMembershipMatrix(): void |
||
148 | |||
149 | protected function getDistanceCalc(int $row, int $col) : float |
||
150 | { |
||
151 | $sum = 0.0; |
||
152 | $distance = new Euclidean(); |
||
153 | $dist1 = $distance->distance( |
||
170 | |||
171 | /** |
||
172 | * The objective is to minimize the distance between all data points |
||
173 | * and all cluster centers. This method returns the summation of all |
||
174 | * these distances |
||
175 | */ |
||
176 | protected function getObjective() |
||
190 | |||
191 | public function getMembershipMatrix() : array |
||
195 | |||
196 | /** |
||
197 | * @param array|Point[] $samples |
||
198 | */ |
||
199 | public function cluster(array $samples) : array |
||
240 | } |
||
241 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using `empty(...)` or `!empty(...)` instead.