Check for implicit conversion of array to boolean.
These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Clustering; |
||
6 | |||
7 | use Phpml\Clustering\KMeans\Cluster; |
||
8 | use Phpml\Clustering\KMeans\Point; |
||
9 | use Phpml\Clustering\KMeans\Space; |
||
10 | use Phpml\Exception\InvalidArgumentException; |
||
11 | use Phpml\Math\Distance\Euclidean; |
||
12 | |||
13 | class FuzzyCMeans implements Clusterer |
||
14 | { |
||
15 | /** |
||
16 | * @var int |
||
17 | */ |
||
18 | private $clustersNumber; |
||
19 | |||
20 | /** |
||
21 | * @var array|Cluster[] |
||
22 | */ |
||
23 | private $clusters = null; |
||
24 | |||
25 | /** |
||
26 | * @var Space |
||
27 | */ |
||
28 | private $space; |
||
29 | |||
30 | /** |
||
31 | * @var array|float[][] |
||
32 | */ |
||
33 | private $membership = []; |
||
34 | |||
35 | /** |
||
36 | * @var float |
||
37 | */ |
||
38 | private $fuzziness; |
||
39 | |||
40 | /** |
||
41 | * @var float |
||
42 | */ |
||
43 | private $epsilon; |
||
44 | |||
45 | /** |
||
46 | * @var int |
||
47 | */ |
||
48 | private $maxIterations; |
||
49 | |||
50 | /** |
||
51 | * @var int |
||
52 | */ |
||
53 | private $sampleCount; |
||
54 | |||
55 | /** |
||
56 | * @var array |
||
57 | */ |
||
58 | private $samples = []; |
||
59 | |||
60 | /** |
||
61 | * @throws InvalidArgumentException |
||
62 | */ |
||
63 | public function __construct(int $clustersNumber, float $fuzziness = 2.0, float $epsilon = 1e-2, int $maxIterations = 100) |
||
64 | { |
||
65 | if ($clustersNumber <= 0) { |
||
66 | throw InvalidArgumentException::invalidClustersNumber(); |
||
67 | } |
||
68 | |||
69 | $this->clustersNumber = $clustersNumber; |
||
70 | $this->fuzziness = $fuzziness; |
||
71 | $this->epsilon = $epsilon; |
||
72 | $this->maxIterations = $maxIterations; |
||
73 | } |
||
74 | |||
75 | public function getMembershipMatrix(): array |
||
76 | { |
||
77 | return $this->membership; |
||
78 | } |
||
79 | |||
80 | /** |
||
81 | * @param Point[]|int[][] $samples |
||
82 | */ |
||
83 | public function cluster(array $samples): array |
||
84 | { |
||
85 | // Initialize variables, clusters and membership matrix |
||
86 | $this->sampleCount = count($samples); |
||
87 | $this->samples = &$samples; |
||
88 | $this->space = new Space(count($samples[0])); |
||
89 | $this->initClusters(); |
||
90 | |||
91 | // Our goal is minimizing the objective value while |
||
92 | // executing the clustering steps at a maximum number of iterations |
||
93 | $lastObjective = 0.0; |
||
94 | $iterations = 0; |
||
95 | do { |
||
96 | // Update the membership matrix and cluster centers, respectively |
||
97 | $this->updateMembershipMatrix(); |
||
98 | $this->updateClusters(); |
||
99 | |||
100 | // Calculate the new value of the objective function |
||
101 | $objectiveVal = $this->getObjective(); |
||
102 | $difference = abs($lastObjective - $objectiveVal); |
||
103 | $lastObjective = $objectiveVal; |
||
104 | } while ($difference > $this->epsilon && $iterations++ <= $this->maxIterations); |
||
105 | |||
106 | // Attach (hard cluster) each data point to the nearest cluster |
||
107 | for ($k = 0; $k < $this->sampleCount; ++$k) { |
||
108 | $column = array_column($this->membership, $k); |
||
109 | arsort($column); |
||
110 | reset($column); |
||
111 | $i = key($column); |
||
112 | $cluster = $this->clusters[$i]; |
||
113 | $cluster->attach(new Point($this->samples[$k])); |
||
114 | } |
||
115 | |||
116 | // Return grouped samples |
||
117 | $grouped = []; |
||
118 | foreach ($this->clusters as $cluster) { |
||
119 | $grouped[] = $cluster->getPoints(); |
||
120 | } |
||
121 | |||
122 | return $grouped; |
||
123 | } |
||
124 | |||
125 | protected function initClusters(): void |
||
126 | { |
||
127 | // Membership array is a matrix of cluster number by sample counts |
||
128 | // We initilize the membership array with random values |
||
129 | $dim = $this->space->getDimension(); |
||
130 | $this->generateRandomMembership($dim, $this->sampleCount); |
||
131 | $this->updateClusters(); |
||
132 | } |
||
133 | |||
134 | protected function generateRandomMembership(int $rows, int $cols): void |
||
135 | { |
||
136 | $this->membership = []; |
||
137 | for ($i = 0; $i < $rows; ++$i) { |
||
138 | $row = []; |
||
139 | $total = 0.0; |
||
140 | for ($k = 0; $k < $cols; ++$k) { |
||
141 | $val = random_int(1, 5) / 10.0; |
||
142 | $row[] = $val; |
||
143 | $total += $val; |
||
144 | } |
||
145 | |||
146 | $this->membership[] = array_map(function ($val) use ($total) { |
||
147 | return $val / $total; |
||
148 | }, $row); |
||
149 | } |
||
150 | } |
||
151 | |||
152 | protected function updateClusters(): void |
||
153 | { |
||
154 | $dim = $this->space->getDimension(); |
||
155 | if (!$this->clusters) { |
||
0 ignored issues
–
show
|
|||
156 | $this->clusters = []; |
||
157 | for ($i = 0; $i < $this->clustersNumber; ++$i) { |
||
158 | $this->clusters[] = new Cluster($this->space, array_fill(0, $dim, 0.0)); |
||
159 | } |
||
160 | } |
||
161 | |||
162 | for ($i = 0; $i < $this->clustersNumber; ++$i) { |
||
163 | $cluster = $this->clusters[$i]; |
||
164 | $center = $cluster->getCoordinates(); |
||
165 | for ($k = 0; $k < $dim; ++$k) { |
||
166 | $a = $this->getMembershipRowTotal($i, $k, true); |
||
167 | $b = $this->getMembershipRowTotal($i, $k, false); |
||
168 | $center[$k] = $a / $b; |
||
169 | } |
||
170 | |||
171 | $cluster->setCoordinates($center); |
||
172 | } |
||
173 | } |
||
174 | |||
175 | protected function getMembershipRowTotal(int $row, int $col, bool $multiply) |
||
176 | { |
||
177 | $sum = 0.0; |
||
178 | for ($k = 0; $k < $this->sampleCount; ++$k) { |
||
179 | $val = pow($this->membership[$row][$k], $this->fuzziness); |
||
180 | if ($multiply) { |
||
181 | $val *= $this->samples[$k][$col]; |
||
182 | } |
||
183 | |||
184 | $sum += $val; |
||
185 | } |
||
186 | |||
187 | return $sum; |
||
188 | } |
||
189 | |||
190 | protected function updateMembershipMatrix(): void |
||
191 | { |
||
192 | for ($i = 0; $i < $this->clustersNumber; ++$i) { |
||
193 | for ($k = 0; $k < $this->sampleCount; ++$k) { |
||
194 | $distCalc = $this->getDistanceCalc($i, $k); |
||
195 | $this->membership[$i][$k] = 1.0 / $distCalc; |
||
196 | } |
||
197 | } |
||
198 | } |
||
199 | |||
200 | protected function getDistanceCalc(int $row, int $col): float |
||
201 | { |
||
202 | $sum = 0.0; |
||
203 | $distance = new Euclidean(); |
||
204 | $dist1 = $distance->distance( |
||
205 | $this->clusters[$row]->getCoordinates(), |
||
206 | $this->samples[$col] |
||
207 | ); |
||
208 | |||
209 | for ($j = 0; $j < $this->clustersNumber; ++$j) { |
||
210 | $dist2 = $distance->distance( |
||
211 | $this->clusters[$j]->getCoordinates(), |
||
212 | $this->samples[$col] |
||
213 | ); |
||
214 | |||
215 | $val = pow($dist1 / $dist2, 2.0 / ($this->fuzziness - 1)); |
||
216 | $sum += $val; |
||
217 | } |
||
218 | |||
219 | return $sum; |
||
220 | } |
||
221 | |||
222 | /** |
||
223 | * The objective is to minimize the distance between all data points |
||
224 | * and all cluster centers. This method returns the summation of all |
||
225 | * these distances |
||
226 | */ |
||
227 | protected function getObjective() |
||
228 | { |
||
229 | $sum = 0.0; |
||
230 | $distance = new Euclidean(); |
||
231 | for ($i = 0; $i < $this->clustersNumber; ++$i) { |
||
232 | $clust = $this->clusters[$i]->getCoordinates(); |
||
233 | for ($k = 0; $k < $this->sampleCount; ++$k) { |
||
234 | $point = $this->samples[$k]; |
||
235 | $sum += $distance->distance($clust, $point); |
||
236 | } |
||
237 | } |
||
238 | |||
239 | return $sum; |
||
240 | } |
||
241 | } |
||
242 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using
empty(..)
or! empty(...)
instead.