bdelespierre /
php-kmeans
| 1 | <?php |
||
| 2 | |||
/**
 * Computes the Euclidean (straight-line) distance between two points.
 *
 * Coordinates are read by integer offset starting at 0, so both arrays are
 * expected to be lists. The equal-length requirement is enforced with
 * assert() only, which means it is not checked when assertions are disabled.
 *
 * @param array<float> $a
 * @param array<float> $b
 */
function euclidean_dist(array $a, array $b): float
{
    // Computed once up front: the size does not change while iterating.
    $dimensions = count($a);

    assert($dimensions === count($b));

    $dist = 0.0;

    for ($n = 0; $n < $dimensions; $n++) {
        $dist += ($a[$n] - $b[$n]) ** 2;
    }

    return sqrt($dist);
}
||
| 17 | |||
/**
 * Computes the centroid (per-dimension arithmetic mean) of a set of points.
 *
 * Values are summed per dimension key across all points, then each sum is
 * divided by the number of points. Dimension keys are preserved in the
 * result. An empty set of points yields an empty array.
 *
 * @param array<array<float>> $points
 * @return array<float>
 */
function find_centroid(array $points): array
{
    $centroid = [];

    foreach ($points as $point) {
        foreach ($point as $n => $value) {
            $centroid[$n] = ($centroid[$n] ?? 0) + $value;
        }
    }

    // Computed once: the number of points is the same for every dimension.
    $total = count($points);

    // Write back by key rather than by reference, so no dangling reference
    // to the last element is left behind after the loop.
    foreach ($centroid as $n => $sum) {
        $centroid[$n] = $sum / $total;
    }

    return $centroid;
}
||
| 38 | |||
/**
 * Draws a pair of normally distributed values with mean $mu and standard
 * deviation $sigma using the standard Box–Muller transform.
 *
 * Randomness comes from mt_rand(), so the output is reproducible after
 * seeding with mt_srand().
 *
 * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
 *
 * @return array{float, float}
 */
function generate_gaussian_noise(float $mu, float $sigma): array
{
    static $fullTurn = 2 * M_PI;

    // Draw both uniforms together and retry until the first one is at least
    // epsilon, which keeps log() away from zero.
    do {
        $uniformA = (float) mt_rand() / (float) mt_getrandmax();
        $uniformB = (float) mt_rand() / (float) mt_getrandmax();
    } while ($uniformA < PHP_FLOAT_EPSILON);

    // Polar form: a radius scaled by sigma and an angle on the full circle.
    $radius = $sigma * sqrt(-2.0 * log($uniformA));
    $angle = $fullTurn * $uniformB;

    return [
        $radius * cos($angle) + $mu,
        $radius * sin($angle) + $mu,
    ];
}
||
| 64 |
If the size of the collection does not change during the iteration, it is generally a good practice to compute it beforehand, and not on each iteration: