bdelespierre /
php-kmeans
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Kmeans; |
||
| 4 | |||
class Math
{
    /**
     * Computes the Euclidean (straight-line) distance between two points.
     *
     * Coordinates are paired by key, so both arrays must share the same keys
     * (plain 0-based lists of equal length being the usual case).
     *
     * @param array<float> $a coordinates of the first point
     * @param array<float> $b coordinates of the second point
     * @return float the distance between $a and $b
     */
    public static function euclideanDist(array $a, array $b): float
    {
        assert(count($a) === count($b));

        $dist = 0.0;

        // iterate by key instead of calling count() on every loop turn
        foreach ($a as $n => $value) {
            $dist += ($value - $b[$n]) ** 2;
        }

        return sqrt($dist);
    }

    /**
     * Computes the arithmetic mean of a set of points, dimension by dimension.
     *
     * An empty set of points yields an empty array.
     *
     * @param array<array<float>> $points
     * @return array<float>
     */
    public static function centroid(array $points): array
    {
        $centroid = [];

        // sum every dimension across all points
        foreach ($points as $point) {
            foreach ($point as $n => $value) {
                $centroid[$n] = ($centroid[$n] ?? 0) + $value;
            }
        }

        // the number of points does not change: compute it once
        $count = count($points);

        // write back by key rather than by reference so that no dangling
        // reference to the last element is left behind
        foreach ($centroid as $n => $sum) {
            $centroid[$n] = $sum / $count;
        }

        return $centroid;
    }

    /**
     * The standard Box–Muller transform generates values from the standard normal
     * distribution (i.e. standard normal deviates), here scaled by $sigma and
     * shifted by $mu.
     *
     * @see https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
     *
     * @param float $mu    mean of the distribution
     * @param float $sigma standard deviation of the distribution
     * @return array{float, float} two independent normally distributed values
     */
    public static function gaussianNoise(float $mu, float $sigma): array
    {
        static $twoPi = 2 * M_PI;

        // create two random numbers, make sure u1 is greater than epsilon
        // (log(0) is undefined); both numbers are redrawn on each retry
        do {
            $u1 = (float) mt_rand() / (float) mt_getrandmax();
            $u2 = (float) mt_rand() / (float) mt_getrandmax();
        } while ($u1 < PHP_FLOAT_EPSILON);

        // compute z0 and z1
        $mag = $sigma * sqrt(-2.0 * log($u1));
        $z0 = $mag * cos($twoPi * $u2) + $mu;
        $z1 = $mag * sin($twoPi * $u2) + $mu;

        return [$z0, $z1];
    }

    public static int $earthRadius = 6371009; // meters

    /**
     * Calculates the great-circle distance (in meters) between two points,
     * with the Haversine formula.
     *
     * Points are [latitude, longitude] pairs expressed in degrees.
     *
     * @see https://stackoverflow.com/a/14751773/17403258
     *
     * @param array{0: float, 1: float} $from
     * @param array{0: float, 1: float} $to
     * @return float
     */
    public static function haversine($from, $to): float
    {
        $haversine = pow(sin(deg2rad($to[0] - $from[0]) / 2), 2)
            + cos(deg2rad($from[0])) * cos(deg2rad($to[0]))
            * pow(sin(deg2rad($to[1] - $from[1]) / 2), 2);

        // rounding errors may push the term marginally above 1 for (nearly)
        // antipodal points, which would make asin() return NAN
        $haversine = min(1.0, $haversine);

        return 2 * self::$earthRadius * asin(sqrt($haversine));
    }

    /**
     * Calculates the centroid of GPS coordinates.
     *
     * Each point is converted to a 3D unit vector, the vectors are averaged,
     * and the mean vector is converted back to [latitude, longitude] degrees.
     *
     * @see https://stackoverflow.com/questions/6671183
     *
     * @param array<array{0: float, 1: float}> $points
     * @return array{0: float, 1: float}
     *
     * @throws \InvalidArgumentException when $points is empty
     */
    public static function gpsCentroid(array $points): array
    {
        $count = count($points);

        if ($count === 0) {
            throw new \InvalidArgumentException(
                'Cannot compute the centroid of an empty set of points'
            );
        }

        if ($count === 1) {
            // the array is not guaranteed to be indexed from 0
            return $points[array_key_first($points)];
        }

        $x = $y = $z = 0;

        foreach ($points as $point) {
            $lat = deg2rad($point[0]);
            $long = deg2rad($point[1]);

            $x += cos($lat) * cos($long);
            $y += cos($lat) * sin($long);
            $z += sin($lat);
        }

        $x /= $count;
        $y /= $count;
        $z /= $count;

        $hypotenuse = sqrt(pow($x, 2) + pow($y, 2));

        $long = atan2($y, $x);
        $lat = atan2($z, $hypotenuse);

        return [rad2deg($lat), rad2deg($long)];
    }
}
||
| 127 |
If the size of the collection does not change during iteration, it is generally good practice to compute it once beforehand rather than on every iteration: