Total Complexity | 31 |
Total Lines | 138 |
Duplicated Lines | 0 % |
Changes | 0 |
1 | <?php |
||
9 | class Covariance |
||
10 | { |
||
11 | /** |
||
12 | * Calculates covariance from two given arrays, x and y, respectively |
||
13 | * |
||
14 | * @throws InvalidArgumentException |
||
15 | */ |
||
16 | public static function fromXYArrays(array $x, array $y, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float |
||
46 | } |
||
47 | |||
/**
 * Calculates covariance of two dimensions, i and k in the given data.
 *
 * Every element of $data is one observation (row); $i and $k are the column
 * indices of the two dimensions being compared.
 *
 * If either $meanX or $meanY is null, both means are recomputed from the
 * extracted columns with Mean::arithmetic(). Only when both means are
 * supplied are they used as given, and the rows are then read in place
 * without building column copies.
 *
 * @param array      $data   Observations, one row per element
 * @param int        $i      Column index of the first dimension
 * @param int        $k      Column index of the second dimension
 * @param bool       $sample Divide by n - 1 (sample) when true, by n (population) when false
 * @param float|null $meanX  Pre-computed mean of column $i
 * @param float|null $meanY  Pre-computed mean of column $k
 *
 * @throws InvalidArgumentException When $data is empty, when $sample is true and $data
 *                                  has a single row, or when $i / $k lie outside the
 *                                  columns of the first row
 * @throws \Exception
 */
public static function fromDataset(array $data, int $i, int $k, bool $sample = true, ?float $meanX = null, ?float $meanY = null): float
{
    $n = count($data);
    if ($n === 0) {
        throw new InvalidArgumentException('The array has zero elements');
    }

    if ($sample && $n === 1) {
        throw new InvalidArgumentException('The array must have at least 2 elements');
    }

    // $i and $k address columns, so they must be checked against the width of
    // a row (the dimensionality of the data), not against the number of rows.
    $firstRow = reset($data);
    $dimension = is_array($firstRow) ? count($firstRow) : 0;
    if ($i < 0 || $k < 0 || $i >= $dimension || $k >= $dimension) {
        throw new InvalidArgumentException('Given indices i and k do not match with the dimensionality of data');
    }

    $sum = 0.0;
    if ($meanX === null || $meanY === null) {
        $x = array_column($data, $i);
        $y = array_column($data, $k);

        $meanX = Mean::arithmetic($x);
        $meanY = Mean::arithmetic($y);
        foreach ($x as $index => $xi) {
            $yi = $y[$index];
            $sum += ($xi - $meanX) * ($yi - $meanY);
        }
    } else {
        // Both means are already known, so the two values of interest are read
        // straight from each row. This avoids the column copies made by
        // array_column() as well as a scan over every column of every row.
        foreach ($data as $row) {
            // A row that lacks either column contributes nothing to the sum.
            if (!is_array($row) || !array_key_exists($i, $row) || !array_key_exists($k, $row)) {
                continue;
            }

            $sum += ($row[$i] - $meanX) * ($row[$k] - $meanY);
        }
    }

    // Sample covariance divides by n - 1, population covariance by n.
    if ($sample) {
        --$n;
    }

    return $sum / $n;
}
||
111 | |||
112 | /** |
||
113 | * Returns the covariance matrix of n-dimensional data |
||
114 | * |
||
115 | * @param array|null $means |
||
116 | */ |
||
117 | public static function covarianceMatrix(array $data, ?array $means = null): array |
||
147 | } |
||
148 | } |
||
149 |