1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions; |
4
|
|
|
|
5
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\Exception; |
6
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\Functions; |
7
|
|
|
|
8
|
|
|
class ChiSquared |
9
|
|
|
{ |
10
|
|
|
private const MAX_ITERATIONS = 256; |
11
|
|
|
|
12
|
|
|
private const EPS = 2.22e-16; |
13
|
|
|
|
14
|
|
|
/** |
15
|
|
|
* CHIDIST. |
16
|
|
|
* |
17
|
|
|
* Returns the one-tailed probability of the chi-squared distribution. |
18
|
|
|
* |
19
|
|
|
* @param mixed $value Float value for which we want the probability |
20
|
|
|
* @param mixed $degrees Integer degrees of freedom |
21
|
|
|
* |
22
|
|
|
* @return float|string |
23
|
|
|
*/ |
24
|
41 |
|
public static function distributionRightTail($value, $degrees) |
25
|
|
|
{ |
26
|
41 |
|
$value = Functions::flattenSingleValue($value); |
27
|
41 |
|
$degrees = Functions::flattenSingleValue($degrees); |
28
|
|
|
|
29
|
|
|
try { |
30
|
41 |
|
$value = DistributionValidations::validateFloat($value); |
31
|
40 |
|
$degrees = DistributionValidations::validateInt($degrees); |
32
|
2 |
|
} catch (Exception $e) { |
33
|
2 |
|
return $e->getMessage(); |
34
|
|
|
} |
35
|
|
|
|
36
|
39 |
|
if ($degrees < 1) { |
37
|
1 |
|
return Functions::NAN(); |
38
|
|
|
} |
39
|
38 |
|
if ($value < 0) { |
40
|
1 |
|
if (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_GNUMERIC) { |
41
|
|
|
return 1; |
42
|
|
|
} |
43
|
|
|
|
44
|
1 |
|
return Functions::NAN(); |
45
|
|
|
} |
46
|
|
|
|
47
|
37 |
|
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2) / Gamma::gammaValue($degrees / 2)); |
48
|
|
|
} |
49
|
|
|
|
50
|
|
|
/** |
51
|
|
|
* CHIDIST. |
52
|
|
|
* |
53
|
|
|
* Returns the one-tailed probability of the chi-squared distribution. |
54
|
|
|
* |
55
|
|
|
* @param mixed $value Float value for which we want the probability |
56
|
|
|
* @param mixed $degrees Integer degrees of freedom |
57
|
|
|
* @param mixed $cumulative Boolean value indicating if we want the cdf (true) or the pdf (false) |
58
|
|
|
* |
59
|
|
|
* @return float|string |
60
|
|
|
*/ |
61
|
25 |
|
public static function distributionLeftTail($value, $degrees, $cumulative) |
62
|
|
|
{ |
63
|
25 |
|
$value = Functions::flattenSingleValue($value); |
64
|
25 |
|
$degrees = Functions::flattenSingleValue($degrees); |
65
|
25 |
|
$cumulative = Functions::flattenSingleValue($cumulative); |
66
|
|
|
|
67
|
|
|
try { |
68
|
25 |
|
$value = DistributionValidations::validateFloat($value); |
69
|
24 |
|
$degrees = DistributionValidations::validateInt($degrees); |
70
|
23 |
|
$cumulative = DistributionValidations::validateBool($cumulative); |
71
|
3 |
|
} catch (Exception $e) { |
72
|
3 |
|
return $e->getMessage(); |
73
|
|
|
} |
74
|
|
|
|
75
|
22 |
|
if ($degrees < 1) { |
76
|
1 |
|
return Functions::NAN(); |
77
|
|
|
} |
78
|
21 |
|
if ($value < 0) { |
79
|
1 |
|
if (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_GNUMERIC) { |
80
|
|
|
return 1; |
81
|
|
|
} |
82
|
|
|
|
83
|
1 |
|
return Functions::NAN(); |
84
|
|
|
} |
85
|
|
|
|
86
|
20 |
|
if ($cumulative === true) { |
87
|
15 |
|
return 1 - self::distributionRightTail($value, $degrees); |
88
|
|
|
} |
89
|
|
|
|
90
|
5 |
|
return (($value ** (($degrees / 2) - 1) * exp(-$value / 2))) / |
91
|
5 |
|
((2 ** ($degrees / 2)) * Gamma::gammaValue($degrees / 2)); |
92
|
|
|
} |
93
|
|
|
|
94
|
|
|
/** |
95
|
|
|
* CHIINV. |
96
|
|
|
* |
97
|
|
|
* Returns the inverse of the right-tailed probability of the chi-squared distribution. |
98
|
|
|
* |
99
|
|
|
* @param mixed $probability Float probability at which you want to evaluate the distribution |
100
|
|
|
* @param mixed $degrees Integer degrees of freedom |
101
|
|
|
* |
102
|
|
|
* @return float|string |
103
|
|
|
*/ |
104
|
15 |
|
public static function inverseRightTail($probability, $degrees) |
105
|
|
|
{ |
106
|
15 |
|
$probability = Functions::flattenSingleValue($probability); |
107
|
15 |
|
$degrees = Functions::flattenSingleValue($degrees); |
108
|
|
|
|
109
|
|
|
try { |
110
|
15 |
|
$probability = DistributionValidations::validateProbability($probability); |
111
|
12 |
|
$degrees = DistributionValidations::validateInt($degrees); |
112
|
4 |
|
} catch (Exception $e) { |
113
|
4 |
|
return $e->getMessage(); |
114
|
|
|
} |
115
|
|
|
|
116
|
11 |
|
if ($degrees < 1) { |
117
|
1 |
|
return Functions::NAN(); |
118
|
|
|
} |
119
|
|
|
|
120
|
10 |
|
$callback = function ($value) use ($degrees) { |
121
|
10 |
|
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2) |
122
|
10 |
|
/ Gamma::gammaValue($degrees / 2)); |
123
|
10 |
|
}; |
124
|
|
|
|
125
|
10 |
|
$newtonRaphson = new NewtonRaphson($callback); |
126
|
|
|
|
127
|
10 |
|
return $newtonRaphson->execute($probability); |
128
|
|
|
} |
129
|
|
|
|
130
|
|
|
/** |
131
|
|
|
* CHIINV. |
132
|
|
|
* |
133
|
|
|
* Returns the inverse of the left-tailed probability of the chi-squared distribution. |
134
|
|
|
* |
135
|
|
|
* @param mixed $probability Float probability at which you want to evaluate the distribution |
136
|
|
|
* @param mixed $degrees Integer degrees of freedom |
137
|
|
|
* |
138
|
|
|
* @return float|string |
139
|
|
|
*/ |
140
|
15 |
|
public static function inverseLeftTail($probability, $degrees) |
141
|
|
|
{ |
142
|
15 |
|
$probability = Functions::flattenSingleValue($probability); |
143
|
15 |
|
$degrees = Functions::flattenSingleValue($degrees); |
144
|
|
|
|
145
|
|
|
try { |
146
|
15 |
|
$probability = DistributionValidations::validateProbability($probability); |
147
|
12 |
|
$degrees = DistributionValidations::validateInt($degrees); |
148
|
4 |
|
} catch (Exception $e) { |
149
|
4 |
|
return $e->getMessage(); |
150
|
|
|
} |
151
|
|
|
|
152
|
11 |
|
if ($degrees < 1) { |
153
|
1 |
|
return Functions::NAN(); |
154
|
|
|
} |
155
|
|
|
|
156
|
10 |
|
return self::inverseLeftTailCalculation($probability, $degrees); |
157
|
|
|
} |
158
|
|
|
|
159
|
|
|
/** |
160
|
|
|
* CHITEST. |
161
|
|
|
* |
162
|
|
|
* Uses the chi-square test to calculate the probability that the differences between two supplied data sets |
163
|
|
|
* (of observed and expected frequencies), are likely to be simply due to sampling error, |
164
|
|
|
* or if they are likely to be real. |
165
|
|
|
* |
166
|
|
|
* @param mixed $actual an array of observed frequencies |
167
|
|
|
* @param mixed $expected an array of expected frequencies |
168
|
|
|
* |
169
|
|
|
* @return float|string |
170
|
|
|
*/ |
171
|
7 |
|
public static function test($actual, $expected) |
172
|
|
|
{ |
173
|
7 |
|
$rows = count($actual); |
174
|
7 |
|
$actual = Functions::flattenArray($actual); |
175
|
7 |
|
$expected = Functions::flattenArray($expected); |
176
|
7 |
|
$columns = count($actual) / $rows; |
177
|
|
|
|
178
|
7 |
|
$countActuals = count($actual); |
179
|
7 |
|
$countExpected = count($expected); |
180
|
7 |
|
if ($countActuals !== $countExpected || $countActuals === 1) { |
181
|
1 |
|
return Functions::NAN(); |
182
|
|
|
} |
183
|
|
|
|
184
|
6 |
|
$result = 0.0; |
185
|
6 |
|
for ($i = 0; $i < $countActuals; ++$i) { |
186
|
6 |
|
if ($expected[$i] == 0.0) { |
187
|
1 |
|
return Functions::DIV0(); |
188
|
6 |
|
} elseif ($expected[$i] < 0.0) { |
189
|
1 |
|
return Functions::NAN(); |
190
|
|
|
} |
191
|
6 |
|
$result += (($actual[$i] - $expected[$i]) ** 2) / $expected[$i]; |
192
|
|
|
} |
193
|
|
|
|
194
|
4 |
|
$degrees = self::degrees($rows, $columns); |
195
|
|
|
|
196
|
4 |
|
$result = self::distributionRightTail($result, $degrees); |
197
|
|
|
|
198
|
4 |
|
return $result; |
199
|
|
|
} |
200
|
|
|
|
201
|
4 |
|
protected static function degrees(int $rows, int $columns): int |
202
|
|
|
{ |
203
|
4 |
|
if ($rows === 1) { |
204
|
1 |
|
return $columns - 1; |
205
|
3 |
|
} elseif ($columns === 1) { |
206
|
1 |
|
return $rows - 1; |
207
|
|
|
} |
208
|
|
|
|
209
|
2 |
|
return ($columns - 1) * ($rows - 1); |
210
|
|
|
} |
211
|
|
|
|
212
|
10 |
|
private static function inverseLeftTailCalculation(float $probability, int $degrees): float |
213
|
|
|
{ |
214
|
|
|
// bracket the root |
215
|
10 |
|
$min = 0; |
216
|
10 |
|
$sd = sqrt(2.0 * $degrees); |
217
|
10 |
|
$max = 2 * $sd; |
218
|
10 |
|
$s = -1; |
219
|
|
|
|
220
|
10 |
|
while ($s * self::pchisq($max, $degrees) > $probability * $s) { |
221
|
2 |
|
$min = $max; |
222
|
2 |
|
$max += 2 * $sd; |
223
|
|
|
} |
224
|
|
|
|
225
|
|
|
// Find root using bisection |
226
|
10 |
|
$chi2 = 0.5 * ($min + $max); |
227
|
|
|
|
228
|
10 |
|
while (($max - $min) > self::EPS * $chi2) { |
229
|
10 |
|
if ($s * self::pchisq($chi2, $degrees) > $probability * $s) { |
230
|
10 |
|
$min = $chi2; |
231
|
|
|
} else { |
232
|
10 |
|
$max = $chi2; |
233
|
|
|
} |
234
|
10 |
|
$chi2 = 0.5 * ($min + $max); |
235
|
|
|
} |
236
|
|
|
|
237
|
10 |
|
return $chi2; |
238
|
|
|
} |
239
|
|
|
|
240
|
10 |
|
private static function pchisq($chi2, $degrees) |
241
|
|
|
{ |
242
|
10 |
|
return self::gammp($degrees, 0.5 * $chi2); |
243
|
|
|
} |
244
|
|
|
|
245
|
10 |
|
private static function gammp($n, $x) |
246
|
|
|
{ |
247
|
10 |
|
if ($x < 0.5 * $n + 1) { |
248
|
10 |
|
return self::gser($n, $x); |
249
|
|
|
} |
250
|
|
|
|
251
|
5 |
|
return 1 - self::gcf($n, $x); |
252
|
|
|
} |
253
|
|
|
|
254
|
|
|
// Return the incomplete gamma function P(n/2,x) evaluated by |
255
|
|
|
// series representation. Algorithm from numerical recipe. |
256
|
|
|
// Assume that n is a positive integer and x>0, won't check arguments. |
257
|
|
|
// Relative error controlled by the eps parameter |
258
|
10 |
|
private static function gser($n, $x) |
259
|
|
|
{ |
260
|
10 |
|
$gln = Gamma::ln($n / 2); |
261
|
10 |
|
$a = 0.5 * $n; |
262
|
10 |
|
$ap = $a; |
263
|
10 |
|
$sum = 1.0 / $a; |
264
|
10 |
|
$del = $sum; |
265
|
10 |
|
for ($i = 1; $i < 101; ++$i) { |
266
|
10 |
|
++$ap; |
267
|
10 |
|
$del = $del * $x / $ap; |
268
|
10 |
|
$sum += $del; |
269
|
10 |
|
if ($del < $sum * self::EPS) { |
270
|
10 |
|
break; |
271
|
|
|
} |
272
|
|
|
} |
273
|
|
|
|
274
|
10 |
|
return $sum * exp(-$x + $a * log($x) - $gln); |
275
|
|
|
} |
276
|
|
|
|
277
|
|
|
// Return the incomplete gamma function Q(n/2,x) evaluated by |
278
|
|
|
// its continued fraction representation. Algorithm from numerical recipe. |
279
|
|
|
// Assume that n is a postive integer and x>0, won't check arguments. |
280
|
|
|
// Relative error controlled by the eps parameter |
281
|
5 |
|
private static function gcf($n, $x) |
282
|
|
|
{ |
283
|
5 |
|
$gln = Gamma::ln($n / 2); |
284
|
5 |
|
$a = 0.5 * $n; |
285
|
5 |
|
$b = $x + 1 - $a; |
286
|
5 |
|
$fpmin = 1.e-300; |
287
|
5 |
|
$c = 1 / $fpmin; |
288
|
5 |
|
$d = 1 / $b; |
289
|
5 |
|
$h = $d; |
290
|
5 |
|
for ($i = 1; $i < 101; ++$i) { |
291
|
5 |
|
$an = -$i * ($i - $a); |
292
|
5 |
|
$b += 2; |
293
|
5 |
|
$d = $an * $d + $b; |
294
|
5 |
|
if (abs($d) < $fpmin) { |
295
|
|
|
$d = $fpmin; |
296
|
|
|
} |
297
|
5 |
|
$c = $b + $an / $c; |
298
|
5 |
|
if (abs($c) < $fpmin) { |
299
|
|
|
$c = $fpmin; |
300
|
|
|
} |
301
|
5 |
|
$d = 1 / $d; |
302
|
5 |
|
$del = $d * $c; |
303
|
5 |
|
$h = $h * $del; |
304
|
5 |
|
if (abs($del - 1) < self::EPS) { |
305
|
5 |
|
break; |
306
|
|
|
} |
307
|
|
|
} |
308
|
|
|
|
309
|
5 |
|
return $h * exp(-$x + $a * log($x) - $gln); |
310
|
|
|
} |
311
|
|
|
} |
312
|
|
|
|