1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpOffice\PhpSpreadsheet\Calculation\Statistical\Distributions; |
4
|
|
|
|
5
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\ArrayEnabled; |
6
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\Exception; |
7
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\Functions; |
8
|
|
|
use PhpOffice\PhpSpreadsheet\Calculation\Information\ExcelError; |
9
|
|
|
|
10
|
|
|
class ChiSquared |
11
|
|
|
{ |
12
|
|
|
use ArrayEnabled; |
13
|
|
|
|
14
|
|
|
private const EPS = 2.22e-16; |
15
|
|
|
|
16
|
|
|
/** |
17
|
|
|
* CHIDIST. |
18
|
|
|
* |
19
|
|
|
* Returns the one-tailed probability of the chi-squared distribution. |
20
|
|
|
* |
21
|
|
|
* @param mixed $value Float value for which we want the probability |
22
|
|
|
* Or can be an array of values |
23
|
|
|
* @param mixed $degrees Integer degrees of freedom |
24
|
|
|
* Or can be an array of values |
25
|
|
|
* |
26
|
|
|
* @return array|float|string |
27
|
|
|
* If an array of numbers is passed as an argument, then the returned result will also be an array |
28
|
|
|
* with the same dimensions |
29
|
|
|
*/ |
30
|
23 |
|
public static function distributionRightTail($value, $degrees) |
31
|
|
|
{ |
32
|
23 |
|
if (is_array($value) || is_array($degrees)) { |
33
|
13 |
|
return self::evaluateArrayArguments([self::class, __FUNCTION__], $value, $degrees); |
34
|
|
|
} |
35
|
|
|
|
36
|
|
|
try { |
37
|
23 |
|
$value = DistributionValidations::validateFloat($value); |
38
|
22 |
|
$degrees = DistributionValidations::validateInt($degrees); |
39
|
2 |
|
} catch (Exception $e) { |
40
|
2 |
|
return $e->getMessage(); |
41
|
|
|
} |
42
|
|
|
|
43
|
21 |
|
if ($degrees < 1) { |
44
|
1 |
|
return ExcelError::NAN(); |
45
|
|
|
} |
46
|
20 |
|
if ($value < 0) { |
47
|
1 |
|
if (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_GNUMERIC) { |
48
|
|
|
return 1; |
49
|
|
|
} |
50
|
|
|
|
51
|
1 |
|
return ExcelError::NAN(); |
52
|
|
|
} |
53
|
|
|
|
54
|
19 |
|
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2) / Gamma::gammaValue($degrees / 2)); |
55
|
|
|
} |
56
|
|
|
|
57
|
|
|
/** |
58
|
|
|
* CHIDIST. |
59
|
|
|
* |
60
|
|
|
* Returns the one-tailed probability of the chi-squared distribution. |
61
|
|
|
* |
62
|
|
|
* @param mixed $value Float value for which we want the probability |
63
|
|
|
* Or can be an array of values |
64
|
|
|
* @param mixed $degrees Integer degrees of freedom |
65
|
|
|
* Or can be an array of values |
66
|
|
|
* @param mixed $cumulative Boolean value indicating if we want the cdf (true) or the pdf (false) |
67
|
|
|
* Or can be an array of values |
68
|
|
|
* |
69
|
|
|
* @return array|float|string |
70
|
|
|
* If an array of numbers is passed as an argument, then the returned result will also be an array |
71
|
|
|
* with the same dimensions |
72
|
|
|
*/ |
73
|
16 |
|
public static function distributionLeftTail($value, $degrees, $cumulative) |
74
|
|
|
{ |
75
|
16 |
|
if (is_array($value) || is_array($degrees) || is_array($cumulative)) { |
76
|
16 |
|
return self::evaluateArrayArguments([self::class, __FUNCTION__], $value, $degrees, $cumulative); |
77
|
|
|
} |
78
|
|
|
|
79
|
|
|
try { |
80
|
16 |
|
$value = DistributionValidations::validateFloat($value); |
81
|
15 |
|
$degrees = DistributionValidations::validateInt($degrees); |
82
|
14 |
|
$cumulative = DistributionValidations::validateBool($cumulative); |
83
|
3 |
|
} catch (Exception $e) { |
84
|
3 |
|
return $e->getMessage(); |
85
|
|
|
} |
86
|
|
|
|
87
|
13 |
|
if ($degrees < 1) { |
88
|
1 |
|
return ExcelError::NAN(); |
89
|
|
|
} |
90
|
12 |
|
if ($value < 0) { |
91
|
1 |
|
if (Functions::getCompatibilityMode() == Functions::COMPATIBILITY_GNUMERIC) { |
92
|
|
|
return 1; |
93
|
|
|
} |
94
|
|
|
|
95
|
1 |
|
return ExcelError::NAN(); |
96
|
|
|
} |
97
|
|
|
|
98
|
11 |
|
if ($cumulative === true) { |
99
|
5 |
|
return 1 - self::distributionRightTail($value, $degrees); |
100
|
|
|
} |
101
|
|
|
|
102
|
6 |
|
return ($value ** (($degrees / 2) - 1) * exp(-$value / 2)) / |
103
|
6 |
|
((2 ** ($degrees / 2)) * Gamma::gammaValue($degrees / 2)); |
104
|
|
|
} |
105
|
|
|
|
106
|
|
|
/** |
107
|
|
|
* CHIINV. |
108
|
|
|
* |
109
|
|
|
* Returns the inverse of the right-tailed probability of the chi-squared distribution. |
110
|
|
|
* |
111
|
|
|
* @param mixed $probability Float probability at which you want to evaluate the distribution |
112
|
|
|
* Or can be an array of values |
113
|
|
|
* @param mixed $degrees Integer degrees of freedom |
114
|
|
|
* Or can be an array of values |
115
|
|
|
* |
116
|
|
|
* @return array|float|string |
117
|
|
|
* If an array of numbers is passed as an argument, then the returned result will also be an array |
118
|
|
|
* with the same dimensions |
119
|
|
|
*/ |
120
|
17 |
|
public static function inverseRightTail($probability, $degrees) |
121
|
|
|
{ |
122
|
17 |
|
if (is_array($probability) || is_array($degrees)) { |
123
|
16 |
|
return self::evaluateArrayArguments([self::class, __FUNCTION__], $probability, $degrees); |
124
|
|
|
} |
125
|
|
|
|
126
|
|
|
try { |
127
|
17 |
|
$probability = DistributionValidations::validateProbability($probability); |
128
|
14 |
|
$degrees = DistributionValidations::validateInt($degrees); |
129
|
4 |
|
} catch (Exception $e) { |
130
|
4 |
|
return $e->getMessage(); |
131
|
|
|
} |
132
|
|
|
|
133
|
13 |
|
if ($degrees < 1) { |
134
|
1 |
|
return ExcelError::NAN(); |
135
|
|
|
} |
136
|
|
|
|
137
|
12 |
|
$callback = function ($value) use ($degrees) { |
138
|
12 |
|
return 1 - (Gamma::incompleteGamma($degrees / 2, $value / 2) |
139
|
12 |
|
/ Gamma::gammaValue($degrees / 2)); |
140
|
12 |
|
}; |
141
|
|
|
|
142
|
12 |
|
$newtonRaphson = new NewtonRaphson($callback); |
143
|
|
|
|
144
|
12 |
|
return $newtonRaphson->execute($probability); |
145
|
|
|
} |
146
|
|
|
|
147
|
|
|
/** |
148
|
|
|
* CHIINV. |
149
|
|
|
* |
150
|
|
|
* Returns the inverse of the left-tailed probability of the chi-squared distribution. |
151
|
|
|
* |
152
|
|
|
* @param mixed $probability Float probability at which you want to evaluate the distribution |
153
|
|
|
* Or can be an array of values |
154
|
|
|
* @param mixed $degrees Integer degrees of freedom |
155
|
|
|
* Or can be an array of values |
156
|
|
|
* |
157
|
|
|
* @return array|float|string |
158
|
|
|
* If an array of numbers is passed as an argument, then the returned result will also be an array |
159
|
|
|
* with the same dimensions |
160
|
|
|
*/ |
161
|
16 |
|
public static function inverseLeftTail($probability, $degrees) |
162
|
|
|
{ |
163
|
16 |
|
if (is_array($probability) || is_array($degrees)) { |
164
|
16 |
|
return self::evaluateArrayArguments([self::class, __FUNCTION__], $probability, $degrees); |
165
|
|
|
} |
166
|
|
|
|
167
|
|
|
try { |
168
|
16 |
|
$probability = DistributionValidations::validateProbability($probability); |
169
|
13 |
|
$degrees = DistributionValidations::validateInt($degrees); |
170
|
4 |
|
} catch (Exception $e) { |
171
|
4 |
|
return $e->getMessage(); |
172
|
|
|
} |
173
|
|
|
|
174
|
12 |
|
if ($degrees < 1) { |
175
|
1 |
|
return ExcelError::NAN(); |
176
|
|
|
} |
177
|
|
|
|
178
|
11 |
|
return self::inverseLeftTailCalculation($probability, $degrees); |
179
|
|
|
} |
180
|
|
|
|
181
|
|
|
/** |
182
|
|
|
* CHITEST. |
183
|
|
|
* |
184
|
|
|
* Uses the chi-square test to calculate the probability that the differences between two supplied data sets |
185
|
|
|
* (of observed and expected frequencies), are likely to be simply due to sampling error, |
186
|
|
|
* or if they are likely to be real. |
187
|
|
|
* |
188
|
|
|
* @param mixed $actual an array of observed frequencies |
189
|
|
|
* @param mixed $expected an array of expected frequencies |
190
|
|
|
* |
191
|
|
|
* @return float|string |
192
|
|
|
*/ |
193
|
7 |
|
public static function test($actual, $expected) |
194
|
|
|
{ |
195
|
7 |
|
$rows = count($actual); |
196
|
7 |
|
$actual = Functions::flattenArray($actual); |
197
|
7 |
|
$expected = Functions::flattenArray($expected); |
198
|
7 |
|
$columns = intdiv(count($actual), $rows); |
199
|
|
|
|
200
|
7 |
|
$countActuals = count($actual); |
201
|
7 |
|
$countExpected = count($expected); |
202
|
7 |
|
if ($countActuals !== $countExpected || $countActuals === 1) { |
203
|
1 |
|
return ExcelError::NAN(); |
204
|
|
|
} |
205
|
|
|
|
206
|
6 |
|
$result = 0.0; |
207
|
6 |
|
for ($i = 0; $i < $countActuals; ++$i) { |
208
|
6 |
|
if ($expected[$i] == 0.0) { |
209
|
1 |
|
return ExcelError::DIV0(); |
210
|
6 |
|
} elseif ($expected[$i] < 0.0) { |
211
|
1 |
|
return ExcelError::NAN(); |
212
|
|
|
} |
213
|
6 |
|
$result += (($actual[$i] - $expected[$i]) ** 2) / $expected[$i]; |
214
|
|
|
} |
215
|
|
|
|
216
|
4 |
|
$degrees = self::degrees($rows, $columns); |
217
|
|
|
|
218
|
4 |
|
$result = Functions::scalar(self::distributionRightTail($result, $degrees)); |
219
|
|
|
|
220
|
4 |
|
return $result; |
221
|
|
|
} |
222
|
|
|
|
223
|
4 |
|
protected static function degrees(int $rows, int $columns): int |
224
|
|
|
{ |
225
|
4 |
|
if ($rows === 1) { |
226
|
1 |
|
return $columns - 1; |
227
|
3 |
|
} elseif ($columns === 1) { |
228
|
1 |
|
return $rows - 1; |
229
|
|
|
} |
230
|
|
|
|
231
|
2 |
|
return ($columns - 1) * ($rows - 1); |
232
|
|
|
} |
233
|
|
|
|
234
|
11 |
|
private static function inverseLeftTailCalculation(float $probability, int $degrees): float |
235
|
|
|
{ |
236
|
|
|
// bracket the root |
237
|
11 |
|
$min = 0; |
238
|
11 |
|
$sd = sqrt(2.0 * $degrees); |
239
|
11 |
|
$max = 2 * $sd; |
240
|
11 |
|
$s = -1; |
241
|
|
|
|
242
|
11 |
|
while ($s * self::pchisq($max, $degrees) > $probability * $s) { |
243
|
3 |
|
$min = $max; |
244
|
3 |
|
$max += 2 * $sd; |
245
|
|
|
} |
246
|
|
|
|
247
|
|
|
// Find root using bisection |
248
|
11 |
|
$chi2 = 0.5 * ($min + $max); |
249
|
|
|
|
250
|
11 |
|
while (($max - $min) > self::EPS * $chi2) { |
251
|
11 |
|
if ($s * self::pchisq($chi2, $degrees) > $probability * $s) { |
252
|
11 |
|
$min = $chi2; |
253
|
|
|
} else { |
254
|
11 |
|
$max = $chi2; |
255
|
|
|
} |
256
|
11 |
|
$chi2 = 0.5 * ($min + $max); |
257
|
|
|
} |
258
|
|
|
|
259
|
11 |
|
return $chi2; |
260
|
|
|
} |
261
|
|
|
|
262
|
11 |
|
private static function pchisq(float $chi2, int $degrees): float |
263
|
|
|
{ |
264
|
11 |
|
return self::gammp($degrees, 0.5 * $chi2); |
265
|
|
|
} |
266
|
|
|
|
267
|
11 |
|
private static function gammp(int $n, float $x): float |
268
|
|
|
{ |
269
|
11 |
|
if ($x < 0.5 * $n + 1) { |
270
|
11 |
|
return self::gser($n, $x); |
271
|
|
|
} |
272
|
|
|
|
273
|
6 |
|
return 1 - self::gcf($n, $x); |
274
|
|
|
} |
275
|
|
|
|
276
|
|
|
// Return the incomplete gamma function P(n/2,x) evaluated by |
277
|
|
|
// series representation. Algorithm from numerical recipe. |
278
|
|
|
// Assume that n is a positive integer and x>0, won't check arguments. |
279
|
|
|
// Relative error controlled by the eps parameter |
280
|
11 |
|
private static function gser(int $n, float $x): float |
281
|
|
|
{ |
282
|
|
|
/** @var float */ |
283
|
11 |
|
$gln = Gamma::ln($n / 2); |
284
|
11 |
|
$a = 0.5 * $n; |
285
|
11 |
|
$ap = $a; |
286
|
11 |
|
$sum = 1.0 / $a; |
287
|
11 |
|
$del = $sum; |
288
|
11 |
|
for ($i = 1; $i < 101; ++$i) { |
289
|
11 |
|
++$ap; |
290
|
11 |
|
$del = $del * $x / $ap; |
291
|
11 |
|
$sum += $del; |
292
|
11 |
|
if ($del < $sum * self::EPS) { |
293
|
11 |
|
break; |
294
|
|
|
} |
295
|
|
|
} |
296
|
|
|
|
297
|
11 |
|
return $sum * exp(-$x + $a * log($x) - $gln); |
298
|
|
|
} |
299
|
|
|
|
300
|
|
|
// Return the incomplete gamma function Q(n/2,x) evaluated by |
301
|
|
|
// its continued fraction representation. Algorithm from numerical recipe. |
302
|
|
|
// Assume that n is a postive integer and x>0, won't check arguments. |
303
|
|
|
// Relative error controlled by the eps parameter |
304
|
6 |
|
private static function gcf(int $n, float $x): float |
305
|
|
|
{ |
306
|
|
|
/** @var float */ |
307
|
6 |
|
$gln = Gamma::ln($n / 2); |
308
|
6 |
|
$a = 0.5 * $n; |
309
|
6 |
|
$b = $x + 1 - $a; |
310
|
6 |
|
$fpmin = 1.e-300; |
311
|
6 |
|
$c = 1 / $fpmin; |
312
|
6 |
|
$d = 1 / $b; |
313
|
6 |
|
$h = $d; |
314
|
6 |
|
for ($i = 1; $i < 101; ++$i) { |
315
|
6 |
|
$an = -$i * ($i - $a); |
316
|
6 |
|
$b += 2; |
317
|
6 |
|
$d = $an * $d + $b; |
318
|
6 |
|
if (abs($d) < $fpmin) { |
319
|
|
|
$d = $fpmin; |
320
|
|
|
} |
321
|
6 |
|
$c = $b + $an / $c; |
322
|
6 |
|
if (abs($c) < $fpmin) { |
323
|
|
|
$c = $fpmin; |
324
|
|
|
} |
325
|
6 |
|
$d = 1 / $d; |
326
|
6 |
|
$del = $d * $c; |
327
|
6 |
|
$h = $h * $del; |
328
|
6 |
|
if (abs($del - 1) < self::EPS) { |
329
|
6 |
|
break; |
330
|
|
|
} |
331
|
|
|
} |
332
|
|
|
|
333
|
6 |
|
return $h * exp(-$x + $a * log($x) - $gln); |
334
|
|
|
} |
335
|
|
|
} |
336
|
|
|
|