Total Complexity | 231 |
Total Lines | 1698 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Stats often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Stats, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class Stats |
||
13 | { |
||
14 | |||
// Constants selecting which set of statistics calc() computes

/**
 * STATS_BASIC: generate only the basic descriptive statistics.
 */
const STATS_BASIC = 1;

/**
 * STATS_FULL: also generate higher moments, mode, median, etc.
 */
const STATS_FULL = 2;

// Constants describing the data set format

/**
 * STATS_DATA_SIMPLE: a plain array of numeric values. This is the default.
 * e.g. $data = array(2,3,4,5,1,1,6);
 */
const STATS_DATA_SIMPLE = 0;

/**
 * STATS_DATA_CUMMULATIVE: an associative array of frequency values,
 * where in each entry the index is the data point and the value is
 * its count (frequency):
 * e.g. $data = array(3=>4, 2.3=>5, 1.25=>6, 0.5=>3)
 *
 * NOTE(review): PHP casts float array keys to integers, so the literal
 * example above would collapse 2.3 to key 2. Fractional data points
 * presumably have to be passed as string keys ('2.3' => 5) - confirm
 * against _validate().
 */
const STATS_DATA_CUMMULATIVE = 1;

// Constants defining how to handle nulls

/**
 * STATS_REJECT_NULL: reject data sets with null values. This is the default.
 * Any non-numeric value is considered a null in this context.
 */
const STATS_REJECT_NULL = -1;

/**
 * STATS_IGNORE_NULL: ignore null values and prune them from the data.
 * Any non-numeric value is considered a null in this context.
 */
const STATS_IGNORE_NULL = -2;

/**
 * STATS_USE_NULL_AS_ZERO: assign the value of 0 (zero) to null values.
 * Any non-numeric value is considered a null in this context.
 */
const STATS_USE_NULL_AS_ZERO = -3;

// properties

/**
 * The simple or cumulative data set.
 * Null until setData() assigns it.
 *
 * @var array|null
 */
private $_data = null;

/**
 * Expanded data set. Only set when cumulative data
 * is being used. Null by default.
 *
 * @var array|null
 */
private $_dataExpanded = null;

/**
 * Flag for the data format, one of STATS_DATA_SIMPLE or
 * STATS_DATA_CUMMULATIVE. Null by default.
 *
 * @var int|null
 */
private $_dataOption = null;

/**
 * Flag for the null handling option. One of STATS_REJECT_NULL,
 * STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO. Assigned by the
 * constructor and by setNullOption().
 *
 * @var int
 */
private $_nullOption;

/**
 * Cache of already calculated results, keyed by statistic name.
 * Emptied whenever setData() is called.
 *
 * @var array
 */
private $_calculatedValues = array();
||
102 | |||
/**
 * Creates a statistics calculator with the given null handling option.
 *
 * The option is stored as given; unlike setNullOption(), no check is made
 * here that it is one of the STATS_*_NULL constants.
 *
 * @param int $nullOption how to handle null values, defaults to self::STATS_REJECT_NULL
 */
public function __construct($nullOption = self::STATS_REJECT_NULL)
{
    $this->_nullOption = $nullOption;
}
||
114 | |||
/**
 * Stores a new data set and validates it with the current null handling option.
 *
 * The previously stored data, the expanded data and all cached results are
 * discarded before the new data is assigned.
 *
 * @param array $arr the data set
 * @param int   $opt data format: self::STATS_DATA_SIMPLE (default) or self::STATS_DATA_CUMMULATIVE
 * @return mixed whatever _validate() returns
 * @throws \PEAR_Exception if $arr is not an array, or $opt is not a known data format
 * @see _validate()
 */
public function setData($arr, $opt = self::STATS_DATA_SIMPLE)
{
    if (!is_array($arr)) {
        throw new \PEAR_Exception('invalid data, an array of numeric data was expected');
    }

    // Drop every piece of state derived from the previous data set.
    $this->_data = null;
    $this->_dataExpanded = null;
    $this->_dataOption = null;
    $this->_calculatedValues = array();

    if ($opt == self::STATS_DATA_SIMPLE) {
        $this->_dataOption = $opt;
        // Simple data is re-indexed to consecutive integer keys.
        $this->_data = array_values($arr);
    } elseif ($opt == self::STATS_DATA_CUMMULATIVE) {
        $this->_dataOption = $opt;
        // Cumulative data keeps its keys: they are the data points.
        $this->_data = $arr;
        $this->_dataExpanded = array();
    } else {
        // Previously an unknown format fell through silently, leaving the
        // object without data while still running the validation step.
        throw new \PEAR_Exception('invalid data format option, expecting: ' .
            'STATS_DATA_SIMPLE or STATS_DATA_CUMMULATIVE');
    }

    return $this->_validate();
}
||
143 | |||
/**
 * Returns the stored data, which may have been modified according to the
 * null handling option when it was set.
 *
 * @param boolean $expanded when true and the data format is cumulative, the
 *                          expanded list is returned instead; default is false
 * @return array the stored (or expanded) data
 * @throws \PEAR_Exception if no data has been set
 * @see _validate()
 */
public function getData($expanded = false)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $useExpanded = $expanded && $this->_dataOption == self::STATS_DATA_CUMMULATIVE;

    return $useExpanded ? $this->_dataExpanded : $this->_data;
}
||
164 | |||
/**
 * Sets the null handling option.
 * Must be called before assigning a new data set containing null values.
 *
 * @param int $nullOption one of self::STATS_REJECT_NULL, self::STATS_IGNORE_NULL
 *                        or self::STATS_USE_NULL_AS_ZERO
 * @return boolean true once the option has been stored
 * @throws \PEAR_Exception if $nullOption is not one of the three known options
 * @see _validate()
 */
public function setNullOption($nullOption)
{
    $knownOptions = array(
        self::STATS_REJECT_NULL,
        self::STATS_IGNORE_NULL,
        self::STATS_USE_NULL_AS_ZERO,
    );

    // Loose membership test, matching a chain of == comparisons.
    if (!in_array($nullOption, $knownOptions)) {
        throw new \PEAR_Exception('invalid null handling option expecting: ' .
            'STATS_REJECT_NULL, STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO');
    }

    $this->_nullOption = $nullOption;

    return true;
}
||
185 | |||
/**
 * Transforms the data by subtracting the mean from each entry and dividing
 * the result by the standard deviation.
 *
 * The transformed values are stored through setData(), so all cached
 * results are reset.
 *
 * @return mixed the result of setData()
 * @throws \PEAR_Exception if the mean or standard deviation cannot be
 *                         calculated, or the standard deviation is zero
 * @see mean()
 * @see stDev()
 * @see setData()
 */
public function studentize()
{
    // Failures in mean()/stDev() propagate to the caller; the former catch
    // blocks returned a variable that was never assigned.
    $mean = $this->mean();
    $std = $this->stDev();

    if ($std == 0) {
        throw new \PEAR_Exception('cannot studentize data, standard deviation is zero.');
    }

    $arr = array();
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            $newval = ($val - $mean) / $std;
            // String key: a float key would be truncated to an integer.
            $arr["$newval"] = $freq;
        }
    } else {
        foreach ($this->_data as $val) {
            $arr[] = ($val - $mean) / $std;
        }
    }

    return $this->setData($arr, $this->_dataOption);
}
||
226 | |||
/**
 * Transforms the data by subtracting the mean from each entry.
 *
 * The transformed values are stored through setData(), so all cached
 * results are reset.
 *
 * @return mixed the result of setData()
 * @throws \PEAR_Exception if the mean cannot be calculated
 * @see mean()
 * @see setData()
 */
public function center()
{
    // A failure in mean() propagates to the caller; the former catch block
    // returned a variable that was never assigned.
    $mean = $this->mean();

    $arr = array();
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            $newval = $val - $mean;
            // String key: a float key would be truncated to an integer.
            $arr["$newval"] = $freq;
        }
    } else {
        foreach ($this->_data as $val) {
            $arr[] = $val - $mean;
        }
    }

    return $this->setData($arr, $this->_dataOption);
}
||
257 | |||
/**
 * Calculates the basic or the full set of statistics for the data set.
 *
 * @param int     $mode              one of self::STATS_BASIC or self::STATS_FULL
 * @param boolean $returnErrorObject passed through unchanged to calcBasic() / calcFull()
 * @return array an associative array of statistics
 * @throws \PEAR_Exception if no data has been set or $mode is not a known mode
 * @see calcBasic()
 * @see calcFull()
 */
public function calc($mode, $returnErrorObject = true)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if ($mode == self::STATS_BASIC) {
        return $this->calcBasic($returnErrorObject);
    }

    if ($mode == self::STATS_FULL) {
        return $this->calcFull($returnErrorObject);
    }

    throw new \PEAR_Exception('incorrect mode, expected STATS_BASIC or STATS_FULL');
}
||
283 | |||
/**
 * Calculates a basic set of statistics.
 *
 * Each statistic is computed by the method of the same purpose and passed
 * through __format() together with $returnErrorObject.
 *
 * @param boolean $returnErrorObject forwarded as second argument to __format()
 * @return array associative array with the keys min, max, sum, sum2, count,
 *               mean, stdev, variance and range
 * @see calc()
 * @see calcFull()
 */
public function calcBasic($returnErrorObject = true)
{
    // Result key => method producing the value, in output order.
    $producers = array(
        'min'      => 'min',
        'max'      => 'max',
        'sum'      => 'sum',
        'sum2'     => 'sum2',
        'count'    => 'count',
        'mean'     => 'mean',
        'stdev'    => 'stDev',
        'variance' => 'variance',
        'range'    => 'range',
    );

    $stats = array();
    foreach ($producers as $key => $method) {
        $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    return $stats;
}
||
308 | |||
/**
 * Calculates a full set of statistics.
 *
 * Each statistic is computed by its own method and passed through
 * __format() together with $returnErrorObject. Sample central and raw
 * moments are reported for the orders 1 to 5.
 *
 * @param boolean $returnErrorObject forwarded as second argument to __format()
 * @return array associative array of statistics
 * @see calc()
 * @see calcBasic()
 */
public function calcFull($returnErrorObject = true)
{
    // Result key => method producing the value; entries placed before the moments.
    $leading = array(
        'min'                => 'min',
        'max'                => 'max',
        'sum'                => 'sum',
        'sum2'               => 'sum2',
        'count'              => 'count',
        'mean'               => 'mean',
        'median'             => 'median',
        'mode'               => 'mode',
        'midrange'           => 'midrange',
        'geometric_mean'     => 'geometricMean',
        'harmonic_mean'      => 'harmonicMean',
        'stdev'              => 'stDev',
        'absdev'             => 'absDev',
        'variance'           => 'variance',
        'range'              => 'range',
        'std_error_of_mean'  => 'stdErrorOfMean',
        'skewness'           => 'skewness',
        'kurtosis'           => 'kurtosis',
        'coeff_of_variation' => 'coeffOfVariation',
    );
    // Result key => method taking the moment order as its argument.
    $moments = array(
        'sample_central_moments' => 'sampleCentralMoment',
        'sample_raw_moments'     => 'sampleRawMoment',
    );
    // Entries placed after the moments.
    $trailing = array(
        'frequency'                      => 'frequency',
        'quartiles'                      => 'quartiles',
        'interquartile_range'            => 'interquartileRange',
        'interquartile_mean'             => 'interquartileMean',
        'quartile_deviation'             => 'quartileDeviation',
        'quartile_variation_coefficient' => 'quartileVariationCoefficient',
        'quartile_skewness_coefficient'  => 'quartileSkewnessCoefficient',
    );

    $stats = array();

    foreach ($leading as $key => $method) {
        $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    foreach ($moments as $key => $method) {
        $byOrder = array();
        for ($order = 1; $order <= 5; $order++) {
            $byOrder[$order] = $this->__format($this->$method($order), $returnErrorObject);
        }
        $stats[$key] = $byOrder;
    }

    foreach ($trailing as $key => $method) {
        $stats[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    return $stats;
}
||
364 | |||
/**
 * Calculates the minimum of the data set.
 *
 * For cumulative data the minimum is taken over the array keys (the data
 * points), not over the frequencies. The result is cached.
 *
 * @return mixed the minimum value
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see max()
 */
public function min()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (!array_key_exists('min', $this->_calculatedValues)) {
        $points = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
            ? array_keys($this->_data)
            : $this->_data;

        $this->_calculatedValues['min'] = min($points);
    }

    return $this->_calculatedValues['min'];
}
||
392 | |||
/**
 * Calculates the maximum of the data set.
 *
 * For cumulative data the maximum is taken over the array keys (the data
 * points), not over the frequencies. The result is cached.
 *
 * @return mixed the maximum value
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see min()
 */
public function max()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (!array_key_exists('max', $this->_calculatedValues)) {
        $points = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
            ? array_keys($this->_data)
            : $this->_data;

        $this->_calculatedValues['max'] = max($points);
    }

    return $this->_calculatedValues['max'];
}
||
417 | |||
/**
 * Calculates SUM { xi }.
 *
 * Delegates to sumN(1) and caches the result.
 *
 * @return mixed the sum
 * @throws \PEAR_Exception if sumN() fails (e.g. no data has been set)
 * @see calc()
 * @see sum2()
 * @see sumN()
 */
public function sum()
{
    if (!array_key_exists('sum', $this->_calculatedValues)) {
        // A failure in sumN() propagates; the former catch block returned
        // a variable that was never assigned.
        $this->_calculatedValues['sum'] = $this->sumN(1);
    }

    return $this->_calculatedValues['sum'];
}
||
440 | |||
/**
 * Calculates SUM { (xi)^2 }.
 *
 * Delegates to sumN(2) and caches the result.
 *
 * @return mixed the sum of squares
 * @throws \PEAR_Exception if sumN() fails (e.g. no data has been set)
 * @see calc()
 * @see sum()
 * @see sumN()
 */
public function sum2()
{
    if (!array_key_exists('sum2', $this->_calculatedValues)) {
        // A failure in sumN() propagates; the former catch block returned
        // a variable that was never assigned.
        $this->_calculatedValues['sum2'] = $this->sumN(2);
    }

    return $this->_calculatedValues['sum2'];
}
||
463 | |||
/**
 * Calculates SUM { (xi)^n }.
 *
 * For cumulative data every term is weighted by its frequency:
 * SUM { freq * (xi)^n }. The result is not cached.
 *
 * @param numeric $n the exponent
 * @return mixed the sum
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see sum()
 * @see sum2()
 */
public function sumN($n)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $exponent = (float) $n;
    $weighted = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE);

    $total = 0;
    foreach ($this->_data as $key => $entry) {
        if ($weighted) {
            // key is the data point, entry its frequency
            $total += $entry * pow((float) $key, $exponent);
        } else {
            $total += pow((float) $entry, $exponent);
        }
    }

    return $total;
}
||
492 | |||
/**
 * Calculates PROD { (xi) }, the product of all observations.
 *
 * Delegates to productN(1) and caches the result.
 *
 * @return numeric|array the product as a number, or whatever productN(1)
 *                       returns when it cannot reduce it to one number
 * @throws \PEAR_Exception if productN() fails (e.g. no data has been set)
 * @see productN()
 */
public function product()
{
    if (!array_key_exists('product', $this->_calculatedValues)) {
        // A failure in productN() propagates; the former catch block
        // returned a variable that was never assigned.
        $this->_calculatedValues['product'] = $this->productN(1);
    }

    return $this->_calculatedValues['product'];
}
||
515 | |||
/**
 * Calculates PROD { (xi)^n }, the product of all observations raised to n.
 *
 * The running product is split into partial products whenever it grows past
 * a threshold; the partials are multiplied together at the end. If that
 * final multiplication becomes infinite, the array of partial products is
 * returned instead of a single number.
 *
 * For cumulative data a value observed freq times contributes
 * (value^n)^freq to the product.
 *
 * @param numeric $n the exponent
 * @return numeric|array the product as a number, or an array of partial
 *                       products if they cannot be reduced without overflow
 * @throws \PEAR_Exception if no data has been set
 * @see product()
 */
public function productN($n)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $exponent = (float) $n;
    $prodN = 1.0;
    $partial = array();

    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            if ($val == 0) {
                return 0.0;
            }
            // Repeating a factor freq times multiplies its exponent by freq.
            // The previous "freq * val^n" was only right for freq == 1.
            $prodN *= pow((float) $val, $exponent * $freq);
            if ($prodN > 10000 * $n) {
                $partial[] = $prodN;
                $prodN = 1.0;
            }
        }
    } else {
        foreach ($this->_data as $val) {
            if ($val == 0) {
                return 0.0;
            }
            $prodN *= pow((float) $val, $exponent);
            if ($prodN > 10 * $n) {
                $partial[] = $prodN;
                $prodN = 1.0;
            }
        }
    }

    if (empty($partial)) {
        return $prodN;
    }

    $partial[] = $prodN;

    // Try to reduce the partial products to a single value.
    $tmp = 1.0;
    foreach ($partial as $val) {
        $tmp *= $val;
        if (is_infinite($tmp)) {
            // Cannot reduce without overflow: hand back the pieces.
            return $partial;
        }
    }

    return $tmp;
}
||
573 | |||
/**
 * Calculates the number of data points in the set.
 *
 * For cumulative data the expanded data set is counted. The result is cached.
 *
 * @return int the number of data points
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 */
public function count()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (!array_key_exists('count', $this->_calculatedValues)) {
        $points = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
            ? $this->_dataExpanded
            : $this->_data;

        $this->_calculatedValues['count'] = count($points);
    }

    return $this->_calculatedValues['count'];
}
||
597 | |||
/**
 * Calculates the mean (average) of the data points in the set,
 * as sum() divided by count(). The result is cached.
 *
 * @return mixed the mean value
 * @throws \PEAR_Exception if sum() or count() fails (e.g. no data has been set)
 * @see calc()
 * @see sum()
 * @see count()
 */
public function mean()
{
    if (!array_key_exists('mean', $this->_calculatedValues)) {
        // Failures in sum()/count() propagate; the former catch blocks
        // returned variables that were never assigned.
        $sum = $this->sum();
        $count = $this->count();

        $this->_calculatedValues['mean'] = $sum / $count;
    }

    return $this->_calculatedValues['mean'];
}
||
625 | |||
/**
 * Calculates the range of the data set: max() - min(). The result is cached.
 *
 * @return mixed the value of the range
 * @throws \PEAR_Exception if min() or max() fails (e.g. no data has been set)
 * @see min()
 * @see max()
 */
public function range()
{
    if (!array_key_exists('range', $this->_calculatedValues)) {
        // Failures in min()/max() propagate; the former catch blocks
        // returned variables that were never assigned.
        $min = $this->min();
        $max = $this->max();

        $this->_calculatedValues['range'] = $max - $min;
    }

    return $this->_calculatedValues['range'];
}
||
649 | |||
/**
 * Calculates the variance of the data points in the set.
 *
 * Delegates to __calcVariance() without a fixed mean and caches the result.
 *
 * @return mixed the variance value
 * @throws \PEAR_Exception if __calcVariance() fails
 * @see calc()
 * @see varianceWithMean()
 */
public function variance()
{
    if (!array_key_exists('variance', $this->_calculatedValues)) {
        // A failure in __calcVariance() propagates; the former catch block
        // returned a variable that was never assigned.
        $this->_calculatedValues['variance'] = $this->__calcVariance();
    }

    return $this->_calculatedValues['variance'];
}
||
673 | |||
/**
 * Calculates the standard deviation of the data points in the set,
 * as the square root of variance(). The result is cached.
 *
 * @return float the standard deviation
 * @throws \PEAR_Exception if variance() fails
 * @see calc()
 * @see variance()
 */
public function stDev()
{
    if (!array_key_exists('stDev', $this->_calculatedValues)) {
        // A failure in variance() propagates; the former catch block
        // returned a variable that was never assigned.
        $this->_calculatedValues['stDev'] = sqrt($this->variance());
    }

    return $this->_calculatedValues['stDev'];
}
||
696 | |||
/**
 * Calculates the variance of the data points in the set given a fixed
 * mean (average) value. Not used in calcBasic(), calcFull() or calc().
 *
 * Delegates to __calcVariance() with the supplied mean; the result is
 * not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return mixed the value returned by __calcVariance($mean)
 * @see variance()
 */
public function varianceWithMean($mean)
{
    return $this->__calcVariance($mean);
}
||
714 | |||
/**
 * Calculates the standard deviation of the data points in the set given a
 * fixed mean (average) value, as the square root of varianceWithMean().
 * Not used in calcBasic(), calcFull() or calc(). The result is not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return float the standard deviation
 * @throws \PEAR_Exception if varianceWithMean() fails
 * @see varianceWithMean()
 * @see stDev()
 */
public function stDevWithMean($mean)
{
    // A failure in varianceWithMean() propagates; the former catch block
    // returned a variable that was never assigned.
    return sqrt($this->varianceWithMean($mean));
}
||
737 | |||
/**
 * Calculates the absolute deviation of the data points in the set.
 *
 * Delegates to __calcAbsoluteDeviation() without a fixed mean and caches
 * the result.
 *
 * @return mixed the absolute deviation
 * @throws \PEAR_Exception if __calcAbsoluteDeviation() fails
 * @see calc()
 * @see absDevWithMean()
 */
public function absDev()
{
    if (!array_key_exists('absDev', $this->_calculatedValues)) {
        // A failure in __calcAbsoluteDeviation() propagates; the former
        // catch block returned a variable that was never assigned.
        $this->_calculatedValues['absDev'] = $this->__calcAbsoluteDeviation();
    }

    return $this->_calculatedValues['absDev'];
}
||
762 | |||
/**
 * Calculates the absolute deviation of the data points in the set given a
 * fixed mean (average) value. Not used in calcBasic(), calcFull() or calc().
 *
 * Delegates to __calcAbsoluteDeviation() with the supplied mean; the
 * result is not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return mixed the value returned by __calcAbsoluteDeviation($mean)
 * @see absDev()
 */
public function absDevWithMean($mean)
{
    return $this->__calcAbsoluteDeviation($mean);
}
||
779 | |||
/**
 * Calculates the skewness of the data distribution in the set.
 *
 * The skewness measures the degree of asymmetry of a distribution and is
 * related to its third central moment:
 *  - a normal distribution has a skewness = 0
 *  - a tail towards the high end of the scale (positive skew) gives a skewness > 0
 *  - a tail towards the low end of the scale (negative skew) gives a skewness < 0
 *
 * Computed as __sumdiff(3) / (count() * stDev()^3) and cached.
 *
 * NOTE(review): a standard deviation of zero makes the denominator zero;
 * no guard exists for that case here.
 *
 * @return mixed the skewness value
 * @throws \PEAR_Exception if count(), stDev() or __sumdiff() fails
 * @see count()
 * @see stDev()
 * @see calc()
 */
public function skewness()
{
    if (!array_key_exists('skewness', $this->_calculatedValues)) {
        // Failures in the helpers propagate; the former nested catch blocks
        // returned variables that were never assigned.
        $count = $this->count();
        $stDev = $this->stDev();
        $sumdiff3 = $this->__sumdiff(3);

        $this->_calculatedValues['skewness'] = ($sumdiff3 / ($count * pow($stDev, 3)));
    }

    return $this->_calculatedValues['skewness'];
}
||
821 | |||
/**
 * Calculates the kurtosis of the data distribution in the set.
 *
 * The kurtosis measures the degree of peakedness of a distribution. It is
 * also called the "excess" or "excess coefficient", and is a normalized
 * form of the fourth central moment of a distribution:
 *  - a normal distribution has a kurtosis = 0
 *  - a narrow and peaked (leptokurtic) distribution has a kurtosis > 0
 *  - a flat and wide (platykurtic) distribution has a kurtosis < 0
 *
 * Computed as __sumdiff(4) / (count() * stDev()^4) - 3 and cached.
 *
 * NOTE(review): a standard deviation of zero makes the denominator zero;
 * no guard exists for that case here.
 *
 * @return mixed the kurtosis value
 * @throws \PEAR_Exception if count(), stDev() or __sumdiff() fails
 * @see count()
 * @see stDev()
 * @see calc()
 */
public function kurtosis()
{
    if (!array_key_exists('kurtosis', $this->_calculatedValues)) {
        // Failures in the helpers propagate; the former nested catch blocks
        // returned variables that were never assigned.
        $count = $this->count();
        $stDev = $this->stDev();
        $sumdiff4 = $this->__sumdiff(4);

        $this->_calculatedValues['kurtosis'] = ($sumdiff4 / ($count * pow($stDev, 4))) - 3;
    }

    return $this->_calculatedValues['kurtosis'];
}
||
863 | |||
/**
 * Calculates the median of a data set.
 * The median is the value such that half of the points are below it
 * in a sorted data set.
 * If the number of values is odd, it is the middle item.
 * If the number of values is even, it is the average of the two middle items.
 * For cumulative data sets the expanded data ($this->_dataExpanded) is used.
 *
 * NOTE(review): the lookup indexes the data by position, so it presumably
 * expects a sorted, zero-based list — confirm against setData().
 *
 * @access public
 * @return mixed the median value
 * @throws \PEAR_Exception if the data has not been set or count() fails
 * @see count()
 * @see calc()
 */
public function median()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (!array_key_exists('median', $this->_calculatedValues)) {
        if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
            $arr = &$this->_dataExpanded;
        } else {
            $arr = &$this->_data;
        }
        // Let a failure in count() propagate; the former catch block
        // returned the never-assigned $n (null) instead.
        $n = $this->count();

        $h = intval($n / 2);
        if ($n % 2 == 0) {
            // even number of items: average the two central entries
            $median = ($arr[$h] + $arr[$h - 1]) / 2;
        } else {
            $median = $arr[$h];
        }
        $this->_calculatedValues['median'] = $median;
    }
    return $this->_calculatedValues['median'];
}
||
904 | |||
/**
 * Determines the mode(s) of the data set, i.e. the value(s) that occur
 * with the highest frequency. Several values may share that frequency,
 * so the result is always an array.
 * For cumulative data the stored value=>frequency table is used as is,
 * otherwise the table is obtained from frequency().
 * The result is cached in $this->_calculatedValues['mode'].
 *
 * @access public
 * @return mixed an array with the mode value(s)
 * @throws \PEAR_Exception if the data has not been set
 * @see frequency()
 * @see calc()
 */
public function mode()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (array_key_exists('mode', $this->_calculatedValues)) {
        return $this->_calculatedValues['mode'];
    }

    $table = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
        ? $this->_data
        : $this->frequency();

    // highest frequency first, keys (the data values) preserved
    arsort($table);

    $isFirst = true;
    foreach ($table as $value => $occurrences) {
        if ($isFirst) {
            // the first entry defines the top frequency
            $modes = array($value);
            $topFrequency = $occurrences;
            $isFirst = false;
            continue;
        }
        if ($topFrequency == $occurrences) {
            $modes[] = $value;
        }
        if ($topFrequency > $occurrences) {
            // table is sorted, nothing further can tie with the top
            break;
        }
    }

    $this->_calculatedValues['mode'] = $modes;
    return $this->_calculatedValues['mode'];
}
||
948 | |||
/**
 * Calculates the midrange of a data set.
 * The midrange is the average of the minimum and maximum of the data set.
 * The result is cached in $this->_calculatedValues['midrange'].
 *
 * @access public
 * @return mixed the midrange value
 * @throws \PEAR_Exception if min() or max() fail
 * @see min()
 * @see max()
 * @see calc()
 */
public function midrange()
{
    if (!array_key_exists('midrange', $this->_calculatedValues)) {
        // Failures in min()/max() propagate; the former catch blocks
        // returned never-assigned variables and masked the error.
        $min = $this->min();
        $max = $this->max();

        $this->_calculatedValues['midrange'] = (($max + $min) / 2);
    }
    return $this->_calculatedValues['midrange'];
}
||
978 | |||
/**
 * Calculates the geometrical mean of the data points in the set,
 * i.e. the Nth root of the product of the N data points.
 * The result is cached in $this->_calculatedValues['geometricMean'].
 *
 * @access public
 * @return mixed the geometrical mean value
 * @throws \PEAR_Exception if count() or product() fail, or if the product
 *                         of the data set is negative
 * @see calc()
 * @see product()
 * @see count()
 */
public function geometricMean()
{
    if (!array_key_exists('geometricMean', $this->_calculatedValues)) {
        // Failures in count()/product() propagate; the former catch blocks
        // returned never-assigned variables and masked the error.
        $count = $this->count();
        $prod = $this->product();

        if (is_array($prod)) {
            // product() handed back several factors (presumably partial
            // products — confirm against product()); the root of a product
            // is the product of the roots.
            $geomMean = 1.0;
            foreach ($prod as $val) {
                $geomMean *= pow($val, 1 / $count);
            }
            $this->_calculatedValues['geometricMean'] = $geomMean;
        } elseif ($prod == 0.0) {
            // cache the zero result as well, like every other outcome
            $this->_calculatedValues['geometricMean'] = 0.0;
        } elseif ($prod < 0) {
            throw new \PEAR_Exception('The product of the data set is negative, geometric mean undefined.');
        } else {
            $this->_calculatedValues['geometricMean'] = pow($prod, 1 / $count);
        }
    }
    return $this->_calculatedValues['geometricMean'];
}
||
1020 | |||
/**
 * Calculates the harmonic mean of the data points in the set:
 *
 *   harmonic mean = N / SUM { 1 / xi }
 *
 * For cumulative data each reciprocal is weighted by its frequency.
 * The result is cached in $this->_calculatedValues['harmonicMean'].
 *
 * @access public
 * @return mixed the harmonic mean value
 * @throws \PEAR_Exception if the data has not been set, count() fails,
 *                         a data value is zero, or the reciprocals sum to zero
 * @see calc()
 * @see count()
 */
public function harmonicMean()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (!array_key_exists('harmonicMean', $this->_calculatedValues)) {
        // A failure in count() propagates; the former catch block returned
        // the never-assigned $count and masked the error.
        $count = $this->count();
        $invsum = 0.0;
        if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
            foreach ($this->_data as $val => $freq) {
                if ($val == 0) {
                    throw new \PEAR_Exception('cannot calculate a ' .
                        'harmonic mean with data values of zero.');
                }
                $invsum += $freq / $val;
            }
        } else {
            foreach ($this->_data as $val) {
                if ($val == 0) {
                    throw new \PEAR_Exception('cannot calculate a ' .
                        'harmonic mean with data values of zero.');
                }
                $invsum += 1 / $val;
            }
        }
        // Mixed-sign data can cancel out exactly (e.g. 1 and -1).
        if ($invsum == 0) {
            throw new \PEAR_Exception('cannot calculate a harmonic mean, ' .
                'the sum of the reciprocals is zero.');
        }
        $this->_calculatedValues['harmonicMean'] = $count / $invsum;
    }
    return $this->_calculatedValues['harmonicMean'];
}
||
1063 | |||
/**
 * Calculates the nth central moment (m{n}) of a data set.
 *
 * The definition of a sample central moment is:
 *
 *     m{n} = 1/N * SUM { (xi - avg)^n }
 *
 * where: N = sample size, avg = sample mean.
 * The first central moment is returned as 0 without touching the data.
 *
 * @access public
 * @param integer $n moment to calculate
 * @return mixed the numeric value of the moment
 * @throws \PEAR_Exception if $n is not an integer >= 1, there are zero
 *                         data entries, or count()/__sumdiff() fail
 */
public function sampleCentralMoment($n)
{
    if (!is_int($n) || $n < 1) {
        throw new \PEAR_Exception('moment must be a positive integer >= 1.');
    }

    if ($n == 1) {
        return 0;
    }
    // Failures in the helpers propagate; the former catch blocks returned
    // never-assigned variables and masked the error.
    $count = $this->count();
    if ($count == 0) {
        throw new \PEAR_Exception("Cannot calculate {$n}th sample moment, " .
            'there are zero data entries');
    }
    $sum = $this->__sumdiff($n);
    return ($sum / $count);
}
||
1102 | |||
/**
 * Calculates the nth raw moment (m{n}) of a data set.
 *
 * The definition of a sample raw moment is:
 *
 *     m{n} = 1/N * SUM { xi^n }
 *
 * where: N = sample size.
 *
 * @access public
 * @param integer $n moment to calculate
 * @return mixed the numeric value of the moment
 * @throws \PEAR_Exception if $n is not an integer >= 1, there are zero
 *                         data entries, or count()/sumN() fail
 */
public function sampleRawMoment($n)
{
    if (!is_int($n) || $n < 1) {
        throw new \PEAR_Exception('moment must be a positive integer >= 1.');
    }

    // Failures in the helpers propagate; the former catch blocks returned
    // never-assigned variables and masked the error.
    $count = $this->count();
    if ($count == 0) {
        throw new \PEAR_Exception("Cannot calculate {$n}th raw moment, " .
            'there are zero data entries.');
    }
    $sum = $this->sumN($n);
    return ($sum / $count);
}
||
1138 | |||
/**
 * Calculates the coefficient of variation of a data set.
 * The coefficient of variation measures the spread of a set of data
 * as a proportion of its mean (stDev / mean). It is often expressed
 * as a percentage.
 * The result is cached in $this->_calculatedValues['coeffOfVariation'].
 *
 * @access public
 * @return mixed the coefficient of variation
 * @throws \PEAR_Exception if the mean is zero, or mean()/stDev() fail
 * @see stDev()
 * @see mean()
 * @see calc()
 */
public function coeffOfVariation()
{
    if (!array_key_exists('coeffOfVariation', $this->_calculatedValues)) {
        // Failures in mean()/stDev() propagate; the former catch blocks
        // returned never-assigned variables and masked the error.
        $mean = $this->mean();

        if ($mean == 0.0) {
            throw new \PEAR_Exception('cannot calculate the coefficient ' .
                'of variation, mean of sample is zero');
        }
        $stDev = $this->stDev();

        $this->_calculatedValues['coeffOfVariation'] = $stDev / $mean;
    }
    return $this->_calculatedValues['coeffOfVariation'];
}
||
1174 | |||
/**
 * Calculates the standard error of the mean.
 * It is the standard deviation of the sampling distribution of
 * the mean. The formula is:
 *
 *     S.E. Mean = SD / (N)^(1/2)
 *
 * This formula does not assume a normal distribution, and shows
 * that the size of the standard error of the mean is inversely
 * proportional to the square root of the sample size.
 * The result is cached in $this->_calculatedValues['stdErrorOfMean'].
 *
 * @access public
 * @return mixed the standard error of the mean
 * @throws \PEAR_Exception if count() or stDev() fail
 * @see stDev()
 * @see count()
 * @see calc()
 */
public function stdErrorOfMean()
{
    if (!array_key_exists('stdErrorOfMean', $this->_calculatedValues)) {
        // Failures in count()/stDev() propagate; the former catch blocks
        // returned never-assigned variables and masked the error.
        $count = $this->count();
        $stDev = $this->stDev();
        $this->_calculatedValues['stdErrorOfMean'] = $stDev / sqrt($count);
    }
    return $this->_calculatedValues['stdErrorOfMean'];
}
||
1209 | |||
/**
 * Builds the value=>frequency table of the data set.
 * A cumulative data set already is such a table and is returned as
 * stored; for a simple data set the occurrences of each value are
 * counted and the table is sorted by key.
 * The result is cached in $this->_calculatedValues['frequency'].
 *
 * @access public
 * @return mixed an associative array of value=>frequency items
 * @throws \PEAR_Exception if the data has not been set
 * @see calc()
 */
public function frequency()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (array_key_exists('frequency', $this->_calculatedValues)) {
        return $this->_calculatedValues['frequency'];
    }

    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        $table = $this->_data;
    } else {
        $table = array();
        foreach ($this->_data as $item) {
            // the string form of the value is the table key
            $slot = "$item";
            $table[$slot] = isset($table[$slot]) ? $table[$slot] + 1 : 1;
        }
        ksort($table);
    }

    $this->_calculatedValues['frequency'] = $table;
    return $this->_calculatedValues['frequency'];
}
||
1242 | |||
/**
 * The quartiles are defined as the values that divide a sorted
 * data set into four equal-sized subsets, and correspond to the
 * 25th, 50th, and 75th percentiles.
 * The result is cached in $this->_calculatedValues['quartiles'].
 *
 * @access public
 * @return mixed an associative array with the keys '25', '50' and '75'
 * @throws \PEAR_Exception if percentile() fails
 * @see percentile()
 */
public function quartiles()
{
    if (!array_key_exists('quartiles', $this->_calculatedValues)) {
        // Failures in percentile() propagate; the former catch blocks
        // returned never-assigned variables and masked the error.
        $q1 = $this->percentile(25);
        $q2 = $this->percentile(50);
        $q3 = $this->percentile(75);

        $this->_calculatedValues['quartiles'] = array(
            '25' => $q1,
            '50' => $q2,
            '75' => $q3,
        );
    }
    return $this->_calculatedValues['quartiles'];
}
||
1279 | |||
/**
 * The interquartile mean is defined as the mean of the values left
 * after discarding the lower 25% and top 25% ranked values, i.e.:
 *
 *  interquart mean = mean(<P(25),P(75)>)
 *
 *  where: P = percentile
 *
 * Values equal to P(25) or P(75) are included in the mean.
 * The result is cached in $this->_calculatedValues['interquartileMean'].
 *
 * @todo need to double check the equation
 * @access public
 * @return mixed a numeric value
 * @throws \PEAR_Exception if quartiles() fails or no value lies within
 *                         the interquartile range
 * @see quartiles()
 */
public function interquartileMean()
{
    if (!array_key_exists('interquartileMean', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; the former catch block
        // returned the never-assigned $quart and masked the error.
        $quart = $this->quartiles();
        $q3 = $quart['75'];
        $q1 = $quart['25'];
        $sum = 0;
        $n = 0;
        foreach ($this->getData(true) as $val) {
            if ($val >= $q1 && $val <= $q3) {
                $sum += $val;
                $n++;
            }
        }
        if ($n == 0) {
            throw new \PEAR_Exception('error calculating interquartile mean, ' .
                'empty interquartile range of values.');
        }
        $this->_calculatedValues['interquartileMean'] = $sum / $n;
    }
    return $this->_calculatedValues['interquartileMean'];
}
||
1319 | |||
1320 | /** |
||
1321 | * The interquartile range is the distance between the 75th and 25th |
||
1322 | * percentiles. Basically the range of the middle 50% of the data set, |
||
1323 | * and thus is not affected by outliers or extreme values. |
||
1324 | * |
||
1325 | * interquart range = P(75) - P(25) |
||
1326 | * |
||
1327 | * where: P = percentile |
||
1328 | * |
||
1329 | * @access public |
||
1330 | * @return mixed a numeric value on success, a PEAR_Error otherwise |
||
1331 | * @see quartiles() |
||
1332 | */ |
||
1333 | public function interquartileRange() |
||
1346 | } |
||
1347 | |||
/**
 * The quartile deviation is half of the interquartile range value
 *
 *  quart dev = (P(75) - P(25)) / 2
 *
 *  where: P = percentile
 *
 * The result is cached in $this->_calculatedValues['quartileDeviation'].
 *
 * @access public
 * @return mixed a numeric value
 * @throws \PEAR_Exception if interquartileRange() fails
 * @see quartiles()
 * @see interquartileRange()
 */
public function quartileDeviation()
{
    if (!array_key_exists('quartileDeviation', $this->_calculatedValues)) {
        // A failure in interquartileRange() propagates; the former catch
        // block returned the never-assigned $iqr and masked the error.
        $iqr = $this->interquartileRange();
        $this->_calculatedValues['quartileDeviation'] = $iqr / 2;
    }
    return $this->_calculatedValues['quartileDeviation'];
}
||
1372 | |||
1373 | /** |
||
1374 | * The quartile variation coefficient is defined as follows: |
||
1375 | * |
||
1376 | * quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25)) |
||
1377 | * |
||
1378 | * where: P = percentile |
||
1379 | * |
||
1380 | * @todo need to double check the equation |
||
1381 | * @access public |
||
1382 | * @return mixed a numeric value on success, a PEAR_Error otherwise |
||
1383 | * @see quartiles() |
||
1384 | */ |
||
1385 | public function quartileVariationCoefficient() |
||
1400 | } |
||
1401 | |||
/**
 * The quartile skewness coefficient (also known as Bowley Skewness),
 * is defined as follows:
 *
 *  quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25))
 *
 *  where: P = percentile
 *
 * The result is cached in
 * $this->_calculatedValues['quartileSkewnessCoefficient'].
 *
 * @todo need to double check the equation
 * @access public
 * @return mixed a numeric value
 * @throws \PEAR_Exception if quartiles() fails or P(75) equals P(25)
 * @see quartiles()
 */
public function quartileSkewnessCoefficient()
{
    if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; the former catch block
        // returned the never-assigned $quart and masked the error.
        $quart = $this->quartiles();
        $q3 = $quart['75'];
        $q2 = $quart['50'];
        $q1 = $quart['25'];
        $d = $q3 - 2 * $q2 + $q1;
        $s = $q3 - $q1;
        // A zero interquartile range makes the coefficient undefined.
        if ($s == 0) {
            throw new \PEAR_Exception('cannot calculate the quartile skewness ' .
                'coefficient, interquartile range is zero');
        }
        $this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s;
    }
    return $this->_calculatedValues['quartileSkewnessCoefficient'];
}
||
1432 | |||
1433 | /** |
||
1434 | * The pth percentile is the value such that p% of a sorted data set |
||
1435 | * is smaller than it, and (100 - p)% of the data is larger. |
||
1436 | * |
||
1437 | * A quick algorithm to pick the appropriate value from a sorted data |
||
1438 | * set is as follows: |
||
1439 | * |
||
1440 | * - Count the number of values: n |
||
1441 | * - Calculate the position of the value in the data list: i = p * (n + 1) |
||
1442 | * - if i is an integer, return the data at that position |
||
1443 | * - if i < 1, return the minimum of the data set |
||
1444 | * - if i > n, return the maximum of the data set |
||
1445 | * - otherwise, average the entries at adjacent positions to i |
||
1446 | * |
||
1447 | * The median is the 50th percentile value. |
||
1448 | * |
||
1449 | * @todo need to double check generality of the algorithm |
||
1450 | * |
||
1451 | * @access public |
||
1452 | * @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile |
||
1453 | * @return mixed a numeric value on success, a PEAR_Error otherwise |
||
1454 | * @see quartiles() |
||
1455 | * @see median() |
||
1456 | */ |
||
1457 | public function percentile($p) |
||
1481 | } |
||
1482 | } |
||
1483 | |||
1484 | // private methods |
||
1485 | |||
/**
 * Utility function to calculate: SUM { (xi - mean)^n }
 * For cumulative data each term is weighted by the frequency of xi.
 *
 * @access private
 * @param numeric $power the exponent
 * @param optional double $mean the data set mean value; when null the
 *                              value of mean() is used
 * @return mixed the sum
 * @throws \PEAR_Exception if the data has not been set or mean() fails
 *
 * @see stDev()
 * @see varianceWithMean();
 * @see skewness();
 * @see kurtosis();
 */
public function __sumdiff($power, $mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (is_null($mean)) {
        // A failure in mean() propagates; the former catch block returned
        // the still-null $mean, so callers silently received null.
        $mean = $this->mean();
    }
    $sdiff = 0;
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            $sdiff += $freq * pow((double) ($val - $mean), (double) $power);
        }
    } else {
        foreach ($this->_data as $val) {
            $sdiff += pow((double) ($val - $mean), (double) $power);
        }
    }
    return $sdiff;
}
||
1523 | |||
/**
 * Utility function to calculate the variance with or without
 * a fixed mean:
 *
 *   variance = SUM { (xi - mean)^2 } / (N - 1)
 *
 * @access private
 * @param $mean the fixed mean to use, null as default
 * @return mixed a numeric value
 * @throws \PEAR_Exception if the data has not been set, there is only a
 *                         single data point, or __sumdiff()/count() fail
 * @see variance()
 * @see varianceWithMean()
 */
public function __calcVariance($mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    // Failures in the helpers propagate; the former catch blocks returned
    // never-assigned variables and masked the error.
    $sumdiff2 = $this->__sumdiff(2, $mean);
    $count = $this->count();

    if ($count == 1) {
        throw new \PEAR_Exception('cannot calculate variance of a singe data point');
    }
    return ($sumdiff2 / ($count - 1));
}
||
1555 | |||
/**
 * Utility function to calculate the absolute deviation with or without
 * a fixed mean:
 *
 *   abs dev = SUM { | xi - mean | } / N
 *
 * @access private
 * @param $mean the fixed mean to use, null as default
 * @return mixed a numeric value
 * @throws \PEAR_Exception if the data has not been set, or
 *                         count()/__sumabsdev() fail
 * @see absDev()
 * @see absDevWithMean()
 */
public function __calcAbsoluteDeviation($mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    // Failures in the helpers propagate; the former catch blocks returned
    // never-assigned variables and masked the error.
    $count = $this->count();
    $sumabsdev = $this->__sumabsdev($mean);

    return $sumabsdev / $count;
}
||
1584 | |||
/**
 * Helper that adds up the absolute deviations: SUM { | xi - mean | }
 * For cumulative data each deviation is multiplied by the frequency
 * of its value.
 *
 * @access private
 * @param optional double $mean the mean value for the set or population;
 *                              when null the value of mean() is used
 * @return mixed the sum of the absolute deviations
 * @throws \PEAR_Exception if the data has not been set
 *
 * @see absDev()
 * @see absDevWithMean()
 */
public function __sumabsdev($mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $center = ($mean === null) ? $this->mean() : $mean;
    $isCumulative = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE);

    $total = 0;
    foreach ($this->_data as $key => $item) {
        if ($isCumulative) {
            // key is the data value, item its frequency
            $total += $item * abs($key - $center);
        } else {
            $total += abs($item - $center);
        }
    }
    return $total;
}
||
1615 | |||
1616 | /** |
||
1617 | * Utility function to format a PEAR_Error to be used by calc(), |
||
1618 | * calcBasic() and calcFull() |
||
1619 | * |
||
1620 | * @access private |
||
1621 | * @param mixed $v value to be formatted |
||
1622 | * @param boolean $returnErrorObject whether the raw PEAR_Error (when true, default), |
||
1623 | * or only the error message will be returned (when false) |
||
1624 | * @return mixed if the value is a PEAR_Error object, and $useErrorObject |
||
1625 | * is false, then a string with the error message will be returned, |
||
1626 | * otherwise the value will not be modified and returned as passed. |
||
1627 | */ |
||
1628 | public function __format($v, $useErrorObject = true) |
||
1634 | } |
||
1635 | } |
||
1636 | |||
1637 | /** |
||
1638 | * Utility function to validate the data and modify it |
||
1639 | * according to the current null handling option |
||
1640 | * |
||
1641 | * @access private |
||
1642 | * @return mixed true on success, a PEAR_Error object otherwise |
||
1643 | * |
||
1644 | * @see setData() |
||
1645 | */ |
||
1646 | public function _validate() |
||
1647 | { |
||
1648 | $cummulativeData = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE); |
||
1649 | foreach ($this->_data as $key => $value) { |
||
1650 | $d = ($cummulativeData) ? $key : $value; |
||
1651 | $v = ($cummulativeData) ? $value : $key; |
||
1652 | if (!is_numeric($d)) { |
||
1653 | switch ($this->_nullOption) { |
||
1654 | case self::STATS_IGNORE_NULL: |
||
1655 | unset($this->_data["$key"]); |
||
1656 | break; |
||
1657 | case self::STATS_USE_NULL_AS_ZERO: |
||
1658 | if ($cummulativeData) { |
||
1659 | unset($this->_data["$key"]); |
||
1660 | // TODO: shift up? |
||
1661 | if (!isset($this->_data[0])) { |
||
1662 | $this->_data[0] = 0; |
||
1663 | } |
||
1664 | $this->_data[0] += $v; |
||
1665 | } else { |
||
1666 | $this->_data[$key] = 0; |
||
1667 | } |
||
1668 | break; |
||
1669 | case self::STATS_REJECT_NULL: |
||
1670 | default: |
||
1671 | throw new \PEAR_Exception('data rejected, contains NULL values'); |
||
1672 | break; |
||
1710 | } |
||
1711 | } |
||
1712 | |||
1715 |