Total Complexity | 231 |
Total Lines | 1698 |
Duplicated Lines | 23.38 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like Stats often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Stats, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class Stats |
||
13 | { |
||
14 | |||
// Calculation modes accepted by calc()

/**
 * Generate only the basic descriptive statistics.
 */
const STATS_BASIC = 1;

/**
 * Generate the basic statistics plus higher moments, mode, median, etc.
 */
const STATS_FULL = 2;

// Data set formats accepted by setData()

/**
 * The data set is a plain array of numeric values. This is the default.
 * e.g. $data = array(2,3,4,5,1,1,6);
 */
const STATS_DATA_SIMPLE = 0;

/**
 * The data set is an associative array of frequencies: each key is a
 * data point and each value is its count (frequency).
 * e.g. $data = array('3'=>4, '2.3'=>5, '1.25'=>6, '0.5'=>3)
 *
 * Fractional data points must be given as string keys, because PHP
 * truncates float array keys to integers.
 */
const STATS_DATA_CUMMULATIVE = 1;

// Null handling policies; any non-numeric value counts as a null here

/**
 * Reject data sets that contain null values. This is the default.
 */
const STATS_REJECT_NULL = -1;

/**
 * Ignore null values and prune them from the data.
 */
const STATS_IGNORE_NULL = -2;

/**
 * Treat null values as 0 (zero).
 */
const STATS_USE_NULL_AS_ZERO = -3;
||
55 | |||
// Internal state

/**
 * The simple or cummulative data set as stored by setData().
 * Null until a data set has been assigned.
 *
 * @var array|null
 */
private $_data = null;

/**
 * Expanded data set. setData() initialises it to an empty array for
 * cummulative data and to null otherwise.
 * NOTE(review): presumably filled in by _validate() - confirm.
 *
 * @var array|null
 */
private $_dataExpanded = null;

/**
 * Format of the current data set, one of STATS_DATA_SIMPLE or
 * STATS_DATA_CUMMULATIVE. Null until a data set has been assigned.
 *
 * @var int|null
 */
private $_dataOption = null;

/**
 * Null handling policy: one of STATS_REJECT_NULL, STATS_IGNORE_NULL
 * or STATS_USE_NULL_AS_ZERO.
 *
 * @var int
 */
private $_nullOption;

/**
 * Cache of already computed statistics, keyed by statistic name.
 * setData() empties it whenever a new data set is assigned.
 *
 * @var array
 */
private $_calculatedValues = array();
||
102 | |||
/**
 * Creates a statistics calculator using the given null handling policy.
 *
 * @param int $nullOption how to handle null values: STATS_REJECT_NULL (default),
 *                        STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO
 * @throws \PEAR_Exception if $nullOption is not one of the supported options
 * @see setNullOption()
 */
public function __construct($nullOption = self::STATS_REJECT_NULL)
{
    // Go through the setter so an unsupported option is rejected here,
    // exactly as it would be when changed later on.
    $this->setNullOption($nullOption);
}
||
114 | |||
/**
 * Stores a new data set and validates it with the current null
 * handling option. All cached statistics are discarded.
 *
 * @param array $arr the data set
 * @param int   $opt data format: STATS_DATA_CUMMULATIVE or STATS_DATA_SIMPLE (default)
 * @return mixed the result of _validate()
 * @throws \PEAR_Exception if $arr is not an array or $opt is not a supported format
 * @see _validate()
 */
public function setData($arr, $opt = self::STATS_DATA_SIMPLE)
{
    if (!is_array($arr)) {
        throw new \PEAR_Exception('invalid data, an array of numeric data was expected');
    }
    // Reject unknown formats up front instead of clearing the object and
    // validating a data set that was never assigned.
    if ($opt != self::STATS_DATA_SIMPLE && $opt != self::STATS_DATA_CUMMULATIVE) {
        throw new \PEAR_Exception('invalid data format option, expecting: ' .
            'STATS_DATA_SIMPLE or STATS_DATA_CUMMULATIVE');
    }

    // Any previously cached statistic belongs to the old data set
    $this->_calculatedValues = array();
    $this->_dataOption = $opt;
    if ($opt == self::STATS_DATA_SIMPLE) {
        // re-index so the values form a plain 0..n-1 list
        $this->_data = array_values($arr);
        $this->_dataExpanded = null;
    } else {
        // cummulative: the keys are the data points and must be preserved
        $this->_data = $arr;
        $this->_dataExpanded = array();
    }
    return $this->_validate();
}
||
143 | |||
/**
 * Returns the stored data set.
 * NOTE(review): the data may have been altered by the null handling
 * in _validate() - confirm against that method.
 *
 * @param boolean $expanded when true and the data set is cummulative, the
 *                          expanded list is returned instead; default is false
 * @return array the data set
 * @throws \PEAR_Exception if no data has been set
 * @see _validate()
 */
public function getData($expanded = false)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $useExpanded = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) && $expanded;

    return $useExpanded ? $this->_dataExpanded : $this->_data;
}
||
164 | |||
/**
 * Changes the null handling option.
 * Call it before assigning a new data set that contains null values.
 *
 * @param int $nullOption one of STATS_REJECT_NULL, STATS_IGNORE_NULL
 *                        or STATS_USE_NULL_AS_ZERO
 * @return boolean true on success
 * @throws \PEAR_Exception if $nullOption is not one of the supported options
 * @see _validate()
 */
public function setNullOption($nullOption)
{
    $supported = array(
        self::STATS_REJECT_NULL,
        self::STATS_IGNORE_NULL,
        self::STATS_USE_NULL_AS_ZERO,
    );

    // Loose comparison on purpose: numeric strings such as '-1' are accepted
    if (!in_array($nullOption, $supported)) {
        throw new \PEAR_Exception('invalid null handling option expecting: ' .
            'STATS_REJECT_NULL, STATS_IGNORE_NULL or STATS_USE_NULL_AS_ZERO');
    }

    $this->_nullOption = $nullOption;

    return true;
}
||
185 | |||
/**
 * Transforms the data by subtracting the mean from each entry and
 * dividing the result by the standard deviation. The transformed data
 * is stored through setData(), which resets all cached statistics.
 *
 * @return mixed the result of setData()
 * @throws \PEAR_Exception if the mean or standard deviation cannot be
 *                         calculated, or the standard deviation is zero
 * @see mean()
 * @see stDev()
 * @see setData()
 */
public function studentize()
{
    // Failures in mean()/stDev() propagate to the caller. The former catch
    // blocks returned a variable that was never assigned, hiding the error.
    $mean = $this->mean();
    $std = $this->stDev();
    if ($std == 0) {
        throw new \PEAR_Exception('cannot studentize data, standard deviation is zero.');
    }

    $arr = array();
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            $newval = ($val - $mean) / $std;
            // string key, so a fractional value is not truncated to an integer key
            $arr["$newval"] = $freq;
        }
    } else {
        foreach ($this->_data as $val) {
            $arr[] = ($val - $mean) / $std;
        }
    }
    return $this->setData($arr, $this->_dataOption);
}
||
226 | |||
/**
 * Transforms the data by subtracting the mean from each entry.
 * The transformed data is stored through setData(), which resets all
 * cached statistics.
 *
 * @return mixed the result of setData()
 * @throws \PEAR_Exception if the mean cannot be calculated
 * @see mean()
 * @see setData()
 */
public function center()
{
    // A failure in mean() propagates to the caller. The former catch block
    // returned a variable that was never assigned, hiding the error.
    $mean = $this->mean();

    $arr = array();
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            $newval = $val - $mean;
            // string key, so a fractional value is not truncated to an integer key
            $arr["$newval"] = $freq;
        }
    } else {
        foreach ($this->_data as $val) {
            $arr[] = $val - $mean;
        }
    }
    return $this->setData($arr, $this->_dataOption);
}
||
257 | |||
/**
 * Calculates the basic or the full set of statistics for the data set.
 *
 * @param int     $mode              one of STATS_BASIC or STATS_FULL
 * @param boolean $returnErrorObject handed on unchanged to calcBasic() / calcFull()
 * @return array an associative array of statistics
 * @throws \PEAR_Exception if no data has been set or $mode is not supported
 * @see calcBasic()
 * @see calcFull()
 */
public function calc($mode, $returnErrorObject = true)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if ($mode == self::STATS_BASIC) {
        return $this->calcBasic($returnErrorObject);
    }

    if ($mode == self::STATS_FULL) {
        return $this->calcFull($returnErrorObject);
    }

    throw new \PEAR_Exception('incorrect mode, expected STATS_BASIC or STATS_FULL');
}
||
283 | |||
/**
 * Calculates the basic set of statistics: min, max, sum, sum2, count,
 * mean, stdev, variance and range.
 *
 * @param boolean $returnErrorObject handed on unchanged to __format() for every value
 * @return array an associative array of statistics
 * @see calc()
 * @see calcFull()
 */
public function calcBasic($returnErrorObject = true)
{
    // result key => name of the method that computes it, in output order
    $statistics = array(
        'min' => 'min',
        'max' => 'max',
        'sum' => 'sum',
        'sum2' => 'sum2',
        'count' => 'count',
        'mean' => 'mean',
        'stdev' => 'stDev',
        'variance' => 'variance',
        'range' => 'range',
    );

    $report = array();
    foreach ($statistics as $key => $method) {
        $report[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    return $report;
}
||
308 | |||
/**
 * Calculates the full set of statistics: everything in the basic set plus
 * median, mode, further means, deviations, skewness, kurtosis, the first
 * five sample central and raw moments, the frequency table and the
 * quartile based measures.
 *
 * @param boolean $returnErrorObject handed on unchanged to __format() for every value
 * @return array an associative array of statistics
 * @see calc()
 * @see calcBasic()
 */
public function calcFull($returnErrorObject = true)
{
    // result key => computing method, for the entries preceding the moment tables
    $beforeMoments = array(
        'min' => 'min',
        'max' => 'max',
        'sum' => 'sum',
        'sum2' => 'sum2',
        'count' => 'count',
        'mean' => 'mean',
        'median' => 'median',
        'mode' => 'mode',
        'midrange' => 'midrange',
        'geometric_mean' => 'geometricMean',
        'harmonic_mean' => 'harmonicMean',
        'stdev' => 'stDev',
        'absdev' => 'absDev',
        'variance' => 'variance',
        'range' => 'range',
        'std_error_of_mean' => 'stdErrorOfMean',
        'skewness' => 'skewness',
        'kurtosis' => 'kurtosis',
        'coeff_of_variation' => 'coeffOfVariation',
    );
    // result key => computing method, for the entries following the moment tables
    $afterMoments = array(
        'frequency' => 'frequency',
        'quartiles' => 'quartiles',
        'interquartile_range' => 'interquartileRange',
        'interquartile_mean' => 'interquartileMean',
        'quartile_deviation' => 'quartileDeviation',
        'quartile_variation_coefficient' => 'quartileVariationCoefficient',
        'quartile_skewness_coefficient' => 'quartileSkewnessCoefficient',
    );

    $report = array();
    foreach ($beforeMoments as $key => $method) {
        $report[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    // moments of order 1 to 5, keyed by their order
    $central = array();
    for ($order = 1; $order <= 5; $order++) {
        $central[$order] = $this->__format($this->sampleCentralMoment($order), $returnErrorObject);
    }
    $report['sample_central_moments'] = $central;

    $raw = array();
    for ($order = 1; $order <= 5; $order++) {
        $raw[$order] = $this->__format($this->sampleRawMoment($order), $returnErrorObject);
    }
    $report['sample_raw_moments'] = $raw;

    foreach ($afterMoments as $key => $method) {
        $report[$key] = $this->__format($this->$method(), $returnErrorObject);
    }

    return $report;
}
||
364 | |||
/**
 * Returns the minimum of the data set. For cummulative data the data
 * points are the array keys, so the minimum is taken over the keys.
 * The result is cached until new data is set.
 *
 * @return mixed the minimum value
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see max()
 */
public function min()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (array_key_exists('min', $this->_calculatedValues)) {
        return $this->_calculatedValues['min'];
    }

    $points = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
        ? array_keys($this->_data)
        : $this->_data;
    $this->_calculatedValues['min'] = min($points);

    return $this->_calculatedValues['min'];
}
||
392 | |||
/**
 * Returns the maximum of the data set. For cummulative data the data
 * points are the array keys, so the maximum is taken over the keys.
 * The result is cached until new data is set.
 *
 * @return mixed the maximum value
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see min()
 */
public function max()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (array_key_exists('max', $this->_calculatedValues)) {
        return $this->_calculatedValues['max'];
    }

    $points = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
        ? array_keys($this->_data)
        : $this->_data;
    $this->_calculatedValues['max'] = max($points);

    return $this->_calculatedValues['max'];
}
||
417 | |||
/**
 * Calculates SUM { xi }.
 * Delegates to sumN(1) and caches the result until new data is set.
 *
 * @return mixed the sum
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see sum2()
 * @see sumN()
 */
public function sum()
{
    if (!array_key_exists('sum', $this->_calculatedValues)) {
        // An exception from sumN() propagates. The former catch block
        // returned a variable that was never assigned, hiding the error.
        $this->_calculatedValues['sum'] = $this->sumN(1);
    }
    return $this->_calculatedValues['sum'];
}
||
440 | |||
/**
 * Calculates SUM { (xi)^2 }.
 * Delegates to sumN(2) and caches the result until new data is set.
 *
 * @return mixed the sum of squares
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see sum()
 * @see sumN()
 */
public function sum2()
{
    if (!array_key_exists('sum2', $this->_calculatedValues)) {
        // An exception from sumN() propagates. The former catch block
        // returned a variable that was never assigned, hiding the error.
        $this->_calculatedValues['sum2'] = $this->sumN(2);
    }
    return $this->_calculatedValues['sum2'];
}
||
463 | |||
/**
 * Calculates SUM { (xi)^n }.
 * For cummulative data every power is weighted by the frequency of its
 * data point. The result is not cached.
 *
 * @param numeric $n the exponent
 * @return mixed the sum
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see sum()
 * @see sum2()
 */
public function sumN($n)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $weighted = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE);
    $total = 0;
    foreach ($this->_data as $key => $entry) {
        if ($weighted) {
            // $key is the data point, $entry its frequency
            $total += $entry * pow((double) $key, (double) $n);
        } else {
            // $entry is the data point itself
            $total += pow((double) $entry, (double) $n);
        }
    }

    return $total;
}
||
492 | |||
/**
 * Calculates PROD { (xi) }, the product of all observations.
 * Delegates to productN(1) and caches the result until new data is set.
 *
 * @return numeric|array the product as a number, or an array of partial
 *                       products when the single value would overflow
 * @throws \PEAR_Exception if no data has been set
 * @see productN()
 */
public function product()
{
    if (!array_key_exists('product', $this->_calculatedValues)) {
        // An exception from productN() propagates. The former catch block
        // returned a variable that was never assigned, hiding the error.
        $this->_calculatedValues['product'] = $this->productN(1);
    }
    return $this->_calculatedValues['product'];
}
||
515 | |||
/**
 * Calculates PROD { (xi)^n }, the product of the n-th powers of all
 * observations. In a cummulative data set a data point with frequency f
 * stands for f observations and therefore contributes (xi^n)^f.
 *
 * The running product is split into partial products as it grows. If
 * multiplying the partial products back together overflows to infinity,
 * the array of partial products is returned instead of a single number.
 *
 * @param numeric $n the exponent
 * @return numeric|array the product as a number, or an array of partial
 *                       products when the single value would overflow
 * @throws \PEAR_Exception if no data has been set
 * @see product()
 */
public function productN($n)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    $prodN = 1.0;
    $partial = array();
    // NOTE(review): the two branches start a new partial product at different
    // thresholds (10000 * $n vs 10 * $n); kept as found.
    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        foreach ($this->_data as $val => $freq) {
            if ($val == 0) {
                // a single zero observation makes the whole product zero
                return 0.0;
            }
            // $freq observations of $val: raise to the power n * freq.
            // Multiplying by $freq (as a sum would) gave a wrong product.
            $prodN *= pow((double) $val, (double) $n * $freq);
            if ($prodN > 10000 * $n) {
                $partial[] = $prodN;
                $prodN = 1.0;
            }
        }
    } else {
        foreach ($this->_data as $val) {
            if ($val == 0) {
                // a single zero observation makes the whole product zero
                return 0.0;
            }
            $prodN *= pow((double) $val, (double) $n);
            if ($prodN > 10 * $n) {
                $partial[] = $prodN;
                $prodN = 1.0;
            }
        }
    }

    if (empty($partial)) {
        return $prodN;
    }

    $partial[] = $prodN;
    // try to reduce the partial products to a single value
    $tmp = 1.0;
    foreach ($partial as $val) {
        $tmp *= $val;
        if (is_infinite($tmp)) {
            // cannot reduce without overflow, hand back the partial products
            return $partial;
        }
    }
    return $tmp;
}
||
573 | |||
/**
 * Returns the number of data points in the set. For cummulative data
 * this is the size of the expanded data set. The result is cached until
 * new data is set.
 *
 * @return int the count
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 */
public function count()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    if (array_key_exists('count', $this->_calculatedValues)) {
        return $this->_calculatedValues['count'];
    }

    $this->_calculatedValues['count'] = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
        ? count($this->_dataExpanded)
        : count($this->_data);

    return $this->_calculatedValues['count'];
}
||
597 | |||
/**
 * Calculates the mean (average) of the data points: sum() / count().
 * The result is cached until new data is set.
 *
 * @return mixed the mean value
 * @throws \PEAR_Exception if no data has been set
 * @see calc()
 * @see sum()
 * @see count()
 */
public function mean()
{
    if (!array_key_exists('mean', $this->_calculatedValues)) {
        // Exceptions from sum()/count() propagate. The former catch blocks
        // returned variables that were never assigned, hiding the error.
        $sum = $this->sum();
        $count = $this->count();
        $this->_calculatedValues['mean'] = $sum / $count;
    }
    return $this->_calculatedValues['mean'];
}
||
625 | |||
626 | /** |
||
627 | * Calculates the range of the data set = max - min |
||
628 | * |
||
629 | * @access public |
||
630 | * @return mixed the value of the range on success, a PEAR_Error object otherwise. |
||
631 | */ |
||
632 | View Code Duplication | public function range() |
|
648 | } |
||
649 | |||
/**
 * Calculates the variance of the data points in the set by delegating
 * to __calcVariance(). The result is cached until new data is set.
 *
 * @return mixed the variance value
 * @throws \PEAR_Exception whatever __calcVariance() raises is propagated
 * @see calc()
 * @see __sumdiff()
 * @see count()
 */
public function variance()
{
    if (!array_key_exists('variance', $this->_calculatedValues)) {
        // An exception from __calcVariance() propagates. The former catch
        // block returned a variable that was never assigned, hiding the error.
        $this->_calculatedValues['variance'] = $this->__calcVariance();
    }
    return $this->_calculatedValues['variance'];
}
||
673 | |||
/**
 * Calculates the standard deviation of the data points in the set as
 * the square root of variance(). The result is cached until new data
 * is set.
 *
 * @return mixed the standard deviation
 * @throws \PEAR_Exception whatever variance() raises is propagated
 * @see calc()
 * @see variance()
 */
public function stDev()
{
    if (!array_key_exists('stDev', $this->_calculatedValues)) {
        // An exception from variance() propagates. The former catch block
        // returned a variable that was never assigned, hiding the error.
        $this->_calculatedValues['stDev'] = sqrt($this->variance());
    }
    return $this->_calculatedValues['stDev'];
}
||
696 | |||
/**
 * Calculates the variance of the data points in the set around a fixed
 * mean (average) value, by handing that mean to __calcVariance().
 * Not used in calcBasic(), calcFull() or calc(); the result is not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return mixed the variance
 * @see __sumdiff()
 * @see count()
 * @see variance()
 */
public function varianceWithMean($mean)
{
    $varianceAroundMean = $this->__calcVariance($mean);

    return $varianceAroundMean;
}
||
714 | |||
/**
 * Calculates the standard deviation of the data points in the set around
 * a fixed mean (average) value, as the square root of varianceWithMean().
 * Not used in calcBasic(), calcFull() or calc(); the result is not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return mixed the standard deviation
 * @throws \PEAR_Exception whatever varianceWithMean() raises is propagated
 * @see varianceWithMean()
 * @see stDev()
 */
public function stDevWithMean($mean)
{
    // An exception from varianceWithMean() propagates. The former catch
    // block returned a variable that was never assigned, hiding the error.
    return sqrt($this->varianceWithMean($mean));
}
||
737 | |||
738 | /** |
||
739 | * Calculates the absolute deviation of the data points in the set |
||
740 | * Handles cummulative data sets correctly |
||
741 | * |
||
742 | * @access public |
||
743 | * @return mixed the absolute deviation on success, a PEAR_Error object otherwise |
||
744 | * @see calc() |
||
745 | * @see __sumabsdev() |
||
746 | * @see count() |
||
747 | * @see absDevWithMean() |
||
748 | */ |
||
749 | View Code Duplication | public function absDev() |
|
761 | } |
||
762 | |||
/**
 * Calculates the absolute deviation of the data points in the set around
 * a fixed mean (average) value, by handing that mean to
 * __calcAbsoluteDeviation(). Not used in calcBasic(), calcFull() or
 * calc(); the result is not cached.
 *
 * @param numeric $mean the fixed mean value
 * @return mixed the absolute deviation
 * @see __sumabsdev()
 * @see absDev()
 */
public function absDevWithMean($mean)
{
    $deviationAroundMean = $this->__calcAbsoluteDeviation($mean);

    return $deviationAroundMean;
}
||
779 | |||
/**
 * Calculates the skewness of the data distribution in the set.
 * The skewness measures the degree of asymmetry of a distribution and
 * is related to its third central moment:
 *   - a normal distribution has a skewness = 0
 *   - a tail towards the high end of the scale (positive skew) gives a skewness > 0
 *   - a tail towards the low end of the scale (negative skew) gives a skewness < 0
 *
 * Computed as __sumdiff(3) / (count * stDev^3) and cached until new
 * data is set.
 * NOTE(review): a zero standard deviation makes the divisor zero; not
 * guarded here.
 *
 * @return mixed the skewness value
 * @throws \PEAR_Exception whatever count(), stDev() or __sumdiff() raise is propagated
 * @see __sumdiff()
 * @see count()
 * @see stDev()
 * @see calc()
 */
public function skewness()
{
    if (!array_key_exists('skewness', $this->_calculatedValues)) {
        // Exceptions propagate to the caller. The former nested catch blocks
        // returned variables that were never assigned, hiding the error.
        $count = $this->count();
        $stDev = $this->stDev();
        $sumdiff3 = $this->__sumdiff(3);

        $this->_calculatedValues['skewness'] = ($sumdiff3 / ($count * pow($stDev, 3)));
    }
    return $this->_calculatedValues['skewness'];
}
821 | |||
/**
 * Calculates the kurtosis of the data distribution in the set.
 * The kurtosis measures the degree of peakedness of a distribution. It
 * is also called the "excess" or "excess coefficient", and is a
 * normalized form of the fourth central moment:
 *   - a normal distribution has a kurtosis = 0
 *   - a narrow and peaked (leptokurtic) distribution has a kurtosis > 0
 *   - a flat and wide (platykurtic) distribution has a kurtosis < 0
 *
 * Computed as __sumdiff(4) / (count * stDev^4) - 3 and cached until new
 * data is set.
 * NOTE(review): a zero standard deviation makes the divisor zero; not
 * guarded here.
 *
 * @return mixed the kurtosis value
 * @throws \PEAR_Exception whatever count(), stDev() or __sumdiff() raise is propagated
 * @see __sumdiff()
 * @see count()
 * @see stDev()
 * @see calc()
 */
public function kurtosis()
{
    if (!array_key_exists('kurtosis', $this->_calculatedValues)) {
        // Exceptions propagate to the caller. The former nested catch blocks
        // returned variables that were never assigned, hiding the error.
        $count = $this->count();
        $stDev = $this->stDev();
        $sumdiff4 = $this->__sumdiff(4);

        $this->_calculatedValues['kurtosis'] = ($sumdiff4 / ($count * pow($stDev, 4))) - 3;
    }
    return $this->_calculatedValues['kurtosis'];
}
863 | |||
/**
 * Calculates the median of a data set.
 *
 * The median is the value such that half of the points are below it
 * in a sorted data set. If the number of values is odd, it is the middle
 * item; if it is even, it is the average of the two middle items.
 *
 * For cumulative data sets the expanded data array
 * ($this->_dataExpanded) is used instead of $this->_data.
 * The result is cached in $this->_calculatedValues['median'].
 *
 * NOTE(review): the values are picked by position, so this relies on the
 * data array being sorted and zero-indexed before the call; presumably
 * that is done when the data is set. Confirm.
 *
 * @access public
 * @return mixed the median value
 * @throws \PEAR_Exception if the data has not been set, or if count() fails
 * @see count()
 * @see calc()
 */
public function median()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (!array_key_exists('median', $this->_calculatedValues)) {
        // A failure in count() propagates; there is no value to return.
        $n = $this->count();

        // Read the array only after count() has run, so any state that
        // count() prepares is visible without taking a reference.
        if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
            $arr = $this->_dataExpanded;
        } else {
            $arr = $this->_data;
        }

        $h = intval($n / 2);
        if ($n % 2 == 0) {
            // even number of items: average the two middle ones
            $median = ($arr[$h] + $arr[$h - 1]) / 2;
        } else {
            $median = $arr[$h];
        }
        $this->_calculatedValues['median'] = $median;
    }
    return $this->_calculatedValues['median'];
}
||
904 | |||
/**
 * Determines the mode(s) of a data set.
 *
 * The mode is the value with the highest frequency in the data set;
 * several values can share that highest frequency, so an array is returned.
 *
 * For cumulative data sets the data array itself serves as the
 * value => frequency table; otherwise the table is built by frequency().
 * The result is cached in $this->_calculatedValues['mode'].
 *
 * @access public
 * @return array the list of mode values (taken from the table keys)
 * @throws \PEAR_Exception if the data has not been set
 * @see frequency()
 * @see calc()
 */
public function mode()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (array_key_exists('mode', $this->_calculatedValues)) {
        return $this->_calculatedValues['mode'];
    }

    $ranked = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE)
        ? $this->_data
        : $this->frequency();
    // highest frequency first, keys (the data values) preserved
    arsort($ranked);

    $isHead = true;
    foreach ($ranked as $value => $occurrences) {
        if ($isHead) {
            // the head of the descending table is always a mode
            $modes = array($value);
            $best = $occurrences;
            $isHead = false;
        } elseif ($best == $occurrences) {
            $modes[] = $value;
        } elseif ($best > $occurrences) {
            // every remaining entry is less frequent
            break;
        }
    }

    $this->_calculatedValues['mode'] = $modes;
    return $this->_calculatedValues['mode'];
}
||
948 | |||
/**
 * Calculates the midrange of a data set.
 *
 * The midrange is the average of the minimum and maximum of the data set:
 *
 *   midrange = (max + min) / 2
 *
 * The result is cached in $this->_calculatedValues['midrange'].
 *
 * @access public
 * @return float|int the midrange value
 * @throws \PEAR_Exception any exception raised by min() or max()
 *                         propagates to the caller
 * @see min()
 * @see max()
 * @see calc()
 */
public function midrange()
{
    if (!array_key_exists('midrange', $this->_calculatedValues)) {
        // Failures propagate: catching them here would leave $min/$max
        // unassigned and nothing meaningful to return.
        $min = $this->min();
        $max = $this->max();

        $this->_calculatedValues['midrange'] = (($max + $min) / 2);
    }
    return $this->_calculatedValues['midrange'];
}
||
978 | |||
/**
 * Calculates the geometrical mean of the data points in the set.
 *
 * The mean is the count()-th root of product(). When product() returns
 * an array, the root of each element is taken and the roots are
 * multiplied together.
 * NOTE(review): the array form presumably holds partial products to
 * avoid overflow; confirm against product().
 *
 * A product of zero yields 0.0 (this case is returned without being
 * cached); other results are cached in
 * $this->_calculatedValues['geometricMean'].
 *
 * @access public
 * @return float|int the geometrical mean value
 * @throws \PEAR_Exception if a scalar product is negative, or if count()
 *                         or product() fails
 * @see calc()
 * @see product()
 * @see count()
 */
public function geometricMean()
{
    if (!array_key_exists('geometricMean', $this->_calculatedValues)) {
        // Helper failures propagate instead of being swallowed.
        $count = $this->count();
        $prod = $this->product();

        if (is_array($prod)) {
            $geomMean = 1.0;
            foreach ($prod as $val) {
                $geomMean *= pow($val, 1 / $count);
            }
            $this->_calculatedValues['geometricMean'] = $geomMean;
        } else {
            if ($prod == 0.0) {
                return 0.0;
            }
            if ($prod < 0) {
                throw new \PEAR_Exception('The product of the data set is negative, geometric mean undefined.');
            }
            $this->_calculatedValues['geometricMean'] = pow($prod, 1 / $count);
        }
    }
    return $this->_calculatedValues['geometricMean'];
}
||
1020 | |||
/**
 * Calculates the harmonic mean of the data points in the set:
 *
 *   harmonic mean = N / SUM { 1 / xi }
 *
 * For cumulative data sets each value contributes freq / value to the
 * sum of inverses. The result is cached in
 * $this->_calculatedValues['harmonicMean'].
 *
 * @access public
 * @return float|int the harmonic mean value
 * @throws \PEAR_Exception if the data has not been set, if any data value
 *                         is zero, or if count() fails
 * @see calc()
 * @see count()
 */
public function harmonicMean()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (!array_key_exists('harmonicMean', $this->_calculatedValues)) {
        // A failure in count() propagates; there is no value to return.
        $count = $this->count();

        $invsum = 0.0;
        if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
            // keys are the data values, entries their frequencies
            foreach ($this->_data as $val => $freq) {
                if ($val == 0) {
                    throw new \PEAR_Exception('cannot calculate a ' .
                        'harmonic mean with data values of zero.');
                }
                $invsum += $freq / $val;
            }
        } else {
            foreach ($this->_data as $val) {
                if ($val == 0) {
                    throw new \PEAR_Exception('cannot calculate a ' .
                        'harmonic mean with data values of zero.');
                }
                $invsum += 1 / $val;
            }
        }
        $this->_calculatedValues['harmonicMean'] = $count / $invsum;
    }
    return $this->_calculatedValues['harmonicMean'];
}
||
1063 | |||
/**
 * Calculates the nth central moment (m{n}) of a data set.
 *
 * The definition of a sample central moment is:
 *
 *   m{n} = 1/N * SUM { (xi - avg)^n }
 *
 * where: N = sample size, avg = sample mean.
 *
 * The first central moment is returned as 0 without touching the data.
 * The result is not cached.
 *
 * @access public
 * @param integer $n moment to calculate
 * @return float|int the numeric value of the moment
 * @throws \PEAR_Exception if $n is not an integer >= 1, if the data set
 *                         has zero entries, or if count() or __sumdiff()
 *                         fails
 */
public function sampleCentralMoment($n)
{
    if (!is_int($n) || $n < 1) {
        throw new \PEAR_Exception('moment must be a positive integer >= 1.');
    }

    if ($n == 1) {
        return 0;
    }

    // Helper failures propagate instead of being swallowed.
    $count = $this->count();
    if ($count == 0) {
        throw new \PEAR_Exception("Cannot calculate {$n}th sample moment, " .
            'there are zero data entries');
    }
    $sum = $this->__sumdiff($n);

    return ($sum / $count);
}
||
1102 | |||
/**
 * Calculates the nth raw moment (m{n}) of a data set.
 *
 * The definition of a sample raw moment is:
 *
 *   m{n} = 1/N * SUM { xi^n }
 *
 * where: N = sample size.
 *
 * The sum is obtained from sumN($n). The result is not cached.
 *
 * @access public
 * @param integer $n moment to calculate
 * @return float|int the numeric value of the moment
 * @throws \PEAR_Exception if $n is not an integer >= 1, if the data set
 *                         has zero entries, or if count() or sumN() fails
 */
public function sampleRawMoment($n)
{
    if (!is_int($n) || $n < 1) {
        throw new \PEAR_Exception('moment must be a positive integer >= 1.');
    }

    // Helper failures propagate instead of being swallowed.
    $count = $this->count();
    if ($count == 0) {
        throw new \PEAR_Exception("Cannot calculate {$n}th raw moment, " .
            'there are zero data entries.');
    }
    $sum = $this->sumN($n);

    return ($sum / $count);
}
||
1138 | |||
/**
 * Calculates the coefficient of variation of a data set.
 *
 * The coefficient of variation measures the spread of a set of data
 * as a proportion of its mean:
 *
 *   coeff of variation = stDev / mean
 *
 * It is often expressed as a percentage; this method returns the plain
 * ratio. The result is cached in
 * $this->_calculatedValues['coeffOfVariation'].
 *
 * @access public
 * @return float|int the coefficient of variation
 * @throws \PEAR_Exception if the mean is zero, or if mean() or stDev() fails
 * @see stDev()
 * @see mean()
 * @see calc()
 */
public function coeffOfVariation()
{
    if (!array_key_exists('coeffOfVariation', $this->_calculatedValues)) {
        // Helper failures propagate instead of being swallowed.
        $mean = $this->mean();

        if ($mean == 0.0) {
            throw new \PEAR_Exception('cannot calculate the coefficient ' .
                'of variation, mean of sample is zero');
        }
        $stDev = $this->stDev();

        $this->_calculatedValues['coeffOfVariation'] = $stDev / $mean;
    }
    return $this->_calculatedValues['coeffOfVariation'];
}
||
1174 | |||
/**
 * Calculates the standard error of the mean.
 *
 * It is the standard deviation of the sampling distribution of
 * the mean. The formula is:
 *
 *   S.E. Mean = SD / (N)^(1/2)
 *
 * This formula does not assume a normal distribution, and shows
 * that the size of the standard error of the mean is inversely
 * proportional to the square root of the sample size.
 *
 * The result is cached in $this->_calculatedValues['stdErrorOfMean'].
 *
 * @access public
 * @return float the standard error of the mean
 * @throws \PEAR_Exception any exception raised by count() or stDev()
 *                         propagates to the caller
 * @see stDev()
 * @see count()
 * @see calc()
 */
public function stdErrorOfMean()
{
    if (!array_key_exists('stdErrorOfMean', $this->_calculatedValues)) {
        // Helper failures propagate instead of being swallowed.
        $count = $this->count();
        $stDev = $this->stDev();

        $this->_calculatedValues['stdErrorOfMean'] = $stDev / sqrt($count);
    }
    return $this->_calculatedValues['stdErrorOfMean'];
}
||
1209 | |||
/**
 * Builds the value => frequency table of a data set.
 *
 * For cumulative data sets the data array already has that shape and is
 * used unchanged. Otherwise each value is converted to a string, used as
 * an array key and counted; the resulting table is sorted by key.
 * The result is cached in $this->_calculatedValues['frequency'].
 *
 * @access public
 * @return array an associative array of value => frequency items
 * @throws \PEAR_Exception if the data has not been set
 * @see calc()
 */
public function frequency()
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }
    if (array_key_exists('frequency', $this->_calculatedValues)) {
        return $this->_calculatedValues['frequency'];
    }

    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        $table = $this->_data;
    } else {
        $table = array();
        foreach ($this->_data as $item) {
            // string form of the value is the table key
            $key = (string) $item;
            $table[$key] = isset($table[$key]) ? $table[$key] + 1 : 1;
        }
        ksort($table);
    }

    $this->_calculatedValues['frequency'] = $table;
    return $this->_calculatedValues['frequency'];
}
||
1242 | |||
/**
 * Calculates the quartiles of the data set.
 *
 * The quartiles are defined as the values that divide a sorted
 * data set into four equal-sized subsets, and correspond to the
 * 25th, 50th, and 75th percentiles.
 *
 * The result is cached in $this->_calculatedValues['quartiles'].
 *
 * @access public
 * @return array the quartiles, stored under the keys '25', '50' and '75'
 * @throws \PEAR_Exception any exception raised by percentile()
 *                         propagates to the caller
 * @see percentile()
 */
public function quartiles()
{
    if (!array_key_exists('quartiles', $this->_calculatedValues)) {
        // Failures in percentile() propagate instead of being swallowed.
        $q1 = $this->percentile(25);
        $q2 = $this->percentile(50);
        $q3 = $this->percentile(75);

        $this->_calculatedValues['quartiles'] = array(
            '25' => $q1,
            '50' => $q2,
            '75' => $q3,
        );
    }
    return $this->_calculatedValues['quartiles'];
}
||
1279 | |||
/**
 * Calculates the interquartile mean.
 *
 * The interquartile mean is defined as the mean of the values left
 * after discarding the lower 25% and top 25% ranked values, i.e.:
 *
 *   interquart mean = mean(<P(25),P(75)>)
 *
 * where: P = percentile
 *
 * Values equal to P(25) or P(75) are included in the mean. The values
 * are read from getData(true).
 * NOTE(review): getData(true) presumably yields the expanded data for
 * cumulative sets; confirm against getData().
 *
 * The result is cached in $this->_calculatedValues['interquartileMean'].
 *
 * @todo need to double check the equation
 * @access public
 * @return float|int the interquartile mean
 * @throws \PEAR_Exception if no value lies within the interquartile
 *                         range, or if quartiles() fails
 * @see quartiles()
 */
public function interquartileMean()
{
    if (!array_key_exists('interquartileMean', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; there is nothing to return.
        $quart = $this->quartiles();
        $q3 = $quart['75'];
        $q1 = $quart['25'];

        $sum = 0;
        $n = 0;
        foreach ($this->getData(true) as $val) {
            if ($val >= $q1 && $val <= $q3) {
                $sum += $val;
                $n++;
            }
        }
        if ($n == 0) {
            throw new \PEAR_Exception('error calculating interquartile mean, ' .
                'empty interquartile range of values.');
        }
        $this->_calculatedValues['interquartileMean'] = $sum / $n;
    }
    return $this->_calculatedValues['interquartileMean'];
}
||
1319 | |||
/**
 * Calculates the interquartile range.
 *
 * The interquartile range is the distance between the 75th and 25th
 * percentiles. Basically the range of the middle 50% of the data set,
 * and thus is not affected by outliers or extreme values.
 *
 *   interquart range = P(75) - P(25)
 *
 * where: P = percentile
 *
 * The result is cached in $this->_calculatedValues['interquartileRange'].
 *
 * @access public
 * @return float|int the interquartile range
 * @throws \PEAR_Exception any exception raised by quartiles()
 *                         propagates to the caller
 * @see quartiles()
 */
public function interquartileRange()
{
    if (!array_key_exists('interquartileRange', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; there is nothing to return.
        $quart = $this->quartiles();

        $this->_calculatedValues['interquartileRange'] = $quart['75'] - $quart['25'];
    }
    return $this->_calculatedValues['interquartileRange'];
}
||
1347 | |||
/**
 * Calculates the quartile deviation, which is half of the interquartile
 * range value:
 *
 *   quart dev = (P(75) - P(25)) / 2
 *
 * where: P = percentile
 *
 * The result is cached in $this->_calculatedValues['quartileDeviation'].
 *
 * @access public
 * @return float|int the quartile deviation
 * @throws \PEAR_Exception any exception raised by interquartileRange()
 *                         propagates to the caller
 * @see quartiles()
 * @see interquartileRange()
 */
public function quartileDeviation()
{
    if (!array_key_exists('quartileDeviation', $this->_calculatedValues)) {
        // A failure in interquartileRange() propagates to the caller.
        $iqr = $this->interquartileRange();

        $this->_calculatedValues['quartileDeviation'] = $iqr / 2;
    }
    return $this->_calculatedValues['quartileDeviation'];
}
||
1372 | |||
/**
 * Calculates the quartile variation coefficient, defined as follows:
 *
 *   quart var coeff = 100 * (P(75) - P(25)) / (P(75) + P(25))
 *
 * where: P = percentile
 *
 * The result is cached in
 * $this->_calculatedValues['quartileVariationCoefficient'].
 *
 * @todo need to double check the equation
 * @access public
 * @return float|int the quartile variation coefficient
 * @throws \PEAR_Exception any exception raised by quartiles()
 *                         propagates to the caller
 * @see quartiles()
 */
public function quartileVariationCoefficient()
{
    if (!array_key_exists('quartileVariationCoefficient', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; there is nothing to return.
        $quart = $this->quartiles();
        $q3 = $quart['75'];
        $q1 = $quart['25'];

        $d = $q3 - $q1;
        $s = $q3 + $q1;
        $this->_calculatedValues['quartileVariationCoefficient'] = 100 * $d / $s;
    }
    return $this->_calculatedValues['quartileVariationCoefficient'];
}
||
1401 | |||
/**
 * Calculates the quartile skewness coefficient (also known as Bowley
 * Skewness), defined as follows:
 *
 *   quart skewness coeff = (P(25) - 2*P(50) + P(75)) / (P(75) - P(25))
 *
 * where: P = percentile
 *
 * The result is cached in
 * $this->_calculatedValues['quartileSkewnessCoefficient'].
 *
 * @todo need to double check the equation
 * @access public
 * @return float|int the quartile skewness coefficient
 * @throws \PEAR_Exception any exception raised by quartiles()
 *                         propagates to the caller
 * @see quartiles()
 */
public function quartileSkewnessCoefficient()
{
    if (!array_key_exists('quartileSkewnessCoefficient', $this->_calculatedValues)) {
        // A failure in quartiles() propagates; there is nothing to return.
        $quart = $this->quartiles();
        $q3 = $quart['75'];
        $q2 = $quart['50'];
        $q1 = $quart['25'];

        $d = $q3 - 2 * $q2 + $q1;
        $s = $q3 - $q1;
        $this->_calculatedValues['quartileSkewnessCoefficient'] = $d / $s;
    }
    return $this->_calculatedValues['quartileSkewnessCoefficient'];
}
||
1432 | |||
/**
 * Estimates the pth percentile of the data set.
 *
 * The pth percentile is the value such that p% of a sorted data set
 * is smaller than it, and (100 - p)% of the data is larger.
 *
 * The value is picked from the data list as follows:
 *
 * - Count the number of values: n
 * - Calculate the 1-based position in the data list: i = p * (n + 1) / 100
 * - if i < 1, return the first entry (the minimum of a sorted set)
 * - if i > n, return the last entry (the maximum of a sorted set)
 * - if i is an integer, return the data at that position
 * - otherwise, average the entries at the adjacent positions to i
 *
 * The bounds are tested before the integer case so that an integral
 * position outside 1..n (e.g. p = 0 or p = 100) cannot index past the
 * ends of the data array.
 *
 * The median is the 50th percentile value. The result is not cached.
 *
 * NOTE(review): the entries are picked by position, so this relies on
 * the data array being sorted and zero-indexed; confirm where the data
 * is set.
 *
 * @todo need to double check generality of the algorithm
 *
 * @access public
 * @param numeric $p the percentile to estimate, e.g. 25 for 25th percentile
 * @return mixed a numeric value
 * @throws \PEAR_Exception any exception raised by count() propagates
 *                         to the caller
 * @see quartiles()
 * @see median()
 */
public function percentile($p)
{
    // A failure in count() propagates; there is no value to return.
    $count = $this->count();

    if ($this->_dataOption == self::STATS_DATA_CUMMULATIVE) {
        $data = $this->_dataExpanded;
    } else {
        $data = $this->_data;
    }

    // 1-based (possibly fractional) position of the percentile
    $obsidx = $p * ($count + 1) / 100;

    if ($obsidx < 1) {
        return $data[0];
    }
    if ($obsidx > $count) {
        return $data[($count - 1)];
    }
    if (intval($obsidx) == $obsidx) {
        return $data[(intval($obsidx) - 1)];
    }

    // fractional position: average the two neighbouring entries
    $left = (int) floor($obsidx - 1);
    $right = (int) ceil($obsidx - 1);
    return ($data[$left] + $data[$right]) / 2;
}
||
1483 | |||
1484 | // private methods |
||
1485 | |||
1486 | /** |
||
1487 | * Utility function to calculate: SUM { (xi - mean)^n } |
||
1488 | * |
||
1489 | * @access private |
||
1490 | * @param numeric $power the exponent |
||
1491 | * @param optional double $mean the data set mean value |
||
1492 | * @return mixed the sum on success, a PEAR_Error object otherwise |
||
1493 | * |
||
1494 | * @see stDev() |
||
1495 | * @see varianceWithMean(); |
||
1496 | * @see skewness(); |
||
1497 | * @see kurtosis(); |
||
1498 | */ |
||
1499 | public function __sumdiff($power, $mean = null) |
||
1522 | } |
||
1523 | |||
/**
 * Utility function to calculate the variance with or without
 * a fixed mean:
 *
 *   variance = SUM { (xi - mean)^2 } / (N - 1)
 *
 * The sum is obtained from __sumdiff(2, $mean). The result is not cached.
 *
 * @access private
 * @param mixed $mean the fixed mean to use, null as default
 * @return float|int the variance
 * @throws \PEAR_Exception if the data has not been set, if the data set
 *                         holds a single data point, or if __sumdiff()
 *                         or count() fails
 * @see variance()
 * @see varianceWithMean()
 */
public function __calcVariance($mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    // Helper failures propagate instead of being swallowed.
    $sumdiff2 = $this->__sumdiff(2, $mean);
    $count = $this->count();

    if ($count == 1) {
        throw new \PEAR_Exception('cannot calculate variance of a singe data point');
    }
    return ($sumdiff2 / ($count - 1));
}
||
1555 | |||
1556 | /** |
||
1557 | * Utility function to calculate the absolute deviation with or without |
||
1558 | * a fixed mean |
||
1559 | * |
||
1560 | * @access private |
||
1561 | * @param $mean the fixed mean to use, null as default |
||
1562 | * @return mixed a numeric value on success, a PEAR_Error otherwise |
||
1563 | * @see absDev() |
||
1564 | * @see absDevWithMean() |
||
1565 | */ |
||
1566 | public function __calcAbsoluteDeviation($mean = null) |
||
1583 | } |
||
1584 | |||
/**
 * Utility function to calculate: SUM { | xi - mean | }
 *
 * When no mean is supplied, mean() is used. For cumulative data sets
 * each absolute deviation is weighted by the frequency of its value.
 *
 * @access private
 * @param optional double $mean the mean value for the set or population
 * @return float|int the sum of absolute deviations
 * @throws \PEAR_Exception if the data has not been set
 *
 * @see absDev()
 * @see absDevWithMean()
 */
public function __sumabsdev($mean = null)
{
    if ($this->_data == null) {
        throw new \PEAR_Exception('data has not been set');
    }

    $center = is_null($mean) ? $this->mean() : $mean;
    $weighted = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE);

    $total = 0;
    foreach ($this->_data as $key => $entry) {
        if ($weighted) {
            // key is the data value, entry its frequency
            $total += $entry * abs($key - $center);
        } else {
            $total += abs($entry - $center);
        }
    }
    return $total;
}
||
1615 | |||
1616 | /** |
||
1617 | * Utility function to format a PEAR_Error to be used by calc(), |
||
1618 | * calcBasic() and calcFull() |
||
1619 | * |
||
1620 | * @access private |
||
1621 | * @param mixed $v value to be formatted |
||
1622 | * @param boolean $useErrorObject whether the raw PEAR_Error (when true, default), |
||
1623 | * or only the error message will be returned (when false) |
||
1624 | * @return mixed if the value is a PEAR_Error object, and $useErrorObject |
||
1625 | * is false, then a string with the error message will be returned, |
||
1626 | * otherwise the value will not be modified and returned as passed. |
||
1627 | */ |
||
1628 | public function __format($v, $useErrorObject = true) |
||
1634 | } |
||
1635 | } |
||
1636 | |||
1637 | /** |
||
1638 | * Utility function to validate the data and modify it |
||
1639 | * according to the current null handling option |
||
1640 | * |
||
1641 | * @access private |
||
1642 | * @return mixed true on success, a PEAR_Error object otherwise |
||
1643 | * |
||
1644 | * @see setData() |
||
1645 | */ |
||
1646 | public function _validate() |
||
1647 | { |
||
1648 | $cummulativeData = ($this->_dataOption == self::STATS_DATA_CUMMULATIVE); |
||
1715 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.