Test Setup Failed
Push — master ( 3baf15...4590d5 )
by Arkadiusz
02:24
created

src/Math/Statistic/ANOVA.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Math\Statistic;
6
7
use Phpml\Exception\InvalidArgumentException;
8
9
/**
 * Analysis of variance
 * https://en.wikipedia.org/wiki/Analysis_of_variance
 */
final class ANOVA
{
    /**
     * The one-way ANOVA tests the null hypothesis that 2 or more groups have
     * the same population mean. The test is applied to samples from two or
     * more groups, possibly with differing sizes.
     *
     * The input is re-indexed before use, so classes may be keyed by label and
     * samples/features may use arbitrary keys; the result is always indexed by
     * feature position (0..n-1).
     *
     * Degenerate features do not raise an error: when the within-group mean
     * square is zero the F value follows IEEE-754 division, i.e. NAN when the
     * between-group mean square is zero as well (for example a constant
     * feature) and +/-INF otherwise.
     *
     * @param array[] $samples - each row is class samples
     *
     * @return float[] F statistic for every feature
     *
     * @throws InvalidArgumentException when fewer than 2 classes are given, a class
     *                                  is empty, or samples are not arrays of equal length
     */
    public static function oneWayF(array $samples): array
    {
        $classes = count($samples);
        if ($classes < 2) {
            throw new InvalidArgumentException('The array must have at least 2 elements');
        }

        // From here on every level is a 0-based list and the data is rectangular,
        // which is what the [0] / [0][0] look-ups in the helpers rely on.
        $samples = self::normalizeSamples($samples);

        $samplesPerClass = array_map(static function (array $class): int {
            return count($class);
        }, $samples);
        $allSamples = (int) array_sum($samplesPerClass);
        $ssAllSamples = self::sumOfSquaresPerFeature($samples);
        $sumSamples = self::sumOfFeaturesPerClass($samples);
        $squareSumSamples = self::sumOfSquares($sumSamples);
        $sumSamplesSquare = self::squaresSum($sumSamples);
        $ssbn = self::calculateSsbn($samples, $sumSamplesSquare, $samplesPerClass, $squareSumSamples, $allSamples);
        $sswn = self::calculateSswn($ssbn, $ssAllSamples, $squareSumSamples, $allSamples);
        $dfbn = $classes - 1;
        $dfwn = $allSamples - $classes;

        $f = [];
        foreach ($ssbn as $index => $ssBetween) {
            // $dfwn is 0 when every class holds a single sample and the within-group
            // mean square is 0 for constant features, so all divisions are guarded.
            $msb = self::divide($ssBetween, $dfbn);
            $msw = self::divide($sswn[$index], $dfwn);
            $f[$index] = self::divide($msb, $msw);
        }

        return $f;
    }

    /**
     * Validates the input and re-indexes classes, samples and features to 0-based lists.
     *
     * @param array[] $samples classes of samples, with arbitrary keys
     *
     * @return array[] the same data as list of classes => list of samples => list of features
     *
     * @throws InvalidArgumentException when a class is empty or not an array, a sample is
     *                                  not an array, or samples differ in feature count
     */
    private static function normalizeSamples(array $samples): array
    {
        $featureCount = null;
        $normalized = [];
        foreach ($samples as $class) {
            if (!is_array($class) || count($class) === 0) {
                throw new InvalidArgumentException('Each class must be a non-empty array of samples');
            }

            $rows = [];
            foreach ($class as $sample) {
                if (!is_array($sample)) {
                    throw new InvalidArgumentException('Each sample must be an array of features');
                }

                // The first sample seen fixes the expected number of features.
                if ($featureCount === null) {
                    $featureCount = count($sample);
                } elseif (count($sample) !== $featureCount) {
                    throw new InvalidArgumentException('All samples must have the same number of features');
                }

                $rows[] = array_values($sample);
            }

            $normalized[] = $rows;
        }

        return $normalized;
    }

    /**
     * Divides two numbers following IEEE-754 rules for a zero denominator instead of
     * raising a warning (PHP 7) or a DivisionByZeroError (PHP 8).
     *
     * @return float the quotient; NAN for 0/0 or NAN/0, +INF or -INF for any other x/0
     */
    private static function divide(float $numerator, float $denominator): float
    {
        if ($denominator !== 0.0) {
            return $numerator / $denominator;
        }

        if ($numerator === 0.0 || is_nan($numerator)) {
            return NAN;
        }

        return $numerator > 0 ? INF : -INF;
    }

    /**
     * Sums the squared feature values over all samples of all classes.
     *
     * NOTE(review): a static-analysis report flagged this method as duplicated
     * elsewhere in the project; consider extracting a shared helper - confirm
     * against the codebase.
     *
     * @param array[] $samples normalized classes of samples
     *
     * @return array one sum of squares per feature
     */
    private static function sumOfSquaresPerFeature(array $samples): array
    {
        $sum = array_fill(0, count($samples[0][0]), 0);
        foreach ($samples as $class) {
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature ** 2;
                }
            }
        }

        return $sum;
    }

    /**
     * Sums the feature values of the samples separately for every class.
     *
     * @param array[] $samples normalized classes of samples
     *
     * @return array[] for every class, one sum per feature
     */
    private static function sumOfFeaturesPerClass(array $samples): array
    {
        return array_map(static function (array $class): array {
            $sum = array_fill(0, count($class[0]), 0);
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature;
                }
            }

            return $sum;
        }, $samples);
    }

    /**
     * Adds the per-class sums together for every feature and squares each total.
     *
     * NOTE(review): a static-analysis report flagged this method as duplicated
     * elsewhere in the project; consider extracting a shared helper - confirm
     * against the codebase.
     *
     * @param array[] $sums per-class feature sums
     *
     * @return array squared grand total per feature
     */
    private static function sumOfSquares(array $sums): array
    {
        $squares = array_fill(0, count($sums[0]), 0);
        foreach ($sums as $row) {
            foreach ($row as $index => $sum) {
                $squares[$index] += $sum;
            }
        }

        return array_map(static function ($sum) {
            return $sum ** 2;
        }, $squares);
    }

    /**
     * Squares every per-class feature sum, keeping the class/feature layout.
     *
     * Built with array_map rather than foreach-by-reference so that no
     * reference to an element of the result outlives the loop.
     *
     * @param array[] $sums per-class feature sums
     *
     * @return array[] per-class squared feature sums
     */
    private static function squaresSum(array $sums): array
    {
        return array_map(static function (array $row): array {
            return array_map(static function ($sum) {
                return $sum ** 2;
            }, $row);
        }, $sums);
    }

    /**
     * Computes the between-group sum of squares for every feature:
     * sum over classes of (class sum squared / class size) minus
     * (grand total squared / number of samples).
     *
     * @param array[] $samples          normalized classes of samples (used for the feature count)
     * @param array[] $sumSamplesSquare per-class squared feature sums
     * @param int[]   $samplesPerClass  number of samples in every class (all non-zero)
     * @param array   $squareSumSamples squared grand total per feature
     * @param int     $allSamples       total number of samples
     *
     * @return array between-group sum of squares per feature
     */
    private static function calculateSsbn(array $samples, array $sumSamplesSquare, array $samplesPerClass, array $squareSumSamples, int $allSamples): array
    {
        $ssbn = array_fill(0, count($samples[0][0]), 0);
        foreach ($sumSamplesSquare as $classIndex => $class) {
            foreach ($class as $index => $feature) {
                $ssbn[$index] += $feature / $samplesPerClass[$classIndex];
            }
        }

        foreach ($squareSumSamples as $index => $sum) {
            $ssbn[$index] -= $sum / $allSamples;
        }

        return $ssbn;
    }

    /**
     * Computes the within-group sum of squares for every feature as the total
     * sum of squares minus the between-group sum of squares.
     *
     * @param array $ssbn             between-group sum of squares per feature
     * @param array $ssAllSamples     sum of squared values per feature
     * @param array $squareSumSamples squared grand total per feature
     * @param int   $allSamples       total number of samples
     *
     * @return array within-group sum of squares per feature
     */
    private static function calculateSswn(array $ssbn, array $ssAllSamples, array $squareSumSamples, int $allSamples): array
    {
        $sswn = [];
        foreach ($ssAllSamples as $index => $ss) {
            $sswn[$index] = ($ss - $squareSumSamples[$index] / $allSamples) - $ssbn[$index];
        }

        return $sswn;
    }
}
138