ANOVA::calculateSsbn()   A
last analyzed

Complexity

Conditions 4
Paths 6

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 7
c 1
b 0
f 0
dl 0
loc 14
rs 10
cc 4
nc 6
nop 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Math\Statistic;
6
7
use Phpml\Exception\InvalidArgumentException;
8
9
/**
 * Analysis of variance
 * https://en.wikipedia.org/wiki/Analysis_of_variance
 */
final class ANOVA
{
    /**
     * The one-way ANOVA tests the null hypothesis that 2 or more groups have
     * the same population mean. The test is applied to samples from two or
     * more groups, possibly with differing sizes.
     *
     * One F value is returned per feature (column). When the within-group
     * mean square of a feature is exactly zero the ratio is undefined, so the
     * result for that feature is INF (or -INF) if the between-group mean
     * square is non-zero and NAN if it is zero as well, instead of a
     * division-by-zero error.
     *
     * @param array[] $samples - each row is class samples
     *
     * @return float[]
     *
     * @throws InvalidArgumentException when fewer than 2 classes are given or a class has no samples
     */
    public static function oneWayF(array $samples): array
    {
        $classes = count($samples);
        if ($classes < 2) {
            throw new InvalidArgumentException('The array must have at least 2 elements');
        }

        $samplesPerClass = array_map(static function (array $class): int {
            return count($class);
        }, $samples);

        // An empty class has no mean; it would otherwise cause a division by
        // zero further down when sums are divided by the class size.
        if (in_array(0, $samplesPerClass, true)) {
            throw new InvalidArgumentException('Each class must contain at least one sample');
        }

        $allSamples = (int) array_sum($samplesPerClass);
        $ssAllSamples = self::sumOfSquaresPerFeature($samples);
        $sumSamples = self::sumOfFeaturesPerClass($samples);
        $squareSumSamples = self::sumOfSquares($sumSamples);
        $sumSamplesSquare = self::squaresSum($sumSamples);
        $ssbn = self::calculateSsbn($samples, $sumSamplesSquare, $samplesPerClass, $squareSumSamples, $allSamples);
        $sswn = self::calculateSswn($ssbn, $ssAllSamples, $squareSumSamples, $allSamples);

        // Degrees of freedom between and within groups.
        $dfbn = $classes - 1;
        $dfwn = $allSamples - $classes;

        $msb = array_map(static function ($s) use ($dfbn) {
            return $s / $dfbn;
        }, $ssbn);
        $msw = array_map(static function ($s) use ($dfwn) {
            // With exactly one sample per class there are no within-group
            // degrees of freedom; fall back to 1 so the ratio stays defined.
            if ($dfwn === 0) {
                return 1;
            }

            return $s / $dfwn;
        }, $sswn);

        $f = [];
        foreach ($msb as $index => $msbValue) {
            $f[$index] = self::ratio($msbValue, $msw[$index]);
        }

        return $f;
    }

    /**
     * Divides two numbers, mapping a zero denominator to INF, -INF or NAN
     * rather than raising a division-by-zero error.
     *
     * @param int|float $numerator
     * @param int|float $denominator
     *
     * @return int|float
     */
    private static function ratio($numerator, $denominator)
    {
        if ((float) $denominator !== 0.0) {
            return $numerator / $denominator;
        }

        if ((float) $numerator === 0.0 || is_nan((float) $numerator)) {
            return NAN;
        }

        return $numerator > 0 ? INF : -INF;
    }

    /**
     * Sums the squared feature values over every sample of every class.
     *
     * @param array[] $samples
     *
     * @return array one total per feature index
     */
    private static function sumOfSquaresPerFeature(array $samples): array
    {
        // The first sample of the first class determines the number of features.
        $sum = array_fill(0, count($samples[0][0]), 0);
        foreach ($samples as $class) {
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature ** 2;
                }
            }
        }

        return $sum;
    }

    /**
     * Sums the feature values of each class separately.
     *
     * @param array[] $samples
     *
     * @return array[] for every class, one total per feature index
     */
    private static function sumOfFeaturesPerClass(array $samples): array
    {
        return array_map(static function (array $class): array {
            $sum = array_fill(0, count($class[0]), 0);
            foreach ($class as $sample) {
                foreach ($sample as $index => $feature) {
                    $sum[$index] += $feature;
                }
            }

            return $sum;
        }, $samples);
    }

    /**
     * Adds the given rows together per column and squares each column total.
     *
     * @param array[] $sums
     *
     * @return array one squared total per column index
     */
    private static function sumOfSquares(array $sums): array
    {
        $squares = array_fill(0, count($sums[0]), 0);
        foreach ($sums as $row) {
            foreach ($row as $index => $sum) {
                $squares[$index] += $sum;
            }
        }

        return array_map(static function ($sum) {
            return $sum ** 2;
        }, $squares);
    }

    /**
     * Squares every value of a two-dimensional array, keeping its keys.
     *
     * @param array[] $sums
     *
     * @return array[]
     */
    private static function squaresSum(array $sums): array
    {
        // array_map avoids by-reference iteration and the dangling
        // references it leaves behind.
        return array_map(static function (array $row): array {
            return array_map(static function ($sum) {
                return $sum ** 2;
            }, $row);
        }, $sums);
    }

    /**
     * Computes the between-group sum of squares for every feature: the sum
     * over classes of (squared class total / class size), minus
     * (squared grand total / number of samples).
     *
     * @param array[] $samples          only used to determine the number of features
     * @param array[] $sumSamplesSquare squared per-class feature totals
     * @param int[]   $samplesPerClass  number of samples in each class
     * @param array   $squareSumSamples squared grand total per feature
     * @param int     $allSamples       total number of samples
     *
     * @return array one value per feature index
     */
    private static function calculateSsbn(array $samples, array $sumSamplesSquare, array $samplesPerClass, array $squareSumSamples, int $allSamples): array
    {
        $ssbn = array_fill(0, count($samples[0][0]), 0);
        foreach ($sumSamplesSquare as $classIndex => $class) {
            foreach ($class as $index => $feature) {
                $ssbn[$index] += $feature / $samplesPerClass[$classIndex];
            }
        }

        foreach ($squareSumSamples as $index => $sum) {
            $ssbn[$index] -= $sum / $allSamples;
        }

        return $ssbn;
    }

    /**
     * Computes the within-group sum of squares for every feature as the
     * total sum of squares minus the between-group sum of squares.
     *
     * @param array $ssbn             between-group sum of squares per feature
     * @param array $ssAllSamples     sum of squared values per feature
     * @param array $squareSumSamples squared grand total per feature
     * @param int   $allSamples       total number of samples
     *
     * @return array one value per feature index
     */
    private static function calculateSswn(array $ssbn, array $ssAllSamples, array $squareSumSamples, int $allSamples): array
    {
        $sswn = [];
        foreach ($ssAllSamples as $index => $ss) {
            $sswn[$index] = ($ss - $squareSumSamples[$index] / $allSamples) - $ssbn[$index];
        }

        return $sswn;
    }
}
142