Completed
Push — master ( 25a916...0178b9 )
by Yasunori
15s queued 10s
created

Correlation::kendall()   A

Complexity

Conditions 5
Paths 6

Size

Total Lines 37
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 16
c 0
b 0
f 0
nc 6
nop 2
dl 0
loc 37
rs 9.4222
1
<?php
2
3
namespace devfym\IntelliPHP\Math;
4
5
use devfym\IntelliPHP\Data\DataFrame;
6
7
/**
 * Correlation coefficients computed over the columns of a DataFrame.
 *
 * Every method reads a column through `$df->{$column}` and treats
 * `$df->getIndex()` as the number of rows to iterate over.
 */
class Correlation
{
    /**
     * Get Pearson's correlation coefficient between two columns.
     *
     * Values are read row by row with `$df->{$column}->get($i)`.
     *
     * @param DataFrame $df      Data frame holding both columns.
     * @param mixed     $xColumn Name of the first column.
     * @param mixed     $yColumn Name of the second column.
     * @return float Coefficient rounded to 4 decimals, or NAN when it is
     *               undefined (no rows, or a column without variance).
     */
    public static function pearson(DataFrame $df, $xColumn, $yColumn) : float
    {
        $n = $df->getIndex();

        $sumX = 0;
        $sumY = 0;
        $sumXY = 0;
        $sumX2 = 0;
        $sumY2 = 0;

        for ($i = 0; $i < $n; $i++) {
            $cx = $df->{$xColumn}->get($i);
            $cy = $df->{$yColumn}->get($i);

            $sumX += $cx;
            $sumY += $cy;
            $sumXY += $cx * $cy;
            $sumX2 += $cx * $cx;
            $sumY2 += $cy * $cy;
        }

        $spread = (($n * $sumX2) - ($sumX * $sumX)) * (($n * $sumY2) - ($sumY * $sumY));

        // With no rows or a constant column the spread is 0 and the
        // coefficient is undefined. Dividing anyway throws
        // DivisionByZeroError on PHP 8, so report NAN instead. A negative
        // spread (rounding noise) would make sqrt() yield NAN as well.
        if ($spread <= 0) {
            return NAN;
        }

        $r = (($n * $sumXY) - ($sumX * $sumY)) / sqrt($spread);

        return round($r, 4);
    }

    /**
     * Get Spearman's rank correlation coefficient between two columns.
     *
     * Each value is ranked by its position among the distinct values of its
     * column sorted in descending order (largest value has rank 1), and the
     * result is 1 - 6 * sum(d^2) / (n * (n^2 - 1)).
     *
     * NOTE(review): equal values share one rank and the following rank is not
     * skipped or averaged. The closed-form formula above is exact only when a
     * column has no repeated values; confirm whether tie correction is wanted.
     *
     * @param DataFrame $df      Data frame holding both columns.
     * @param mixed     $xColumn Name of the first column.
     * @param mixed     $yColumn Name of the second column.
     * @return float Coefficient rounded to 4 decimals, or NAN when fewer than
     *               two rows are available.
     */
    public static function spearman(DataFrame $df, $xColumn, $yColumn) : float
    {
        $n = $df->getIndex();

        // n * (n^2 - 1) is 0 for n = 0 and n = 1, so the formula is undefined.
        if ($n < 2) {
            return NAN;
        }

        $xValue = $df->{$xColumn}->all();
        $yValue = $df->{$yColumn}->all();

        $xSort = array_unique($xValue);
        $ySort = array_unique($yValue);

        // rsort() re-indexes from 0, so a value's key is its rank minus one.
        rsort($xSort);
        rsort($ySort);

        $diffRank = 0;

        for ($i = 0; $i < $n; $i++) {
            // Loose comparison on purpose: array_unique() above also compares
            // loosely, so a strict search could miss the surviving value.
            $xRank = array_search($xValue[$i], $xSort) + 1;
            $yRank = array_search($yValue[$i], $ySort) + 1;

            $delta = $xRank - $yRank;
            $diffRank += $delta * $delta;
        }

        $p = 1 - ((6 * $diffRank) / ($n * (($n * $n) - 1)));

        return round($p, 4);
    }

    /**
     * Get Kendall's tau (tau-a) rank correlation coefficient.
     *
     * With only `$xColumn`, every row is compared with the rows after it: a
     * later, larger value counts as concordant and a later, smaller value as
     * discordant. That measures how well the column follows the row order, so
     * the frame is presumably expected to be ordered by the other variable
     * beforehand (NOTE(review): confirm with callers).
     *
     * With `$yColumn` given, a pair of rows is concordant when both columns
     * move in the same direction between the two rows and discordant when
     * they move in opposite directions. Pairs tied in either column are
     * counted as neither.
     *
     * @param DataFrame  $df      Data frame holding the column(s).
     * @param mixed      $xColumn Name of the first column.
     * @param mixed|null $yColumn Optional name of the second column.
     * @return float Coefficient rounded to 4 decimals, or NAN when fewer than
     *               two rows are available.
     */
    public static function kendall(DataFrame $df, $xColumn, $yColumn = null) : float
    {
        $n = $df->getIndex();

        // The number of pairs n * (n - 1) / 2 is 0 for n < 2.
        if ($n < 2) {
            return NAN;
        }

        // Read every value once instead of on each pair comparison.
        $xs = [];
        $ys = null;

        for ($i = 0; $i < $n; $i++) {
            $xs[$i] = $df->{$xColumn}->get($i);
        }

        if ($yColumn !== null) {
            $ys = [];

            for ($i = 0; $i < $n; $i++) {
                $ys[$i] = $df->{$yColumn}->get($i);
            }
        }

        $concordant = 0;
        $discordant = 0;

        for ($i = 0; $i < $n; $i++) {
            for ($j = $i + 1; $j < $n; $j++) {
                $dx = self::direction($xs[$i], $xs[$j]);

                // Without a second column the row order itself is the second
                // variable, and it always increases from $i to $j.
                $dy = $ys === null ? 1 : self::direction($ys[$i], $ys[$j]);

                $agreement = $dx * $dy;

                if ($agreement > 0) {
                    $concordant++;
                } elseif ($agreement < 0) {
                    $discordant++;
                }
            }
        }

        $t = ($concordant - $discordant) / ($n * ($n - 1) / 2);

        return round($t, 4);
    }

    /**
     * Get Pearson's correlation coefficient for every pair of numeric columns.
     *
     * @param DataFrame $df Data frame to analyse.
     * @return array Square matrix where entry [i][j] is the coefficient
     *               between the i-th and j-th entries of
     *               `$df->getNumericColumns()`.
     */
    public static function pearsonAll(DataFrame $df) : array
    {
        $columns = $df->getNumericColumns();
        $total = count($columns);

        $matrix = [];

        for ($row = 0; $row < $total; $row++) {
            for ($col = 0; $col < $total; $col++) {
                $matrix[$row][$col] = self::pearson($df, $columns[$row], $columns[$col]);
            }
        }

        return $matrix;
    }

    /**
     * Get Spearman's correlation coefficient for every pair of numeric columns.
     *
     * @param DataFrame $df Data frame to analyse.
     * @return array Square matrix where entry [i][j] is the coefficient
     *               between the i-th and j-th entries of
     *               `$df->getNumericColumns()`.
     */
    public static function spearmanAll(DataFrame $df) : array
    {
        $columns = $df->getNumericColumns();
        $total = count($columns);

        $matrix = [];

        for ($row = 0; $row < $total; $row++) {
            for ($col = 0; $col < $total; $col++) {
                $matrix[$row][$col] = self::spearman($df, $columns[$row], $columns[$col]);
            }
        }

        return $matrix;
    }

    /**
     * Direction of change from the first value to the second.
     *
     * @param mixed $from Value at the earlier row.
     * @param mixed $to   Value at the later row.
     * @return int 1 when $to is larger, -1 when it is smaller, 0 otherwise.
     */
    private static function direction($from, $to) : int
    {
        if ($from < $to) {
            return 1;
        }

        if ($from > $to) {
            return -1;
        }

        return 0;
    }
}