1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\FeatureSelection\ScoringFunction; |
6
|
|
|
|
7
|
|
|
use Phpml\FeatureSelection\ScoringFunction; |
8
|
|
|
use Phpml\Math\Matrix; |
9
|
|
|
use Phpml\Math\Statistic\Mean; |
10
|
|
|
|
11
|
|
|
/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * This is done in 2 steps:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) * std(y)).
 * 2. It is converted to an F score.
 *
 * Degenerate inputs are given finite, well-defined handling instead of
 * dividing by zero:
 *
 * - a feature column (or the target vector) whose norm is zero carries no
 *   information, so its correlation - and therefore its F score - is 0.0;
 * - a feature that is perfectly (anti-)correlated with the target gets INF.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Computes one F score per feature column of $samples.
     *
     * @param array $samples rows of feature values; the keys of the first row define the feature columns
     * @param array $targets one target value per sample
     *
     * @return array F scores keyed like the feature columns of the first sample
     */
    public function score(array $samples, array $targets): array
    {
        if ($this->center) {
            $targets = $this->centerTargets($targets);
            $samples = $this->centerSamples($samples);
        }

        $featureKeys = array_keys($samples[0] ?? []);
        if ($featureKeys === []) {
            // No feature columns to score.
            return [];
        }

        // The target norm does not depend on the feature, so compute it once.
        $targetNorm = (new Matrix($targets, false))->frobeniusNorm();

        $correlations = [];
        foreach ($featureKeys as $index) {
            $featureColumn = array_column($samples, $index);
            $featureNorm = (new Matrix($featureColumn, false))->transpose()->frobeniusNorm();

            if ($featureNorm <= 0.0 || $targetNorm <= 0.0) {
                // Zero norm (e.g. a constant column after centering): the correlation
                // is undefined and dividing would raise DivisionByZeroError on PHP 8.
                $correlations[$index] = 0.0;

                continue;
            }

            $correlations[$index] = (Matrix::dot($targets, $featureColumn)[0] / $featureNorm) / $targetNorm;
        }

        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        return array_map(static function (float $correlation) use ($degreesOfFreedom): float {
            // Rounding can push r^2 marginally above 1; clamp so the score never turns negative.
            $explained = min($correlation ** 2, 1.0);
            $unexplained = 1.0 - $explained;

            if ($unexplained <= 0.0) {
                // Perfect linear relationship: the F statistic diverges.
                return INF;
            }

            return $explained / $unexplained * $degreesOfFreedom;
        }, $correlations);
    }

    /**
     * Returns a copy of $targets with their arithmetic mean subtracted; keys are preserved.
     */
    private function centerTargets(array $targets): array
    {
        $mean = Mean::arithmetic($targets);

        return array_map(static function ($target) use ($mean) {
            return $target - $mean;
        }, $targets);
    }

    /**
     * Returns a copy of $samples in which every feature column has its arithmetic mean subtracted.
     *
     * Column means are computed for the keys present in the first sample.
     */
    private function centerSamples(array $samples): array
    {
        $means = [];
        foreach (array_keys($samples[0] ?? []) as $index) {
            $means[$index] = Mean::arithmetic(array_column($samples, $index));
        }

        return array_map(static function (array $sample) use ($means): array {
            foreach ($sample as $index => $value) {
                $sample[$index] = $value - $means[$index];
            }

            return $sample;
        }, $samples);
    }
}
82
|
|
|
|