UnivariateLinearRegression::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
cc 1
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\FeatureSelection\ScoringFunction;
6
7
use Phpml\FeatureSelection\ScoringFunction;
8
use Phpml\Math\Matrix;
9
use Phpml\Math\Statistic\Mean;
10
11
/**
 * Quick linear model for testing the effect of a single regressor,
 * sequentially for many regressors.
 *
 * This is done in 2 steps:
 *
 * 1. The cross correlation between each regressor and the target is computed,
 *    that is, ((X[:, i] - mean(X[:, i])) * (y - mean(y))) / (std(X[:, i]) * std(y)).
 * 2. It is converted to an F score.
 *
 * Ported from scikit-learn f_regression function (http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html#sklearn.feature_selection.f_regression)
 */
final class UnivariateLinearRegression implements ScoringFunction
{
    /**
     * @var bool
     */
    private $center;

    /**
     * @param bool $center - if true samples and targets will be centered
     */
    public function __construct(bool $center = true)
    {
        $this->center = $center;
    }

    /**
     * Computes one F score per feature column.
     *
     * For every key of the first sample, the matching column is correlated with
     * the targets (dot product divided by both Frobenius norms) and the
     * correlation r is converted to r^2 / (1 - r^2) * degreesOfFreedom, where
     * degreesOfFreedom is count($targets) - 2 when centering is enabled and
     * count($targets) - 1 otherwise.
     *
     * Degenerate inputs do not abort the whole computation: a zero norm or a
     * correlation of exactly +/-1 yields NAN or +/-INF for that feature
     * (IEEE 754 division semantics) instead of a "Division by zero"
     * warning/error.
     *
     * @param array $samples list of samples, each an array of feature values
     * @param array $targets one target value per sample
     *
     * @return array scores, indexed by the feature keys of the first sample
     */
    public function score(array $samples, array $targets): array
    {
        if ($this->center) {
            $targets = $this->centerTargets($targets);
            $samples = $this->centerSamples($samples);
        }

        // The target norm does not depend on the feature, so compute it once.
        $targetsNorm = (new Matrix($targets, false))->frobeniusNorm();

        $correlations = [];
        foreach (array_keys($samples[0]) as $index) {
            $featureColumn = array_column($samples, $index);
            $featureNorm = (new Matrix($featureColumn, false))->transpose()->frobeniusNorm();

            // Same evaluation order as the plain expression: dot / featureNorm / targetsNorm.
            $correlations[$index] = self::divide(
                self::divide(Matrix::dot($targets, $featureColumn)[0], $featureNorm),
                $targetsNorm
            );
        }

        $degreesOfFreedom = count($targets) - ($this->center ? 2 : 1);

        return array_map(static function (float $correlation) use ($degreesOfFreedom): float {
            $squared = $correlation ** 2;

            return self::divide($squared, 1 - $squared) * $degreesOfFreedom;
        }, $correlations);
    }

    /**
     * Returns a copy of the targets with their arithmetic mean subtracted.
     * Array keys are preserved.
     */
    private function centerTargets(array $targets): array
    {
        $mean = Mean::arithmetic($targets);

        return array_map(static function ($target) use ($mean) {
            return $target - $mean;
        }, $targets);
    }

    /**
     * Returns a copy of the samples in which every feature has the arithmetic
     * mean of its column subtracted. Column keys are taken from the first
     * sample; sample and feature keys are preserved.
     */
    private function centerSamples(array $samples): array
    {
        $means = [];
        foreach (array_keys($samples[0]) as $index) {
            $means[$index] = Mean::arithmetic(array_column($samples, $index));
        }

        $centered = [];
        foreach ($samples as $key => $sample) {
            $centered[$key] = [];
            foreach ($sample as $index => $feature) {
                $centered[$key][$index] = $feature - $means[$index];
            }
        }

        return $centered;
    }

    /**
     * Floating-point division that follows IEEE 754 for a zero divisor.
     *
     * The `/` operator raises a warning (PHP 7) or throws DivisionByZeroError
     * (PHP 8) when the divisor is zero; this helper returns NAN for 0/0 and
     * NAN/0, and +/-INF for any other dividend, so a single degenerate feature
     * cannot interrupt scoring of the remaining ones.
     */
    private static function divide(float $dividend, float $divisor): float
    {
        if ($divisor === 0.0) {
            if ($dividend === 0.0 || is_nan($dividend)) {
                return NAN;
            }

            return $dividend > 0.0 ? INF : -INF;
        }

        return $dividend / $divisor;
    }
}
82