PCA::__construct()   B
last analyzed

Complexity

Conditions 9
Paths 7

Size

Total Lines 20
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 20
rs 8.0555
c 0
b 0
f 0
cc 9
nc 7
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Phpml\Exception\InvalidArgumentException;
8
use Phpml\Exception\InvalidOperationException;
9
use Phpml\Math\Statistic\Covariance;
10
use Phpml\Math\Statistic\Mean;
11
12
class PCA extends EigenTransformerBase
{
    /**
     * Per-dimension arithmetic means of the data set used by the last fit().
     * They are kept so that transform() centers new samples with the same offsets.
     *
     * @var array
     */
    protected $means = [];

    /**
     * Whether fit() has completed successfully, i.e. whether transform() may be used.
     *
     * @var bool
     */
    protected $fit = false;

    /**
     * PCA (Principal Component Analysis) explains the given data with a lower
     * number of dimensions. The data is transformed to a lower dimensional
     * version of itself while conserving a proportion of the total variance
     * within the data. It is a lossy data compression technique.
     *
     * Exactly one of the two arguments must be given; passing both or neither
     * is rejected.
     *
     * @param float|null $totalVariance Total explained variance to be preserved (0.1 to 0.99 inclusive)
     * @param int|null   $numFeatures   Number of features to be preserved (greater than 0)
     *
     * @throws InvalidArgumentException if an argument is out of range, or if both or neither are given
     */
    public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
    {
        if ($totalVariance !== null && ($totalVariance < 0.1 || $totalVariance > 0.99)) {
            throw new InvalidArgumentException('Total variance can be a value between 0.1 and 0.99');
        }

        if ($numFeatures !== null && $numFeatures <= 0) {
            throw new InvalidArgumentException('Number of features to be preserved should be greater than 0');
        }

        // Both sides are equal when both arguments are null or both are set;
        // either way the stopping criterion is ambiguous.
        if (($totalVariance !== null) === ($numFeatures !== null)) {
            throw new InvalidArgumentException('Either totalVariance or numFeatures should be specified in order to run the algorithm');
        }

        if ($numFeatures !== null) {
            $this->numFeatures = $numFeatures;
        }

        if ($totalVariance !== null) {
            $this->totalVariance = $totalVariance;
        }
    }

    /**
     * Takes a data set and returns a lower dimensional version of it while
     * preserving $totalVariance or $numFeatures.
     *
     * $data is an n-by-m matrix and the returned array is an n-by-k matrix
     * where k <= m.
     *
     * Every call starts from scratch: means computed by an earlier fit() are
     * discarded so that the new data set is centered with its own means.
     *
     * @param array $data Rows of samples; the feature count is taken from $data[0]
     *
     * @throws InvalidArgumentException if $data[0] is missing, is not an array, or is empty
     */
    public function fit(array $data): array
    {
        $firstRow = $data[0] ?? null;
        if (!is_array($firstRow) || count($firstRow) === 0) {
            throw new InvalidArgumentException('PCA::fit() requires a non-empty data matrix whose rows are non-empty arrays of feature values');
        }

        $n = count($firstRow);

        // normalize() only computes the means while $this->means is empty, so
        // without this reset a second fit() would center the new data with the
        // previous data set's means. The flag is cleared as well so that a
        // refit which fails part-way does not leave transform() usable with
        // inconsistent state.
        $this->means = [];
        $this->fit = false;

        $data = $this->normalize($data, $n);

        // The data is already centered, hence the zero vector as second
        // argument (presumably the per-dimension means expected by
        // Covariance::covarianceMatrix() - confirm against that class).
        $covMatrix = Covariance::covarianceMatrix($data, array_fill(0, $n, 0));

        // eigenDecomposition() and reduce() are not defined in this class;
        // presumably they are inherited from EigenTransformerBase.
        $this->eigenDecomposition($covMatrix);

        $this->fit = true;

        return $this->reduce($data);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the eigenVectors obtained in the last run of fit().
     *
     * A single sample (flat array of feature values) is wrapped into a
     * one-row matrix before processing; a list of samples is used as given.
     *
     * @throws InvalidOperationException if fit() has not been run yet
     * @throws InvalidArgumentException  if $sample is empty
     */
    public function transform(array $sample): array
    {
        if (!$this->fit) {
            throw new InvalidOperationException('PCA has not been fitted with respect to original dataset, please run PCA::fit() first');
        }

        if ($sample === []) {
            throw new InvalidArgumentException('PCA::transform() requires a non-empty sample');
        }

        // A flat vector is treated as one sample.
        if (!is_array($sample[0])) {
            $sample = [$sample];
        }

        $sample = $this->normalize($sample, count($sample[0]));

        return $this->reduce($sample);
    }

    /**
     * Computes the arithmetic mean of each of the first $n columns of $data
     * and stores them, in column order, in $this->means (replacing any
     * previously stored values).
     */
    protected function calculateMeans(array $data, int $n): void
    {
        // Calculate means for each dimension
        $this->means = [];
        for ($i = 0; $i < $n; ++$i) {
            $column = array_column($data, $i);
            $this->means[] = Mean::arithmetic($column);
        }
    }

    /**
     * Normalization of the data consists of subtracting the mean from each
     * dimension, therefore the dimensions will be centered to zero.
     *
     * The means are computed from $data only when none are stored yet;
     * otherwise the stored means are reused (this is what lets transform()
     * apply the offsets learned by fit()).
     *
     * @return array A centered copy of $data; row keys are preserved
     */
    protected function normalize(array $data, int $n): array
    {
        if (count($this->means) === 0) {
            $this->calculateMeans($data, $n);
        }

        // Normalize data
        foreach (array_keys($data) as $i) {
            for ($k = 0; $k < $n; ++$k) {
                $data[$i][$k] -= $this->means[$k];
            }
        }

        return $data;
    }
}
132