KernelPCA::calculateKernelMatrix()   A
last analyzed

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 9
dl 0
loc 16
rs 9.9666
c 0
b 0
f 0
cc 4
nc 4
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Closure;
8
use Phpml\Exception\InvalidArgumentException;
9
use Phpml\Exception\InvalidOperationException;
10
use Phpml\Math\Distance\Euclidean;
11
use Phpml\Math\Distance\Manhattan;
12
use Phpml\Math\Matrix;
13
14
class KernelPCA extends PCA
15
{
16
    public const KERNEL_RBF = 1;
17
18
    public const KERNEL_SIGMOID = 2;
19
20
    public const KERNEL_LAPLACIAN = 3;
21
22
    public const KERNEL_LINEAR = 4;
23
24
    /**
25
     * Selected kernel function
26
     *
27
     * @var int
28
     */
29
    protected $kernel;
30
31
    /**
32
     * Gamma value used by the kernel
33
     *
34
     * @var float|null
35
     */
36
    protected $gamma;
37
38
    /**
39
     * Original dataset used to fit KernelPCA
40
     *
41
     * @var array
42
     */
43
    protected $data = [];
44
45
    /**
46
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
47
     * techniques of kernel methods. It is more suitable for data that involves
48
     * vectors that are not linearly separable<br><br>
49
     * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
50
     * will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
51
     * This transformation will return the same number of rows with only <i>2</i> columns.
52
     *
53
     * @param float $totalVariance Total variance to be preserved if numFeatures is not given
54
     * @param int   $numFeatures   Number of columns to be returned
55
     * @param float $gamma         Gamma parameter is used with RBF and Sigmoid kernels
56
     *
57
     * @throws InvalidArgumentException
58
     */
59
    public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null)
60
    {
61
        if (!in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR], true)) {
62
            throw new InvalidArgumentException('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
63
        }
64
65
        parent::__construct($totalVariance, $numFeatures);
66
67
        $this->kernel = $kernel;
68
        $this->gamma = $gamma;
69
    }
70
71
    /**
72
     * Takes a data and returns a lower dimensional version
73
     * of this data while preserving $totalVariance or $numFeatures. <br>
74
     * $data is an n-by-m matrix and returned array is
75
     * n-by-k matrix where k <= m
76
     */
77
    public function fit(array $data): array
78
    {
79
        $numRows = count($data);
80
        $this->data = $data;
81
82
        if ($this->gamma === null) {
83
            $this->gamma = 1.0 / $numRows;
84
        }
85
86
        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
87
        $matrix = $this->centerMatrix($matrix, $numRows);
88
89
        $this->eigenDecomposition($matrix);
90
91
        $this->fit = true;
92
93
        return Matrix::transposeArray($this->eigVectors);
94
    }
95
96
    /**
97
     * Transforms the given sample to a lower dimensional vector by using
98
     * the variables obtained during the last run of <code>fit</code>.
99
     *
100
     * @throws InvalidArgumentException
101
     * @throws InvalidOperationException
102
     */
103
    public function transform(array $sample): array
104
    {
105
        if (!$this->fit) {
106
            throw new InvalidOperationException('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
107
        }
108
109
        if (is_array($sample[0])) {
110
            throw new InvalidArgumentException('KernelPCA::transform() accepts only one-dimensional arrays');
111
        }
112
113
        $pairs = $this->getDistancePairs($sample);
114
115
        return $this->projectSample($pairs);
116
    }
117
118
    /**
119
     * Calculates similarity matrix by use of selected kernel function<br>
120
     * An n-by-m matrix is given and an n-by-n matrix is returned
121
     */
122
    protected function calculateKernelMatrix(array $data, int $numRows): array
123
    {
124
        $kernelFunc = $this->getKernel();
125
126
        $matrix = [];
127
        for ($i = 0; $i < $numRows; ++$i) {
128
            for ($k = 0; $k < $numRows; ++$k) {
129
                if ($i <= $k) {
130
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
131
                } else {
132
                    $matrix[$i][$k] = $matrix[$k][$i];
133
                }
134
            }
135
        }
136
137
        return $matrix;
138
    }
139
140
    /**
141
     * Kernel matrix is centered in its original space by using the following
142
     * conversion:
143
     *
144
     * K′ = K − N.K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
145
     */
146
    protected function centerMatrix(array $matrix, int $n): array
147
    {
148
        $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
149
        $N = new Matrix($N, false);
150
        $K = new Matrix($matrix, false);
151
152
        // K.N (This term is repeated so we cache it once)
153
        $K_N = $K->multiply($N);
154
        // N.K
155
        $N_K = $N->multiply($K);
156
        // N.K.N
157
        $N_K_N = $N->multiply($K_N);
158
159
        return $K->subtract($N_K)
160
            ->subtract($K_N)
161
            ->add($N_K_N)
162
            ->toArray();
163
    }
164
165
    /**
166
     * Returns the callable kernel function
167
     *
168
     * @throws \Exception
169
     */
170
    protected function getKernel(): Closure
171
    {
172
        switch ($this->kernel) {
173
            case self::KERNEL_LINEAR:
174
                // k(x,y) = xT.y
175
                return function ($x, $y) {
176
                    return Matrix::dot($x, $y)[0];
177
                };
178
            case self::KERNEL_RBF:
179
                // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
180
                $dist = new Euclidean();
181
182
                return function ($x, $y) use ($dist): float {
183
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
184
                };
185
186
            case self::KERNEL_SIGMOID:
187
                // k(x,y)=tanh(γ.xT.y+c0) where c0=1
188
                return function ($x, $y): float {
189
                    $res = Matrix::dot($x, $y)[0] + 1.0;
190
191
                    return tanh((float) $this->gamma * $res);
192
                };
193
194
            case self::KERNEL_LAPLACIAN:
195
                // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
196
                $dist = new Manhattan();
197
198
                return function ($x, $y) use ($dist): float {
199
                    return exp(-$this->gamma * $dist->distance($x, $y));
200
                };
201
202
            default:
203
                // Not reached
204
                throw new InvalidArgumentException(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
205
        }
206
    }
207
208
    protected function getDistancePairs(array $sample): array
209
    {
210
        $kernel = $this->getKernel();
211
212
        $pairs = [];
213
        foreach ($this->data as $row) {
214
            $pairs[] = $kernel($row, $sample);
215
        }
216
217
        return $pairs;
218
    }
219
220
    protected function projectSample(array $pairs): array
221
    {
222
        // Normalize eigenvectors by eig = eigVectors / eigValues
223
        $func = function ($eigVal, $eigVect) {
224
            $m = new Matrix($eigVect, false);
225
            $a = $m->divideByScalar($eigVal)->toArray();
226
227
            return $a[0];
228
        };
229
        $eig = array_map($func, $this->eigValues, $this->eigVectors);
230
231
        // return k.dot(eig)
232
        return Matrix::dot($pairs, $eig);
233
    }
234
}
235