Passed
Push — master ( 47cdff...ed5fc8 )
by Arkadiusz
03:38
created

src/Phpml/DimensionReduction/KernelPCA.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Phpml\Math\Distance\Euclidean;
8
use Phpml\Math\Distance\Manhattan;
9
use Phpml\Math\Matrix;
10
11
class KernelPCA extends PCA
12
{
13
    const KERNEL_RBF = 1;
14
    const KERNEL_SIGMOID = 2;
15
    const KERNEL_LAPLACIAN = 3;
16
    const KERNEL_LINEAR = 4;
17
18
    /**
19
     * Selected kernel function
20
     *
21
     * @var int
22
     */
23
    protected $kernel;
24
25
    /**
26
     * Gamma value used by the kernel
27
     *
28
     * @var float
29
     */
30
    protected $gamma;
31
32
    /**
33
     * Original dataset used to fit KernelPCA
34
     *
35
     * @var array
36
     */
37
    protected $data;
38
39
    /**
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
     * techniques of kernel methods. It is more suitable for data that involves
     * vectors that are not linearly separable.<br><br>
     * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
     * will initialize the algorithm with an RBF kernel having the gamma parameter as 15.0. <br>
     * This transformation will return the same number of rows with only <i>2</i> columns.
     *
     * @param int        $kernel        One of the KernelPCA::KERNEL_* constants
     * @param float|null $totalVariance Total variance to be preserved if numFeatures is not given
     * @param int|null   $numFeatures   Number of columns to be returned
     * @param float|null $gamma         Gamma parameter used by the RBF, Sigmoid and Laplacian kernels;
     *                                  when null, fit() derives it from the number of rows
     *
     * @throws \Exception If $kernel is not one of the supported kernel constants
     */
    public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
    {
        $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
        // Strict membership test: the kernel id must be one of the integer constants, no type juggling
        if (!in_array($kernel, $availableKernels, true)) {
            throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
        }

        // Variance / feature-count handling is delegated to the parent constructor
        parent::__construct($totalVariance, $numFeatures);

        $this->kernel = $kernel;
        $this->gamma = $gamma;
    }
66
67
    /**
     * Fits KernelPCA on the given dataset and returns the transformed data. <br>
     * The n rows of $data are turned into an n-by-n kernel matrix, which is centered
     * and handed to the inherited eigen-decomposition; the resulting eigenvectors are
     * returned transposed (presumably one row per input sample, with the number of
     * columns governed by $totalVariance or $numFeatures - confirm against PCA).
     *
     * @param array $data Dataset as a list of rows (indexed 0..n-1)
     *
     * @return array
     *
     * @throws \Exception If $data is empty
     */
    public function fit(array $data)
    {
        $numRows = count($data);
        // Reject empty input up front: the default gamma and the centering step both divide by the row count
        if ($numRows === 0) {
            throw new \Exception('KernelPCA::fit() requires a non-empty dataset');
        }

        // The training data is kept because transform() evaluates the kernel against every row of it
        $this->data = $data;

        if ($this->gamma === null) {
            // NOTE(review): once assigned, this default is kept for later fit() calls on other datasets
            $this->gamma = 1.0 / $numRows;
        }

        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
        $matrix = $this->centerMatrix($matrix, $numRows);

        $this->eigenDecomposition($matrix);

        $this->fit = true;

        return Matrix::transposeArray($this->eigVectors);
    }
95
96
    /**
     * Builds the similarity (kernel) matrix of a dataset with the selected kernel function.<br>
     * For $numRows input rows the result is a $numRows-by-$numRows array. The kernel is
     * evaluated only for entries on or above the diagonal; entries below the diagonal
     * are copied from their mirrored position.
     *
     * @param array $data    Rows to compare, indexed 0..$numRows-1
     * @param int   $numRows Number of rows of $data to use
     *
     * @return array
     */
    protected function calculateKernelMatrix(array $data, int $numRows)
    {
        $kernel = $this->getKernel();
        $gram = [];

        for ($row = 0; $row < $numRows; ++$row) {
            for ($col = 0; $col < $numRows; ++$col) {
                // Below the diagonal the mirrored entry was already computed in an earlier row
                $gram[$row][$col] = $col < $row
                    ? $gram[$col][$row]
                    : $kernel($data[$row], $data[$col]);
            }
        }

        return $gram;
    }
122
123
    /**
     * Centers the kernel matrix using the conversion
     *
     * K′ = K − N.K − K.N + N.K.N
     *
     * where N is an n-by-n matrix with every entry equal to 1/n.
     *
     * @param array $matrix Kernel matrix (n-by-n)
     * @param int   $n      Size of the kernel matrix
     *
     * @return array Centered kernel matrix
     */
    protected function centerMatrix(array $matrix, int $n)
    {
        $fill = 1.0 / $n;
        $ones = new Matrix(array_fill(0, $n, array_fill(0, $n, $fill)), false);
        $kernel = new Matrix($matrix, false);

        // K.N is needed twice (on its own and inside N.K.N), so it is computed once
        $kernelTimesOnes = $kernel->multiply($ones);
        $onesTimesKernel = $ones->multiply($kernel);
        $onesKernelOnes = $ones->multiply($kernelTimesOnes);

        $centered = $kernel->subtract($onesTimesKernel);
        $centered = $centered->subtract($kernelTimesOnes);
        $centered = $centered->add($onesKernelOnes);

        return $centered->toArray();
    }
152
153
    /**
     * Returns the kernel function selected at construction time as a closure
     * taking two sample vectors ($x, $y) and returning their similarity.
     * The RBF, Sigmoid and Laplacian closures read $this->gamma when they are invoked.
     *
     * @return \Closure
     *
     * @throws \Exception If the stored kernel id is not one of the KERNEL_* constants
     */
    protected function getKernel()
    {
        switch ($this->kernel) {
            case self::KERNEL_LINEAR:
                // k(x,y) = xT.y
                return function ($x, $y) {
                    return Matrix::dot($x, $y)[0];
                };

            case self::KERNEL_RBF:
                // k(x,y) = exp(-γ.d(x,y)) where d is Euclidean::sqDistance
                // (presumably the squared Euclidean distance |x-y|²)
                $dist = new Euclidean();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
                };

            case self::KERNEL_SIGMOID:
                // k(x,y) = tanh(γ.xT.y + c0) where c0 = 1
                // The offset is added after scaling by γ so that c0 really is 1 (and not γ)
                return function ($x, $y) {
                    return tanh($this->gamma * Matrix::dot($x, $y)[0] + 1.0);
                };

            case self::KERNEL_LAPLACIAN:
                // k(x,y) = exp(-γ.|x-y|) where |..| is Manhattan distance
                $dist = new Manhattan();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->distance($x, $y));
                };

            default:
                // Defensive: the constructor already rejects unknown kernel ids
                throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
        }
    }
193
194
    /**
     * Evaluates the selected kernel between every row of the fitted dataset
     * and the given sample, in dataset order.
     *
     * @param array $sample Sample vector to compare against the fitted data
     *
     * @return array One kernel value per row of the fitted dataset
     */
    protected function getDistancePairs(array $sample)
    {
        $kernelFunc = $this->getKernel();
        $similarities = [];

        foreach ($this->data as $trainingRow) {
            $similarities[] = $kernelFunc($trainingRow, $sample);
        }

        return $similarities;
    }
210
211
    /**
     * Projects precomputed kernel values onto the stored eigenvectors.
     * Each eigenvector is first divided by its eigenvalue; the result is the
     * dot product of $pairs with those scaled eigenvectors.
     *
     * @param array $pairs Kernel values as produced by getDistancePairs()
     *
     * @return array
     */
    protected function projectSample(array $pairs)
    {
        // Scale one eigenvector by its eigenvalue: eig = eigVector / eigValue
        $scaleByEigenvalue = static function ($eigVal, $eigVect) {
            $scaled = (new Matrix($eigVect, false))
                ->divideByScalar($eigVal)
                ->toArray();

            // Only the first row of the resulting array is used
            return $scaled[0];
        };

        // Eigenvalues and eigenvectors are paired position by position
        $components = array_map($scaleByEigenvalue, $this->eigValues, $this->eigVectors);

        // k.dot(eig)
        return Matrix::dot($pairs, $components);
    }
230
231
    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the variables obtained during the last run of <code>fit</code>.
     *
     * @param array $sample A single one-dimensional sample vector
     *
     * @return array
     *
     * @throws \Exception If fit() has not been run yet, or if $sample is a nested array
     */
    public function transform(array $sample)
    {
        if (!$this->fit) {
            throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
        }

        // "?? null" keeps an empty (or non-zero-indexed) sample from raising an undefined-offset notice
        if (is_array($sample[0] ?? null)) {
            throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
        }

        // Kernel values between the sample and every training row, then projection onto the eigenvectors
        $pairs = $this->getDistancePairs($sample);

        return $this->projectSample($pairs);
    }
255
}
256