Completed
Push — master ( 6296e4...a87859 )
by Arkadiusz
18:50
created

KernelPCA   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 236
Duplicated Lines 5.08 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 0
Metric Value
wmc 20
lcom 1
cbo 4
dl 12
loc 236
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 12 2
A fit() 0 18 2
A calculateKernelMatrix() 0 17 4
A centerMatrix() 0 18 1
B getKernel() 12 30 5
A getDistancePairs() 0 11 2
A projectSample() 0 14 1
A transform() 0 14 3

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Phpml\Math\Distance\Euclidean;
8
use Phpml\Math\Distance\Manhattan;
9
use Phpml\Math\Matrix;
10
11
class KernelPCA extends PCA
{
    const KERNEL_RBF = 1;
    const KERNEL_SIGMOID = 2;
    const KERNEL_LAPLACIAN = 3;
    const KERNEL_LINEAR = 4;

    /**
     * Selected kernel function (one of the KERNEL_* constants)
     *
     * @var int
     */
    protected $kernel;

    /**
     * Gamma value used by the kernel. When left as null it is
     * initialized inside fit() as 1 / (number of rows in the dataset).
     *
     * @var float|null
     */
    protected $gamma;

    /**
     * Original dataset used to fit KernelPCA
     *
     * @var array
     */
    protected $data;

    /**
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
     * techniques of kernel methods. It is more suitable for data that involves
     * vectors that are not linearly separable<br><br>
     * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
     * will initialize the algorithm with an RBF kernel having the gamma parameter as 15.0. <br>
     * This transformation will return the same number of rows with only <i>2</i> columns.
     *
     * @param int        $kernel        One of the KERNEL_* constants
     * @param float|null $totalVariance Total variance to be preserved if numFeatures is not given
     * @param int|null   $numFeatures   Number of columns to be returned
     * @param float|null $gamma         Gamma parameter used by the RBF, Sigmoid and Laplacian kernels;
     *                                  when null, fit() sets it to 1 / number of rows
     *
     * @throws \Exception If $kernel is not one of the supported kernels
     */
    public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
    {
        $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
        // Strict comparison: $kernel is already an int, so no type juggling is wanted here
        if (! in_array($kernel, $availableKernels, true)) {
            throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
        }

        parent::__construct($totalVariance, $numFeatures);

        $this->kernel = $kernel;
        $this->gamma = $gamma;
    }

    /**
     * Takes a data and returns a lower dimensional version
     * of this data while preserving $totalVariance or $numFeatures. <br>
     * $data is an n-by-m matrix and returned array is
     * n-by-k matrix where k <= m
     *
     * Internally the n-by-n kernel matrix is built and centered, then handed to
     * the parent's eigenDecomposition(); the transposed eigenvectors are returned.
     *
     * @param array $data
     *
     * @return array
     *
     * @throws \Exception If $data is empty
     */
    public function fit(array $data)
    {
        $numRows = count($data);
        // Guard against division by zero below (default gamma and matrix centering use 1/n)
        if ($numRows === 0) {
            throw new \Exception("KernelPCA::fit() requires a non-empty dataset");
        }

        $this->data = $data;

        if ($this->gamma === null) {
            $this->gamma = 1.0 / $numRows;
        }

        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
        $matrix = $this->centerMatrix($matrix, $numRows);

        list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($matrix, $numRows);

        $this->fit = true;

        return Matrix::transposeArray($this->eigVectors);
    }

    /**
     * Calculates similarity matrix by use of selected kernel function<br>
     * An n-by-m matrix is given and an n-by-n matrix is returned
     *
     * @param array $data
     * @param int $numRows
     *
     * @return array
     */
    protected function calculateKernelMatrix(array $data, int $numRows)
    {
        $kernelFunc = $this->getKernel();

        $matrix = [];
        for ($i = 0; $i < $numRows; $i++) {
            for ($k = 0; $k < $numRows; $k++) {
                if ($i <= $k) {
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
                } else {
                    // Lower triangle: reuse the value already computed for (k, i)
                    $matrix[$i][$k] = $matrix[$k][$i];
                }
            }
        }

        return $matrix;
    }

    /**
     * Kernel matrix is centered in its original space by using the following
     * conversion:
     *
     * K′ = K − N.K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
     *
     * @param array $matrix
     * @param int $n
     *
     * @return array Centered kernel matrix as a plain array
     */
    protected function centerMatrix(array $matrix, int $n)
    {
        $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
        $N = new Matrix($N, false);
        $K = new Matrix($matrix, false);

        // K.N (This term is repeated so we cache it once)
        $K_N = $K->multiply($N);
        // N.K
        $N_K = $N->multiply($K);
        // N.K.N
        $N_K_N = $N->multiply($K_N);

        return $K->subtract($N_K)
                 ->subtract($K_N)
                 ->add($N_K_N)
                 ->toArray();
    }

    /**
     * Returns the callable kernel function
     *
     * @return \Closure
     *
     * @throws \Exception If the configured kernel is not one of the KERNEL_* constants
     */
    protected function getKernel()
    {
        switch ($this->kernel) {
            case self::KERNEL_LINEAR:
                // k(x,y) = xT.y
                return function ($x, $y) {
                    return Matrix::dot($x, $y)[0];
                };

            case self::KERNEL_RBF:
                // k(x,y)=exp(-γ.|x-y|^2) where |..| is Euclidean distance
                $dist = new Euclidean();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
                };

            case self::KERNEL_SIGMOID:
                // k(x,y)=tanh(γ.xT.y+c0) where c0=1
                // The offset c0 is added after scaling by gamma, as in the formula above
                return function ($x, $y) {
                    return tanh($this->gamma * Matrix::dot($x, $y)[0] + 1.0);
                };

            case self::KERNEL_LAPLACIAN:
                // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
                $dist = new Manhattan();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->distance($x, $y));
                };

            default:
                // Not reachable through the constructor, which validates the kernel;
                // fail loudly instead of returning null if the property was altered.
                throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
        }
    }

    /**
     * Evaluates the kernel function between the given sample and
     * every row of the dataset stored by fit().
     *
     * @param array $sample
     *
     * @return array One kernel value per row of the fitted dataset
     */
    protected function getDistancePairs(array $sample)
    {
        $kernel = $this->getKernel();

        $pairs = [];
        foreach ($this->data as $row) {
            $pairs[] = $kernel($row, $sample);
        }

        return $pairs;
    }

    /**
     * Projects the kernel values of a sample onto the eigenvectors,
     * each divided by its eigenvalue.
     *
     * @param array $pairs Kernel values as returned by getDistancePairs()
     *
     * @return array
     */
    protected function projectSample(array $pairs)
    {
        // Normalize eigenvectors by eig = eigVectors / eigValues
        $func = function ($eigVal, $eigVect) {
            $m = new Matrix($eigVect, false);
            $a = $m->divideByScalar($eigVal)->toArray();

            return $a[0];
        };
        $eig = array_map($func, $this->eigValues, $this->eigVectors);

        // return k.dot(eig)
        return Matrix::dot($pairs, $eig);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the variables obtained during the last run of <code>fit</code>.
     *
     * @param array $sample One-dimensional array of feature values
     *
     * @return array
     *
     * @throws \Exception If fit() has not been run yet or $sample is not one-dimensional
     */
    public function transform(array $sample)
    {
        if (!$this->fit) {
            throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
        }

        // "?? null" avoids an undefined-offset notice when index 0 is absent
        if (is_array($sample[0] ?? null)) {
            throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
        }

        $pairs = $this->getDistancePairs($sample);

        return $this->projectSample($pairs);
    }
}
247