Test Failed
Pull Request — master (#81)
by
unknown
03:06
created

KernelPCA   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 234
Duplicated Lines 5.13 %

Coupling/Cohesion

Components 1
Dependencies 4

Importance

Changes 0
Metric Value
wmc 20
lcom 1
cbo 4
dl 12
loc 234
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 12 2
A fit() 0 19 2
A calculateKernelMatrix() 0 17 4
A centerMatrix() 0 18 1
B getKernel() 12 30 5
A getDistancePairs() 0 11 2
A projectSample() 0 14 1
A transform() 0 17 3

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Phpml\Math\Distance\Euclidean;
8
use Phpml\Math\Distance\Manhattan;
9
use Phpml\Math\Matrix;
10
11
/**
 * Kernel PCA: principal component analysis carried out on a kernel
 * (similarity) matrix of the samples instead of on the raw features.
 *
 * fit() builds and centers the kernel matrix of the dataset and hands it to
 * the eigen decomposition inherited from PCA; transform() projects a single
 * new sample by evaluating the kernel between it and every fitted row.
 */
class KernelPCA extends PCA
{
    const KERNEL_RBF = 1;
    const KERNEL_SIGMOID = 2;
    const KERNEL_LAPLACIAN = 3;
    const KERNEL_LINEAR = 4;

    /**
     * Selected kernel function (one of the KERNEL_* constants)
     *
     * @var int
     */
    protected $kernel;

    /**
     * Gamma value used by the RBF, sigmoid and Laplacian kernels.
     * Stays null until fit() is called when no value was given to the constructor.
     *
     * @var float|null
     */
    protected $gamma;

    /**
     * Dataset used to fit KernelPCA, stored as returned by normalize()
     *
     * @var array
     */
    protected $data;

    /**
     * @param int   $kernel        One of the KERNEL_* constants
     * @param float $totalVariance Forwarded unchanged to the PCA constructor
     * @param int   $numFeatures   Forwarded unchanged to the PCA constructor
     * @param float $gamma         Kernel gamma; when null, fit() uses 1 / (number of rows)
     *
     * @throws \Exception if $kernel is not one of the KERNEL_* constants
     */
    public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
    {
        $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
        if (! in_array($kernel, $availableKernels, true)) {
            throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
        }

        parent::__construct($totalVariance, $numFeatures);

        $this->kernel = $kernel;
        $this->gamma = $gamma;
    }

    /**
     * Fits the model to $data and returns the transposed eigenvectors of the
     * centered kernel matrix.
     *
     * $data is an n-by-m matrix (n samples, m features). The number of
     * retained components is decided by the inherited eigenDecomposition().
     *
     * @param array $data
     *
     * @return array
     *
     * @throws \Exception if $data is empty or its first row is not a non-empty array
     */
    public function fit(array $data)
    {
        // Guard first: an empty dataset would otherwise hit an undefined
        // offset on $data[0] and a division by zero for the default gamma.
        if (empty($data[0]) || ! is_array($data[0])) {
            throw new \Exception("KernelPCA::fit() requires a non-empty two-dimensional dataset");
        }

        $numRows = count($data);
        $numCols = count($data[0]);

        // Default gamma is derived from the sample count. Once set it is kept,
        // so later calls to fit() reuse the value computed here.
        if ($this->gamma === null) {
            $this->gamma = 1.0 / $numRows;
        }

        $this->data = $this->normalize($data, $numCols);
        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
        $matrix = $this->centerMatrix($matrix, $numRows);

        list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($matrix, $numRows);

        $this->fit = true;

        return Matrix::transposeArray($this->eigVectors);
    }

    /**
     * Calculates the similarity matrix using the selected kernel function.
     * An n-by-m matrix is given and an n-by-n matrix is returned.
     *
     * @param array $data
     * @param int $numRows
     *
     * @return array
     */
    protected function calculateKernelMatrix(array $data, int $numRows)
    {
        $kernelFunc = $this->getKernel();

        $matrix = [];
        for ($i = 0; $i < $numRows; $i++) {
            for ($k = 0; $k < $numRows; $k++) {
                if ($i <= $k) {
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
                } else {
                    // The matrix is symmetric: reuse the value already
                    // computed for the mirrored cell in an earlier row.
                    $matrix[$i][$k] = $matrix[$k][$i];
                }
            }
        }

        return $matrix;
    }

    /**
     * Centers the kernel matrix using the following conversion:
     *
     * K' = K - N.K - K.N + N.K.N where N is an n-by-n matrix filled with 1/n
     *
     * @param array $matrix
     * @param int $n
     *
     * @return array
     */
    protected function centerMatrix(array $matrix, int $n)
    {
        $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
        $N = new Matrix($N, false);
        $K = new Matrix($matrix, false);

        // K.N (computed once and reused for the N.K.N term below)
        $K_N = $K->multiply($N);
        // N.K
        $N_K = $N->multiply($K);
        // N.K.N
        $N_K_N = $N->multiply($K_N);

        return $K->subtract($N_K)
                 ->subtract($K_N)
                 ->add($N_K_N)
                 ->toArray();
    }

    /**
     * Returns the callable kernel function for the configured kernel.
     *
     * The returned closures read $this->gamma when they are invoked, not
     * when they are created.
     *
     * @return \Closure
     *
     * @throws \Exception if the configured kernel is not a KERNEL_* constant
     */
    protected function getKernel()
    {
        switch ($this->kernel) {
            case self::KERNEL_LINEAR:
                // k(x,y) = xT.y
                return function ($x, $y) {
                    return Matrix::dot($x, $y)[0];
                };

            case self::KERNEL_RBF:
                // k(x,y) = exp(-γ.|x-y|^2) where |..| is Euclidean distance
                $dist = new Euclidean();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
                };

            case self::KERNEL_SIGMOID:
                // k(x,y) = tanh(γ.xT.y + c0) where c0 = 1
                // Gamma scales the dot product only; c0 is added afterwards.
                return function ($x, $y) {
                    return tanh($this->gamma * Matrix::dot($x, $y)[0] + 1.0);
                };

            case self::KERNEL_LAPLACIAN:
                // k(x,y) = exp(-γ.|x-y|) where |..| is Manhattan distance
                $dist = new Manhattan();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->distance($x, $y));
                };

            default:
                // The constructor validates the kernel, but the property is
                // protected; fail loudly instead of returning null.
                throw new \Exception("KernelPCA was configured with an unsupported kernel");
        }
    }

    /**
     * Evaluates the kernel between every row of the fitted dataset and the
     * given sample. Despite the method name, the values are kernel results,
     * not raw distances.
     *
     * @param array $sample
     *
     * @return array one kernel value per fitted row, in row order
     */
    protected function getDistancePairs(array $sample)
    {
        $kernel = $this->getKernel();

        $pairs = [];
        foreach ($this->data as $row) {
            $pairs[] = $kernel($row, $sample);
        }

        return $pairs;
    }

    /**
     * Projects the kernel values of a sample onto the fitted eigenvectors.
     *
     * @param array $pairs kernel values as produced by getDistancePairs()
     *
     * @return array
     */
    protected function projectSample(array $pairs)
    {
        // Normalize eigenvectors by eig = eigVectors / eigValues
        $func = function ($eigVal, $eigVect) {
            $m = new Matrix($eigVect, false);
            $a = $m->divideByScalar($eigVal)->toArray();

            // Only the first row of the array form is kept.
            return $a[0];
        };
        $eig = array_map($func, $this->eigValues, $this->eigVectors);

        // return k.dot(eig)
        return Matrix::dot($pairs, $eig);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the variables obtained during the last run of fit().
     *
     * @param array $sample one-dimensional feature vector
     *
     * @return array
     *
     * @throws \Exception if fit() has not been called, or if $sample is empty
     *                    or not one-dimensional
     */
    public function transform(array $sample)
    {
        if (! $this->fit) {
            throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
        }

        if (empty($sample)) {
            throw new \Exception("KernelPCA::transform() requires a non-empty sample");
        }

        // Inspect the first element rather than index 0 so that a missing
        // offset cannot raise a notice.
        if (is_array(reset($sample))) {
            throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
        }

        $sample = $this->normalize([$sample], count($sample));
        $sample = $sample[0];

        $pairs = $this->getDistancePairs($sample);

        return $this->projectSample($pairs);
    }
}
245