Test Failed
Pull Request — master (#81)
by
unknown
04:16
created

KernelPCA::projectSample()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 14
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 7
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Phpml\Math\Distance\Euclidean;
8
use Phpml\Math\Distance\Manhattan;
9
use Phpml\Math\Matrix;
10
11
class KernelPCA extends PCA
12
{
13
    const KERNEL_RBF = 1;
14
    const KERNEL_SIGMOID = 2;
15
    const KERNEL_LAPLACIAN = 3;
16
17
    /**
18
     * Selected kernel function
19
     *
20
     * @var int
21
     */
22
    protected $kernel;
23
24
    /**
25
     * Gamma value used by the kernel
26
     *
27
     * @var float
28
     */
29
    protected $gamma;
30
31
    /**
32
     * Original dataset used to fit KernelPCA
33
     *
34
     * @var array
35
     */
36
    protected $data;
37
38
    /**
39
     *
40
     * @param int $kernel
41
     * @param float $totalVariance
42
     * @param int $numFeatures
43
     * @param float $gamma
44
     *
45
     * @throws \Exception
46
     */
47
    public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
48
    {
49
        if (! in_array($kernel, [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN])) {
50
            throw new \Exception("KernelPCA can be initialized with the following kernels only: RBF, Sigmoid and Laplacian");
51
        }
52
53
        parent::__construct($totalVariance, $numFeatures);
54
55
        $this->kernel = $kernel;
56
        $this->gamma = $gamma;
57
    }
58
59
    /**
60
     * Takes a data and returns a lower dimensional version
61
     * of this data while preserving $totalVariance or $numFeatures. <br>
62
     * $data is an n-by-m matrix and returned array is
63
     * n-by-k matrix where k <= m
64
     *
65
     * @param array $data
66
     *
67
     * @return array
68
     */
69
    public function fit(array $data)
70
    {
71
        $numRows = count($data);
72
        $numCols = count($data[0]);
73
74
        if ($this->gamma === null) {
75
            $this->gamma = 1.0 / $numRows;
76
        }
77
78
        $this->data = $this->normalize($data, $numCols);
79
        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
80
        $matrix = $this->centerMatrix($matrix, $numRows);
81
82
        list($this->eigValues, $this->eigVectors) = $this->eigenDecomposition($matrix, $numRows);
83
84
        $this->fit = true;
85
86
        return Matrix::transposeArray($this->eigVectors);
87
    }
88
89
    /**
90
     * Calculates similarity matrix by use of selected kernel function<br>
91
     * An n-by-m matrix is given and an n-by-n matrix is returned
92
     *
93
     * @param array $data
94
     * @param int $numRows
95
     *
96
     * @return array
97
     */
98
    protected function calculateKernelMatrix(array $data, int $numRows)
99
    {
100
        $kernelFunc = $this->getKernel();
101
102
        $matrix = [];
103
        for ($i=0; $i < $numRows; $i++) {
104
            for ($k=0; $k < $numRows; $k++) {
105
                if ($i <= $k) {
106
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
107
                } else {
108
                    $matrix[$i][$k] = $matrix[$k][$i];
109
                }
110
            }
111
        }
112
113
        return $matrix;
114
    }
115
116
    /**
117
     * Kernel matrix is centered in its original space by using the following
118
     * conversion:
119
     *
120
     * K′ = K− N . K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
121
     *
122
     * @param array $matrix
123
     * @param int $n
124
     */
125
    protected function centerMatrix(array $matrix, int $n)
126
    {
127
        $N = array_fill(0, $n, array_fill(0, $n, 1.0/$n));
128
        $N = new Matrix($N, false);
129
        $K = new Matrix($matrix, false);
130
131
        // K.N (This term is repeated so we cache it once)
132
        $K_N = $K->multiply($N);
133
        // N.K
134
        $N_K = $N->multiply($K);
135
        // N.K.N
136
        $N_K_N = $N->multiply($K_N);
137
138
        return $K->subtract($N_K)
139
                 ->subtract($K_N)
140
                 ->add($N_K_N)
141
                 ->toArray();
142
    }
143
144
    /**
145
     * Returns the callable kernel function
146
     *
147
     * @return \Closure
148
     */
149
    protected function getKernel()
150
    {
151
        /**
152
         * Each kernel function operates in two modes:
153
         * 1) k(x, y) mode: x and y is given
154
         * 2) k(x) mode: only x is given
155
         */
156
        switch ($this->kernel) {
157 View Code Duplication
            case self::KERNEL_RBF:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
158
                // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
159
                $dist = new Euclidean();
160
                return function ($x, $y = null) use ($dist) {
161
                    if ($y === null) {
162
                        foreach ($x as $i => $element) {
163
                            $x[$i] = exp(-$this->gamma * $element);
164
                        }
165
166
                        return $x;
167
                    }
168
169
                    return exp(-$this->gamma *    $dist->distance($x, $y));
170
                };
171
172
            case self::KERNEL_SIGMOID:
173
                // k(x,y)=tanh(γ.xT.y+c0) where c0=0
174
                return function ($x, $y = null) {
175
                    if ($y === null) {
176
                        foreach ($x as $i => $element) {
177
                            $x[$i] = tanh($this->gamma * $element);
178
                        }
179
180
                        return $x;
181
                    }
182
183
                    $x = new Matrix($x);
184
                    $y = new Matrix($y);
185
                    $res = $x->multiply($y->transpose());
186
187
                    return tanh($this->gamma * $res->toScalar());
188
                };
189
190 View Code Duplication
            case self::KERNEL_LAPLACIAN:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
191
                // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
192
                $dist = new Manhattan();
193
                return function ($x, $y = null) use ($dist) {
194
                    if ($y === null) {
195
                        foreach ($x as $i => $element) {
196
                            $x[$i] = exp(-$this->gamma * $element);
197
                        }
198
199
                        return $x;
200
                    }
201
202
                    return exp(-$this->gamma * $dist->distance($x, $y));
203
                };
204
        }
205
    }
206
207
    /**
208
     * @param array $sample
209
     *
210
     * @return array
211
     */
212
    protected function getDistancePairs(array $sample)
213
    {
214
        $kernel = $this->getKernel();
215
        $pairs = [];
216
        foreach ($this->data as $row) {
217
            $pairs[] = $kernel($row, $sample);
218
        }
219
220
        return $pairs;
221
    }
222
223
    /**
224
     * @param array $pairs
225
     *
226
     * @return array
227
     */
228
    protected function projectSample(array $pairs)
229
    {
230
        // Normalize eigenvectors by eig = eigVectors / eigValues
231
        $func = function ($eigVal, $eigVect) {
232
            $m = new Matrix($eigVect, false);
233
            $a = $m->divideByScalar(sqrt($eigVal))->toArray();
234
235
            return $a[0];
236
        };
237
        $eig = array_map($func, $this->eigValues, $this->eigVectors);
238
239
        // return k.dot(eig)
240
        return Matrix::dot($pairs, $eig);
241
    }
242
243
    /**
244
     * Transforms the given sample to a lower dimensional vector by using
245
     * the variables obtained during the last run of <code>fit</code>.
246
     *
247
     * @param array $sample
248
     *
249
     * @return array
250
     */
251
    public function transform(array $sample)
252
    {
253
        if (!$this->fit) {
254
            throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
255
        }
256
257
        if (is_array($sample[0])) {
258
            throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
259
        }
260
261
        $sample = $this->normalize([$sample], count($sample));
262
        $sample = $sample[0];
263
264
        $pairs = $this->getDistancePairs($sample);
265
266
        return $this->projectSample($pairs);
267
    }
268
}
269