These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\DimensionReduction; |
||
6 | |||
7 | use Phpml\Math\Distance\Euclidean; |
||
8 | use Phpml\Math\Distance\Manhattan; |
||
9 | use Phpml\Math\Matrix; |
||
10 | |||
/**
 * Kernel principal component analysis.
 *
 * Builds an n-by-n kernel (similarity) matrix from the n training samples,
 * centers it, and hands it to the eigen decomposition inherited from PCA.
 * New samples are projected through their kernel values against the stored
 * training data.
 */
class KernelPCA extends PCA
{
    const KERNEL_RBF = 1;
    const KERNEL_SIGMOID = 2;
    const KERNEL_LAPLACIAN = 3;
    const KERNEL_LINEAR = 4;

    /**
     * Selected kernel function (one of the KERNEL_* constants)
     *
     * @var int
     */
    protected $kernel;

    /**
     * Gamma value used by the RBF, Sigmoid and Laplacian kernels.
     * Stays null until fit() derives a default when none was supplied.
     *
     * @var float|null
     */
    protected $gamma;

    /**
     * Original dataset used to fit KernelPCA; transform() evaluates the
     * kernel between every row of this dataset and the new sample.
     *
     * @var array
     */
    protected $data;

    /**
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
     * techniques of kernel methods. It is more suitable for data that involves
     * vectors that are not linearly separable.
     *
     * Example: $kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);
     * initializes the algorithm with an RBF kernel whose gamma parameter is 15.0
     * and asks for 2 columns in the transformed data.
     *
     * @param int   $kernel        One of the KERNEL_* constants
     * @param float $totalVariance Total variance to be preserved if numFeatures is not given
     * @param int   $numFeatures   Number of columns to be returned
     * @param float $gamma         Gamma parameter used by the RBF, Sigmoid and Laplacian kernels;
     *                             when null, fit() sets it to 1 / (number of rows)
     *
     * @throws \Exception when $kernel is not one of the KERNEL_* constants
     */
    public function __construct(int $kernel = self::KERNEL_RBF, $totalVariance = null, $numFeatures = null, $gamma = null)
    {
        $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
        // Strict comparison: only the exact integer constants are accepted.
        if (!in_array($kernel, $availableKernels, true)) {
            throw new \Exception("KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian");
        }

        parent::__construct($totalVariance, $numFeatures);

        $this->kernel = $kernel;
        $this->gamma = $gamma;
    }

    /**
     * Fits the model to $data (an n-by-m matrix, one sample per row) and
     * returns the transposed eigenvectors produced by the inherited
     * eigenDecomposition() for the centered kernel matrix.
     *
     * Note: when no gamma was given, the derived default (1 / n) is stored
     * on the instance, so later calls to fit() reuse that value.
     *
     * @param array $data
     *
     * @return array
     *
     * @throws \Exception when $data contains no rows
     */
    public function fit(array $data)
    {
        $numRows = count($data);
        // Both the default gamma and the centering matrix divide by the row
        // count, so an empty dataset is rejected before any state is touched.
        if ($numRows === 0) {
            throw new \Exception('KernelPCA::fit() requires a dataset with at least one sample');
        }

        $this->data = $data;

        if ($this->gamma === null) {
            $this->gamma = 1.0 / $numRows;
        }

        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
        $matrix = $this->centerMatrix($matrix, $numRows);

        $this->eigenDecomposition($matrix);

        $this->fit = true;

        return Matrix::transposeArray($this->eigVectors);
    }

    /**
     * Calculates the similarity matrix with the selected kernel function.
     * An n-by-m matrix is given and an n-by-n matrix is returned.
     *
     * @param array $data    Samples, indexed 0..$numRows-1
     * @param int   $numRows Number of samples in $data
     *
     * @return array
     */
    protected function calculateKernelMatrix(array $data, int $numRows)
    {
        $kernelFunc = $this->getKernel();

        $matrix = [];
        for ($i = 0; $i < $numRows; ++$i) {
            for ($k = 0; $k < $numRows; ++$k) {
                if ($i <= $k) {
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
                } else {
                    // Lower triangle: reuse the value already computed for
                    // the mirrored cell instead of evaluating the kernel again.
                    $matrix[$i][$k] = $matrix[$k][$i];
                }
            }
        }

        return $matrix;
    }

    /**
     * Centers the kernel matrix using the following conversion:
     *
     * K' = K - N.K - K.N + N.K.N where N is an n-by-n matrix filled with 1/n
     *
     * @param array $matrix Kernel matrix (n-by-n)
     * @param int   $n      Number of rows/columns of $matrix
     *
     * @return array
     */
    protected function centerMatrix(array $matrix, int $n)
    {
        $N = array_fill(0, $n, array_fill(0, $n, 1.0/$n));
        $N = new Matrix($N, false);
        $K = new Matrix($matrix, false);

        // K.N (reused below to build N.K.N, so it is computed only once)
        $K_N = $K->multiply($N);
        // N.K
        $N_K = $N->multiply($K);
        // N.K.N
        $N_K_N = $N->multiply($K_N);

        return $K->subtract($N_K)
                 ->subtract($K_N)
                 ->add($N_K_N)
                 ->toArray();
    }

    /**
     * Returns the callable kernel function for the configured kernel.
     * The closures read $this->gamma when they are invoked, not when they
     * are created.
     *
     * @return \Closure
     *
     * @throws \Exception when the configured kernel is not a known constant
     */
    protected function getKernel()
    {
        switch ($this->kernel) {
            case self::KERNEL_LINEAR:
                // k(x,y) = xT.y
                return function ($x, $y) {
                    return Matrix::dot($x, $y)[0];
                };

            case self::KERNEL_RBF:
                // k(x,y)=exp(-γ.d(x,y)) where d is Euclidean::sqDistance()
                // (presumably the squared Euclidean distance, |x-y|²)
                $dist = new Euclidean();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
                };

            case self::KERNEL_SIGMOID:
                // k(x,y)=tanh(γ.(xT.y+c0)) where c0=1
                return function ($x, $y) {
                    $res = Matrix::dot($x, $y)[0] + 1.0;
                    return tanh($this->gamma * $res);
                };

            case self::KERNEL_LAPLACIAN:
                // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
                $dist = new Manhattan();
                return function ($x, $y) use ($dist) {
                    return exp(-$this->gamma * $dist->distance($x, $y));
                };

            default:
                throw new \Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
        }
    }

    /**
     * Evaluates the kernel between every stored training row and $sample.
     *
     * @param array $sample
     *
     * @return array One kernel value per training row, in training order
     */
    protected function getDistancePairs(array $sample)
    {
        $kernel = $this->getKernel();

        $pairs = [];
        foreach ($this->data as $row) {
            $pairs[] = $kernel($row, $sample);
        }

        return $pairs;
    }

    /**
     * Projects the kernel values of a sample onto the eigenvectors, each
     * divided by its eigenvalue.
     *
     * @param array $pairs Kernel values as returned by getDistancePairs()
     *
     * @return array
     */
    protected function projectSample(array $pairs)
    {
        // Normalize eigenvectors by eig = eigVectors / eigValues
        $func = function ($eigVal, $eigVect) {
            $m = new Matrix($eigVect, false);
            $a = $m->divideByScalar($eigVal)->toArray();

            return $a[0];
        };
        $eig = array_map($func, $this->eigValues, $this->eigVectors);

        // return k.dot(eig)
        return Matrix::dot($pairs, $eig);
    }

    /**
     * Transforms the given sample to a lower dimensional vector by using
     * the variables obtained during the last run of fit().
     *
     * @param array $sample A single one-dimensional sample
     *
     * @return array
     *
     * @throws \Exception when fit() has not been run, or when $sample is not one-dimensional
     */
    public function transform(array $sample)
    {
        if (!$this->fit) {
            throw new \Exception("KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first");
        }

        // Look at the first element through reset() rather than $sample[0], so an
        // empty or non-zero-indexed sample does not raise an undefined-offset notice.
        // $sample is a by-value copy, so the caller's array pointer is untouched.
        $first = reset($sample);
        if (is_array($first)) {
            throw new \Exception("KernelPCA::transform() accepts only one-dimensional arrays");
        }

        $pairs = $this->getDistancePairs($sample);

        return $this->projectSample($pairs);
    }
}
||
256 |
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.