1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Phpml\DimensionReduction; |
6
|
|
|
|
7
|
|
|
use Closure; |
8
|
|
|
use Exception; |
9
|
|
|
use Phpml\Math\Distance\Euclidean; |
10
|
|
|
use Phpml\Math\Distance\Manhattan; |
11
|
|
|
use Phpml\Math\Matrix; |
12
|
|
|
|
13
|
|
|
class KernelPCA extends PCA |
14
|
|
|
{ |
15
|
|
|
public const KERNEL_RBF = 1; |
16
|
|
|
|
17
|
|
|
public const KERNEL_SIGMOID = 2; |
18
|
|
|
|
19
|
|
|
public const KERNEL_LAPLACIAN = 3; |
20
|
|
|
|
21
|
|
|
public const KERNEL_LINEAR = 4; |
22
|
|
|
|
23
|
|
|
/** |
24
|
|
|
* Selected kernel function |
25
|
|
|
* |
26
|
|
|
* @var int |
27
|
|
|
*/ |
28
|
|
|
protected $kernel; |
29
|
|
|
|
30
|
|
|
/** |
31
|
|
|
* Gamma value used by the kernel |
32
|
|
|
* |
33
|
|
|
* @var float|null |
34
|
|
|
*/ |
35
|
|
|
protected $gamma; |
36
|
|
|
|
37
|
|
|
/** |
38
|
|
|
* Original dataset used to fit KernelPCA |
39
|
|
|
* |
40
|
|
|
* @var array |
41
|
|
|
*/ |
42
|
|
|
protected $data = []; |
43
|
|
|
|
44
|
|
|
/** |
45
|
|
|
* Kernel principal component analysis (KernelPCA) is an extension of PCA using |
46
|
|
|
* techniques of kernel methods. It is more suitable for data that involves |
47
|
|
|
* vectors that are not linearly separable<br><br> |
48
|
|
|
* Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b> |
49
|
|
|
* will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br> |
50
|
|
|
* This transformation will return the same number of rows with only <i>2</i> columns. |
51
|
|
|
* |
52
|
|
|
* @param float $totalVariance Total variance to be preserved if numFeatures is not given |
53
|
|
|
* @param int $numFeatures Number of columns to be returned |
54
|
|
|
* @param float $gamma Gamma parameter is used with RBF and Sigmoid kernels |
55
|
|
|
* |
56
|
|
|
* @throws \Exception |
57
|
|
|
*/ |
58
|
|
|
public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null) |
59
|
|
|
{ |
60
|
|
|
$availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR]; |
61
|
|
|
if (!in_array($kernel, $availableKernels)) { |
62
|
|
|
throw new Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian'); |
63
|
|
|
} |
64
|
|
|
|
65
|
|
|
parent::__construct($totalVariance, $numFeatures); |
66
|
|
|
|
67
|
|
|
$this->kernel = $kernel; |
68
|
|
|
$this->gamma = $gamma; |
69
|
|
|
} |
70
|
|
|
|
71
|
|
|
/** |
72
|
|
|
* Takes a data and returns a lower dimensional version |
73
|
|
|
* of this data while preserving $totalVariance or $numFeatures. <br> |
74
|
|
|
* $data is an n-by-m matrix and returned array is |
75
|
|
|
* n-by-k matrix where k <= m |
76
|
|
|
*/ |
77
|
|
|
public function fit(array $data): array |
78
|
|
|
{ |
79
|
|
|
$numRows = count($data); |
80
|
|
|
$this->data = $data; |
81
|
|
|
|
82
|
|
|
if ($this->gamma === null) { |
83
|
|
|
$this->gamma = 1.0 / $numRows; |
84
|
|
|
} |
85
|
|
|
|
86
|
|
|
$matrix = $this->calculateKernelMatrix($this->data, $numRows); |
87
|
|
|
$matrix = $this->centerMatrix($matrix, $numRows); |
88
|
|
|
|
89
|
|
|
$this->eigenDecomposition($matrix); |
90
|
|
|
|
91
|
|
|
$this->fit = true; |
92
|
|
|
|
93
|
|
|
return Matrix::transposeArray($this->eigVectors); |
94
|
|
|
} |
95
|
|
|
|
96
|
|
|
/** |
97
|
|
|
* Transforms the given sample to a lower dimensional vector by using |
98
|
|
|
* the variables obtained during the last run of <code>fit</code>. |
99
|
|
|
* |
100
|
|
|
* @throws \Exception |
101
|
|
|
*/ |
102
|
|
|
public function transform(array $sample): array |
103
|
|
|
{ |
104
|
|
|
if (!$this->fit) { |
105
|
|
|
throw new Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first'); |
106
|
|
|
} |
107
|
|
|
|
108
|
|
|
if (is_array($sample[0])) { |
109
|
|
|
throw new Exception('KernelPCA::transform() accepts only one-dimensional arrays'); |
110
|
|
|
} |
111
|
|
|
|
112
|
|
|
$pairs = $this->getDistancePairs($sample); |
113
|
|
|
|
114
|
|
|
return $this->projectSample($pairs); |
115
|
|
|
} |
116
|
|
|
|
117
|
|
|
/** |
118
|
|
|
* Calculates similarity matrix by use of selected kernel function<br> |
119
|
|
|
* An n-by-m matrix is given and an n-by-n matrix is returned |
120
|
|
|
*/ |
121
|
|
|
protected function calculateKernelMatrix(array $data, int $numRows): array |
122
|
|
|
{ |
123
|
|
|
$kernelFunc = $this->getKernel(); |
124
|
|
|
|
125
|
|
|
$matrix = []; |
126
|
|
|
for ($i = 0; $i < $numRows; ++$i) { |
127
|
|
|
for ($k = 0; $k < $numRows; ++$k) { |
128
|
|
|
if ($i <= $k) { |
129
|
|
|
$matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]); |
130
|
|
|
} else { |
131
|
|
|
$matrix[$i][$k] = $matrix[$k][$i]; |
132
|
|
|
} |
133
|
|
|
} |
134
|
|
|
} |
135
|
|
|
|
136
|
|
|
return $matrix; |
137
|
|
|
} |
138
|
|
|
|
139
|
|
|
/** |
140
|
|
|
* Kernel matrix is centered in its original space by using the following |
141
|
|
|
* conversion: |
142
|
|
|
* |
143
|
|
|
* K′ = K − N.K − K.N + N.K.N where N is n-by-n matrix filled with 1/n |
144
|
|
|
*/ |
145
|
|
|
protected function centerMatrix(array $matrix, int $n): array |
146
|
|
|
{ |
147
|
|
|
$N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n)); |
148
|
|
|
$N = new Matrix($N, false); |
149
|
|
|
$K = new Matrix($matrix, false); |
150
|
|
|
|
151
|
|
|
// K.N (This term is repeated so we cache it once) |
152
|
|
|
$K_N = $K->multiply($N); |
|
|
|
|
153
|
|
|
// N.K |
154
|
|
|
$N_K = $N->multiply($K); |
|
|
|
|
155
|
|
|
// N.K.N |
156
|
|
|
$N_K_N = $N->multiply($K_N); |
157
|
|
|
|
158
|
|
|
return $K->subtract($N_K) |
159
|
|
|
->subtract($K_N) |
160
|
|
|
->add($N_K_N) |
161
|
|
|
->toArray(); |
162
|
|
|
} |
163
|
|
|
|
164
|
|
|
/** |
165
|
|
|
* Returns the callable kernel function |
166
|
|
|
* |
167
|
|
|
* @throws \Exception |
168
|
|
|
*/ |
169
|
|
|
protected function getKernel(): Closure |
170
|
|
|
{ |
171
|
|
|
switch ($this->kernel) { |
172
|
|
|
case self::KERNEL_LINEAR: |
173
|
|
|
// k(x,y) = xT.y |
174
|
|
|
return function ($x, $y) { |
175
|
|
|
return Matrix::dot($x, $y)[0]; |
176
|
|
|
}; |
177
|
|
|
case self::KERNEL_RBF: |
178
|
|
|
// k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance |
179
|
|
|
$dist = new Euclidean(); |
180
|
|
|
|
181
|
|
|
return function ($x, $y) use ($dist) { |
182
|
|
|
return exp(-$this->gamma * $dist->sqDistance($x, $y)); |
183
|
|
|
}; |
184
|
|
|
|
185
|
|
|
case self::KERNEL_SIGMOID: |
186
|
|
|
// k(x,y)=tanh(γ.xT.y+c0) where c0=1 |
187
|
|
|
return function ($x, $y) { |
188
|
|
|
$res = Matrix::dot($x, $y)[0] + 1.0; |
189
|
|
|
|
190
|
|
|
return tanh($this->gamma * $res); |
191
|
|
|
}; |
192
|
|
|
|
193
|
|
|
case self::KERNEL_LAPLACIAN: |
194
|
|
|
// k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance |
195
|
|
|
$dist = new Manhattan(); |
196
|
|
|
|
197
|
|
|
return function ($x, $y) use ($dist) { |
198
|
|
|
return exp(-$this->gamma * $dist->distance($x, $y)); |
199
|
|
|
}; |
200
|
|
|
|
201
|
|
|
default: |
202
|
|
|
throw new Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel)); |
203
|
|
|
} |
204
|
|
|
} |
205
|
|
|
|
206
|
|
|
protected function getDistancePairs(array $sample): array |
207
|
|
|
{ |
208
|
|
|
$kernel = $this->getKernel(); |
209
|
|
|
|
210
|
|
|
$pairs = []; |
211
|
|
|
foreach ($this->data as $row) { |
212
|
|
|
$pairs[] = $kernel($row, $sample); |
213
|
|
|
} |
214
|
|
|
|
215
|
|
|
return $pairs; |
216
|
|
|
} |
217
|
|
|
|
218
|
|
|
protected function projectSample(array $pairs): array |
219
|
|
|
{ |
220
|
|
|
// Normalize eigenvectors by eig = eigVectors / eigValues |
221
|
|
|
$func = function ($eigVal, $eigVect) { |
222
|
|
|
$m = new Matrix($eigVect, false); |
223
|
|
|
$a = $m->divideByScalar($eigVal)->toArray(); |
224
|
|
|
|
225
|
|
|
return $a[0]; |
226
|
|
|
}; |
227
|
|
|
$eig = array_map($func, $this->eigValues, $this->eigVectors); |
228
|
|
|
|
229
|
|
|
// return k.dot(eig) |
230
|
|
|
return Matrix::dot($pairs, $eig); |
231
|
|
|
} |
232
|
|
|
} |
233
|
|
|
|
It seems like the type of the argument is not accepted by the function/method which you are calling.
In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.
We suggest to add an explicit type cast like in the following example: