Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/DimensionReduction/KernelPCA.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\DimensionReduction;
6
7
use Closure;
8
use Exception;
9
use Phpml\Math\Distance\Euclidean;
10
use Phpml\Math\Distance\Manhattan;
11
use Phpml\Math\Matrix;
12
13
class KernelPCA extends PCA
14
{
15
    public const KERNEL_RBF = 1;
16
17
    public const KERNEL_SIGMOID = 2;
18
19
    public const KERNEL_LAPLACIAN = 3;
20
21
    public const KERNEL_LINEAR = 4;
22
23
    /**
24
     * Selected kernel function
25
     *
26
     * @var int
27
     */
28
    protected $kernel;
29
30
    /**
31
     * Gamma value used by the kernel
32
     *
33
     * @var float|null
34
     */
35
    protected $gamma;
36
37
    /**
38
     * Original dataset used to fit KernelPCA
39
     *
40
     * @var array
41
     */
42
    protected $data = [];
43
44
    /**
45
     * Kernel principal component analysis (KernelPCA) is an extension of PCA using
46
     * techniques of kernel methods. It is more suitable for data that involves
47
     * vectors that are not linearly separable<br><br>
48
     * Example: <b>$kpca = new KernelPCA(KernelPCA::KERNEL_RBF, null, 2, 15.0);</b>
49
     * will initialize the algorithm with an RBF kernel having the gamma parameter as 15,0. <br>
50
     * This transformation will return the same number of rows with only <i>2</i> columns.
51
     *
52
     * @param float $totalVariance Total variance to be preserved if numFeatures is not given
53
     * @param int   $numFeatures   Number of columns to be returned
54
     * @param float $gamma         Gamma parameter is used with RBF and Sigmoid kernels
55
     *
56
     * @throws \Exception
57
     */
58
    public function __construct(int $kernel = self::KERNEL_RBF, ?float $totalVariance = null, ?int $numFeatures = null, ?float $gamma = null)
59
    {
60
        $availableKernels = [self::KERNEL_RBF, self::KERNEL_SIGMOID, self::KERNEL_LAPLACIAN, self::KERNEL_LINEAR];
61
        if (!in_array($kernel, $availableKernels)) {
62
            throw new Exception('KernelPCA can be initialized with the following kernels only: Linear, RBF, Sigmoid and Laplacian');
63
        }
64
65
        parent::__construct($totalVariance, $numFeatures);
66
67
        $this->kernel = $kernel;
68
        $this->gamma = $gamma;
69
    }
70
71
    /**
72
     * Takes a data and returns a lower dimensional version
73
     * of this data while preserving $totalVariance or $numFeatures. <br>
74
     * $data is an n-by-m matrix and returned array is
75
     * n-by-k matrix where k <= m
76
     */
77
    public function fit(array $data): array
78
    {
79
        $numRows = count($data);
80
        $this->data = $data;
81
82
        if ($this->gamma === null) {
83
            $this->gamma = 1.0 / $numRows;
84
        }
85
86
        $matrix = $this->calculateKernelMatrix($this->data, $numRows);
87
        $matrix = $this->centerMatrix($matrix, $numRows);
88
89
        $this->eigenDecomposition($matrix);
90
91
        $this->fit = true;
92
93
        return Matrix::transposeArray($this->eigVectors);
94
    }
95
96
    /**
97
     * Transforms the given sample to a lower dimensional vector by using
98
     * the variables obtained during the last run of <code>fit</code>.
99
     *
100
     * @throws \Exception
101
     */
102
    public function transform(array $sample): array
103
    {
104
        if (!$this->fit) {
105
            throw new Exception('KernelPCA has not been fitted with respect to original dataset, please run KernelPCA::fit() first');
106
        }
107
108
        if (is_array($sample[0])) {
109
            throw new Exception('KernelPCA::transform() accepts only one-dimensional arrays');
110
        }
111
112
        $pairs = $this->getDistancePairs($sample);
113
114
        return $this->projectSample($pairs);
115
    }
116
117
    /**
118
     * Calculates similarity matrix by use of selected kernel function<br>
119
     * An n-by-m matrix is given and an n-by-n matrix is returned
120
     */
121
    protected function calculateKernelMatrix(array $data, int $numRows): array
122
    {
123
        $kernelFunc = $this->getKernel();
124
125
        $matrix = [];
126
        for ($i = 0; $i < $numRows; ++$i) {
127
            for ($k = 0; $k < $numRows; ++$k) {
128
                if ($i <= $k) {
129
                    $matrix[$i][$k] = $kernelFunc($data[$i], $data[$k]);
130
                } else {
131
                    $matrix[$i][$k] = $matrix[$k][$i];
132
                }
133
            }
134
        }
135
136
        return $matrix;
137
    }
138
139
    /**
140
     * Kernel matrix is centered in its original space by using the following
141
     * conversion:
142
     *
143
     * K′ = K − N.K −  K.N + N.K.N where N is n-by-n matrix filled with 1/n
144
     */
145
    protected function centerMatrix(array $matrix, int $n): array
146
    {
147
        $N = array_fill(0, $n, array_fill(0, $n, 1.0 / $n));
148
        $N = new Matrix($N, false);
149
        $K = new Matrix($matrix, false);
150
151
        // K.N (This term is repeated so we cache it once)
152
        $K_N = $K->multiply($N);
0 ignored issues
show
$N is of type object<Phpml\Math\Matrix>, but the function expects a object<self>.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.

We suggest to add an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
153
        // N.K
154
        $N_K = $N->multiply($K);
0 ignored issues
show
$K is of type object<Phpml\Math\Matrix>, but the function expects a object<self>.

It seems like the type of the argument is not accepted by the function/method which you are calling.

In some cases, in particular if PHP’s automatic type-juggling kicks in this might be fine. In other cases, however this might be a bug.

We suggest to add an explicit type cast like in the following example:

function acceptsInteger($int) { }

$x = '123'; // string "123"

// Instead of
acceptsInteger($x);

// we recommend to use
acceptsInteger((integer) $x);
Loading history...
155
        // N.K.N
156
        $N_K_N = $N->multiply($K_N);
157
158
        return $K->subtract($N_K)
159
            ->subtract($K_N)
160
            ->add($N_K_N)
161
            ->toArray();
162
    }
163
164
    /**
165
     * Returns the callable kernel function
166
     *
167
     * @throws \Exception
168
     */
169
    protected function getKernel(): Closure
170
    {
171
        switch ($this->kernel) {
172
            case self::KERNEL_LINEAR:
173
                // k(x,y) = xT.y
174
                return function ($x, $y) {
175
                    return Matrix::dot($x, $y)[0];
176
                };
177
            case self::KERNEL_RBF:
178
                // k(x,y)=exp(-γ.|x-y|) where |..| is Euclidean distance
179
                $dist = new Euclidean();
180
181
                return function ($x, $y) use ($dist) {
182
                    return exp(-$this->gamma * $dist->sqDistance($x, $y));
183
                };
184
185
            case self::KERNEL_SIGMOID:
186
                // k(x,y)=tanh(γ.xT.y+c0) where c0=1
187
                return function ($x, $y) {
188
                    $res = Matrix::dot($x, $y)[0] + 1.0;
189
190
                    return tanh($this->gamma * $res);
191
                };
192
193
            case self::KERNEL_LAPLACIAN:
194
                // k(x,y)=exp(-γ.|x-y|) where |..| is Manhattan distance
195
                $dist = new Manhattan();
196
197
                return function ($x, $y) use ($dist) {
198
                    return exp(-$this->gamma * $dist->distance($x, $y));
199
                };
200
201
            default:
202
                throw new Exception(sprintf('KernelPCA initialized with invalid kernel: %d', $this->kernel));
203
        }
204
    }
205
206
    protected function getDistancePairs(array $sample): array
207
    {
208
        $kernel = $this->getKernel();
209
210
        $pairs = [];
211
        foreach ($this->data as $row) {
212
            $pairs[] = $kernel($row, $sample);
213
        }
214
215
        return $pairs;
216
    }
217
218
    protected function projectSample(array $pairs): array
219
    {
220
        // Normalize eigenvectors by eig = eigVectors / eigValues
221
        $func = function ($eigVal, $eigVect) {
222
            $m = new Matrix($eigVect, false);
223
            $a = $m->divideByScalar($eigVal)->toArray();
224
225
            return $a[0];
226
        };
227
        $eig = array_map($func, $this->eigValues, $this->eigVectors);
228
229
        // return k.dot(eig)
230
        return Matrix::dot($pairs, $eig);
231
    }
232
}
233