1 | <?php |
||
11 | class PCA extends EigenTransformerBase |
||
12 | { |
||
13 | /** |
||
14 | * Temporary storage for the mean value of each dimension in the given data |
||
15 | * |
||
16 | * @var array |
||
17 | */ |
||
18 | protected $means = []; |
||
19 | |||
20 | /** |
||
21 | * @var bool |
||
22 | */ |
||
23 | protected $fit = false; |
||
24 | |||
/**
 * PCA (Principal Component Analysis) is used to explain the given data with a
 * lower number of dimensions. The analysis transforms the data to a lower
 * dimensional version of it while conserving a proportion of the total
 * variance within the data. It is a lossy data compression technique.
 *
 * At most one of the two arguments may be given. When both are null the
 * constructor assigns nothing, so whatever values $this->totalVariance and
 * $this->numFeatures already hold stay in effect.
 *
 * @param float|null $totalVariance Total explained variance to be preserved, within [0.1, 0.99]
 * @param int|null   $numFeatures   Number of features to be preserved, greater than 0
 *
 * @throws \Exception If an argument is out of range (NaN included) or if both arguments are given
 */
public function __construct(?float $totalVariance = null, ?int $numFeatures = null)
{
    // Positive-form range test: NaN compares false against everything, so the
    // negated form rejects it, whereas `< 0.1 || > 0.99` would let it through.
    if ($totalVariance !== null && !($totalVariance >= 0.1 && $totalVariance <= 0.99)) {
        throw new Exception('Total variance can be a value between 0.1 and 0.99');
    }

    if ($numFeatures !== null && $numFeatures <= 0) {
        throw new Exception('Number of features to be preserved should be greater than 0');
    }

    // The two selection criteria are mutually exclusive.
    if ($totalVariance !== null && $numFeatures !== null) {
        throw new Exception('Either totalVariance or numFeatures should be specified in order to run the algorithm');
    }

    // Only overwrite a setting that was explicitly supplied.
    if ($numFeatures !== null) {
        $this->numFeatures = $numFeatures;
    }

    if ($totalVariance !== null) {
        $this->totalVariance = $totalVariance;
    }
}
||
58 | |||
59 | /** |
||
60 | * Takes a data set and returns a lower dimensional version |
||
61 | * of this data while preserving $totalVariance or $numFeatures. <br> |
||
62 | * $data is an n-by-m matrix and the returned array is |
||
63 | * an n-by-k matrix where k <= m |
||
64 | */ |
||
65 | public function fit(array $data): array |
||
79 | |||
80 | /** |
||
81 | * Transforms the given sample to a lower dimensional vector by using |
||
82 | * the eigenVectors obtained in the last run of <code>fit</code>. |
||
83 | * |
||
84 | * @throws \Exception |
||
85 | */ |
||
86 | public function transform(array $sample): array |
||
100 | |||
101 | protected function calculateMeans(array $data, int $n): void |
||
110 | |||
111 | /** |
||
112 | * Normalization of the data includes subtracting the mean from |
||
113 | * each dimension, so the dimensions will be centered at zero |
||
114 | */ |
||
115 | protected function normalize(array $data, int $n): array |
||
130 | } |
||
131 |