1 | <?php |
||
2 | /** |
||
3 | * |
||
4 | * (c) Ruben Dorado <[email protected]> |
||
5 | * |
||
6 | * For the full copyright and license information, please view the LICENSE |
||
7 | * file that was distributed with this source code. |
||
8 | */ |
||
9 | namespace SiteAnalyzer; |
||
/**
 * class CategoricalDataset
 *
 * Holds a tabular dataset (a list of rows, each row a list of column values)
 * and one-hot encodes the columns selected through setEncodedFeatures().
 *
 * @package   SiteAnalyzer
 * @author    Ruben Dorado <[email protected]>
 * @copyright 2018 Ruben Dorado
 * @license   http://www.opensource.org/licenses/MIT The MIT License
 */
class CategoricalDataset
{
    /**
     * @var array Rows of the dataset exactly as passed to the constructor.
     */
    protected $data;

    /**
     * @var array Indexes of the columns to encode, sorted ascending.
     */
    protected $sortedEncodedFeatures = [];

    /**
     * @var array Column index => list of the distinct values found in that column.
     */
    protected $encodedValues = [];

    /**
     * @var array Column index => (category value => one-hot vector).
     */
    protected $featEncode = [];

    /**
     * @var array Column index => number of distinct values in that column.
     */
    protected $featIndexMap = [];

    /**
     * @param array $data List of rows; every row is a list of column values
     *                    indexed from 0.
     */
    public function __construct($data)
    {
        $this->data = $data;
    }

    /**
     * Selects the columns that must be one-hot encoded and (re)builds the
     * lookup tables used by encode() and getLabelsAsArray().
     *
     * @param array $array Indexes of the categorical columns.
     *
     * @return void
     */
    public function setEncodedFeatures($array)
    {
        sort($array);
        $this->sortedEncodedFeatures = $array;

        // Reset every derived table, not only encodedValues, so that a second
        // call with a different selection does not keep stale columns around.
        $this->encodedValues = [];
        $this->featIndexMap = [];
        $this->featEncode = [];

        foreach ($this->sortedEncodedFeatures as $col) {
            $vals = $this->getUniqueValues($col);
            $this->encodedValues[$col] = $vals;
            $this->featIndexMap[$col] = count($vals);
            $this->featEncode[$col] = $this->encodeFeature($vals);
        }
    }

    /**
     * Returns the distinct values of a column, in order of first appearance.
     *
     * Two values count as the same category exactly when they map to the same
     * PHP array key, because encodeFeature() and encode() use the value as an
     * array key. A loose comparison would merge e.g. "01" and "1" even though
     * they are looked up under different keys later on.
     *
     * @param int|string $col Column index.
     *
     * @return array List of distinct values (keys 0..n-1).
     */
    private function getUniqueValues($col)
    {
        $seen = [];
        $resp = [];
        foreach (Matrix::getColumn($this->data, $col) as $value) {
            if (!isset($seen[$value])) {
                $seen[$value] = true;
                $resp[] = $value;
            }
        }
        return $resp;
    }

    /**
     * Builds the one-hot vector of every category.
     *
     * @param array $array Distinct category values.
     *
     * @return array Category value => list of 0/1 flags with a single 1 at the
     *               position of that category.
     */
    private function encodeFeature($array)
    {
        // Re-index so positions are always 0..n-1 regardless of the input keys.
        $categories = array_values($array);
        $size = count($categories);
        $resp = [];
        foreach ($categories as $position => $category) {
            $oneHot = array_fill(0, $size, 0);
            $oneHot[$position] = 1;
            $resp[$category] = $oneHot;
        }
        return $resp;
    }

    /**
     * Returns a copy of the dataset in which every selected column is replaced
     * by its one-hot vector; the remaining columns are copied unchanged.
     *
     * The number of columns is taken from the first row.
     *
     * @return array List of encoded rows; empty when the dataset is empty.
     */
    public function encode()
    {
        if (empty($this->data)) {
            return [];
        }

        $ndim = count($this->data[0]);
        $ndata = [];
        foreach ($this->data as $row) {
            $npoint = [];
            for ($j = 0; $j < $ndim; $j++) {
                if (isset($this->featEncode[$j])) {
                    // Categorical column: append its one-hot flags.
                    foreach ($this->featEncode[$j][$row[$j]] as $flag) {
                        $npoint[] = $flag;
                    }
                } else {
                    $npoint[] = $row[$j];
                }
            }
            $ndata[] = $npoint;
        }
        return $ndata;
    }

    /**
     * Returns one label per column produced by encode(): the category value
     * for every one-hot position, and an empty string for every column that
     * is not encoded.
     *
     * @return array List of labels; empty when the dataset is empty.
     */
    public function getLabelsAsArray()
    {
        if (empty($this->data)) {
            return [];
        }

        $resp = [];
        $len = count($this->data[0]);
        for ($i = 0; $i < $len; $i++) {
            if (isset($this->encodedValues[$i])) {
                $resp = array_merge($resp, $this->encodedValues[$i]);
            } else {
                $resp[] = "";
            }
        }
        return $resp;
    }

    // NOTE: decoding (the inverse of encode()) is not implemented.
}
||
174 |
Adding explicit visibility (private, protected, or public) is generally recommended to communicate to other developers how, and from where, this method is intended to be used.