Passed
Push — master ( c02f08...0839b6 )
by Ruben
02:19
created

src/CategoricalDataset.php (2 issues)

1
<?php
2
/**
3
 *
4
 * (c) Ruben Dorado <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
namespace SiteAnalyzer;
10
/**
 * One-hot encoder for the categorical columns of a tabular dataset.
 *
 * The dataset is a list of rows, each row being a zero-indexed list of column
 * values. Columns registered through setEncodedFeatures() are expanded by
 * encode() into one 0/1 column per distinct value; every other column is
 * copied through unchanged.
 *
 * @package   SiteAnalyzer
 * @author    Ruben Dorado <[email protected]>
 * @copyright 2018 Ruben Dorado
 * @license   http://www.opensource.org/licenses/MIT The MIT License
 */
class CategoricalDataset
{
    /**
     * @var array Raw dataset: list of rows, each a list of column values.
     */
    protected $data;

    /**
     * @var array Indexes of the columns to encode, sorted ascending.
     */
    protected $sortedEncodedFeatures = [];

    /**
     * @var array Distinct values of each encoded column, keyed by column index.
     */
    protected $encodedValues = [];

    /**
     * @var array One-hot vectors of each encoded column: [column => [value => vector]].
     */
    protected $featEncode = [];

    /**
     * @var array Number of distinct values of each encoded column, keyed by column index.
     */
    protected $featIndexMap = [];

    /**
     * @param array $data List of rows; every row is a zero-indexed list of values.
     */
    public function __construct($data)
    {
        $this->data = $data;
    }

    /**
     * Declares which columns must be one-hot encoded and builds their encodings.
     *
     * Calling this method again replaces the previous configuration entirely.
     *
     * @param array $array Indexes of the columns to encode (any order).
     *
     * @return void
     */
    public function setEncodedFeatures($array)
    {
        sort($array);
        $this->sortedEncodedFeatures = $array;

        // Reset every derived table, not just encodedValues, so a second call
        // does not keep encodings of columns that are no longer selected.
        $this->encodedValues = [];
        $this->featIndexMap = [];
        $this->featEncode = [];

        foreach ($this->sortedEncodedFeatures as $col) {
            $vals = $this->getUniqueValues($col);
            $this->encodedValues[$col] = $vals;
            $this->featIndexMap[$col] = count($vals);
            $this->featEncode[$col] = $this->encodeFeature($vals);
        }
    }

    /**
     * Returns the distinct values of a column in order of first appearance.
     *
     * @param int $col Column index.
     *
     * @return array Zero-indexed list of distinct values.
     */
    private function getUniqueValues($col)
    {
        // NOTE(review): Matrix::getColumn is assumed to return the values of
        // column $col, one per row - confirm against the Matrix class.
        $column = Matrix::getColumn($this->data, $col);

        // array_unique() compares the string form of the values, which matches
        // how they are later used as array keys in encodeFeature(). A loose
        // in_array() check would merge values such as "10" and "1e1" that
        // remain distinct keys, leaving encode() without an entry for one of
        // them. array_values() removes the key gaps array_unique() leaves.
        return array_values(array_unique($column));
    }

    /**
     * Builds the one-hot vector of every value of a feature.
     *
     * @param array $array Distinct values of the feature.
     *
     * @return array Map value => vector; the vector has one slot per distinct
     *               value, set to 1 at the position of that value and 0 elsewhere.
     */
    private function encodeFeature($array)
    {
        // Re-index defensively so positions are always 0..size-1.
        $values = array_values($array);
        $size = count($values);
        $resp = [];
        foreach ($values as $position => $value) {
            $vector = array_fill(0, $size, 0);
            $vector[$position] = 1;
            $resp[$value] = $vector;
        }
        return $resp;
    }

    /**
     * Returns the dataset with every registered column one-hot encoded.
     *
     * The number of columns is taken from the first row.
     *
     * @return array List of rows; encoded columns are replaced in place by
     *               their one-hot vector, other columns keep their value.
     */
    public function encode()
    {
        if (empty($this->data)) {
            return [];
        }

        $n = count($this->data);
        $ndim = count($this->data[0]);

        // Default transformer: keep the value as a single output column.
        $transformer = [];
        for ($j = 0; $j < $ndim; $j++) {
            $transformer[] = static function ($val) {
                return [$val];
            };
        }
        // Encoded columns look their value up in the precomputed one-hot table.
        foreach ($this->sortedEncodedFeatures as $col) {
            $transformer[$col] = function ($val) use ($col) {
                return $this->featEncode[$col][$val];
            };
        }

        $ndata = [];
        for ($i = 0; $i < $n; $i++) {
            $npoint = [];
            for ($j = 0; $j < $ndim; $j++) {
                foreach ($transformer[$j]($this->data[$i][$j]) as $cell) {
                    $npoint[] = $cell;
                }
            }
            $ndata[] = $npoint;
        }
        return $ndata;
    }

    /**
     * Returns one label per column of the encoded dataset.
     *
     * Columns produced by an encoded feature are labelled with the value they
     * stand for; pass-through columns get an empty string.
     *
     * @return array List of labels aligned with the columns returned by encode().
     */
    public function getLabelsAsArray()
    {
        if (empty($this->data)) {
            return [];
        }

        $resp = [];
        // The column count comes from the instance data, not a local variable.
        $len = count($this->data[0]);
        for ($i = 0; $i < $len; $i++) {
            if (isset($this->encodedValues[$i])) {
                $resp = array_merge($resp, $this->encodedValues[$i]);
            } else {
                $resp[] = "";
            }
        }
        return $resp;
    }

    // TODO: decoding (mapping encoded rows back to the original values) is not
    // implemented yet.
}
174