Passed
Push — master ( ed30c1...df214b )
by Ruben
02:19
created

src/ML.php (1 issue)

1
<?php
2
/**
3
 *
4
 * (c) Ruben Dorado <[email protected]>
5
 *
6
 * For the full copyright and license information, please view the LICENSE
7
 * file that was distributed with this source code.
8
 */
9
namespace SiteAnalyzer;
10
11
use Exception;
12
13
/**
14
 * class ML
15
 *
16
 * @package   SiteAnalyzer
17
 * @author    Ruben Dorado <[email protected]>
18
 * @copyright 2018 Ruben Dorado
19
 * @license   http://www.opensource.org/licenses/MIT The MIT License
20
 */
21
class ML
{
    /**
     * Groups points into clusters with the k-means algorithm.
     *
     * The first $nclusters points of $data are used as the initial centroids.
     * Each iteration assigns every point to its nearest centroid (Euclidean
     * distance, ties go to the lowest cluster index) and then moves every
     * centroid to the mean of the points assigned to it. The loop stops as
     * soon as an iteration leaves all assignments unchanged, or after
     * $maxiter iterations.
     *
     * @param array $data      0-indexed list of points; every point is a
     *                         0-indexed list of numeric coordinates. The
     *                         number of dimensions is taken from the first
     *                         point.
     * @param int   $nclusters Number of clusters; must be between 1 and the
     *                         number of points.
     * @param int   $maxiter   Upper bound on the number of iterations.
     *
     * @return array A one-element array whose single entry is the list of
     *               cluster indexes, one per point, in the order of $data.
     *
     * @throws \Exception When $data is empty or $nclusters is out of range.
     */
    public static function kmeans($data, $nclusters, $maxiter = 100)
    {
        $npoints = count($data);
        if ($npoints <= 0) {
            throw new \Exception("Not enough data. ");
        }
        if ($nclusters < 1 || $nclusters > $npoints) {
            // Seeding reads $data[0..$nclusters-1], so both bounds matter.
            throw new \InvalidArgumentException(
                "The number of clusters must be between 1 and the number of points."
            );
        }

        $ndimensions = count($data[0]);

        // Seed the centroids with the first $nclusters points.
        $centroids = [];
        for ($i = 0; $i < $nclusters; $i++) {
            $centroids[] = $data[$i];
        }

        $resp = [];
        for ($niter = 0; $niter < $maxiter; $niter++) {
            // Assign each point to its closest centroid.
            $nresp = [];
            for ($j = 0; $j < $npoints; $j++) {
                $best = -1;
                $bdist = INF;
                for ($i = 0; $i < $nclusters; $i++) {
                    $ndist = self::eclideanDistance($data[$j], $centroids[$i]);
                    if ($bdist > $ndist) {
                        $bdist = $ndist;
                        $best = $i;
                    }
                }
                $nresp[] = $best;
            }

            // Converged: this pass produced exactly the previous assignments.
            // $resp is empty on the first pass, so that pass never converges.
            $converged = ($resp === $nresp);
            $resp = $nresp;
            if ($converged) {
                break;
            }

            // Recalculate the centroids as the mean of their assigned points.
            $sums = self::initCentroids($nclusters, $ndimensions, function () {
                return 0;
            });
            $counts = array_fill(0, $nclusters, 0);
            for ($j = 0; $j < $npoints; $j++) {
                // NOTE(review): Matrix::sumArray is presumably an element-wise
                // sum of two equally sized arrays; confirm against Matrix.
                $sums[$resp[$j]] = Matrix::sumArray($sums[$resp[$j]], $data[$j]);
                $counts[$resp[$j]]++;
            }
            $means = self::normalizeCentroids($sums, $counts);
            for ($i = 0; $i < $nclusters; $i++) {
                // A cluster that received no points keeps its previous
                // centroid instead of collapsing onto the origin.
                if ($counts[$i] > 0) {
                    $centroids[$i] = $means[$i];
                }
            }
        }

        return [$resp];
    }

    /**
     * Turns per-cluster coordinate sums into per-cluster means.
     *
     * @param array $centroids List of coordinate-sum vectors, one per cluster.
     * @param array $counts    Number of points summed into each vector, using
     *                         the same keys as $centroids.
     *
     * @return array List of vectors where every coordinate has been divided
     *               by the matching count. A vector whose count is zero or
     *               missing is returned unchanged, as there is nothing to
     *               average and dividing would fail.
     */
    public static function normalizeCentroids($centroids, $counts)
    {
        $resp = [];
        foreach ($centroids as $i => $sums) {
            if (empty($counts[$i])) {
                // Empty cluster: avoid a division by zero.
                $resp[] = array_values($sums);
                continue;
            }
            $mean = [];
            foreach ($sums as $value) {
                $mean[] = $value / $counts[$i];
            }
            $resp[] = $mean;
        }
        return $resp;
    }

    /**
     * Builds $nclusters centroids of $ndimensions coordinates each.
     *
     * @param int      $nclusters   Number of centroids to create.
     * @param int      $ndimensions Number of coordinates per centroid.
     * @param callable $fvalue      Called without arguments once per
     *                              coordinate; its return value becomes the
     *                              coordinate.
     *
     * @return array List of $nclusters lists with $ndimensions values each.
     */
    public static function initCentroids($nclusters, $ndimensions, $fvalue)
    {
        $resp = [];
        for ($i = 0; $i < $nclusters; $i++) {
            $centroid = [];
            for ($d = 0; $d < $ndimensions; $d++) {
                $centroid[] = $fvalue();
            }
            $resp[] = $centroid;
        }
        return $resp;
    }

    /**
     * Euclidean distance between two points.
     *
     * @param array $p1 0-indexed list of numeric coordinates; its length
     *                  decides how many coordinates are compared.
     * @param array $p2 0-indexed list with at least as many coordinates
     *                  as $p1.
     *
     * @return float Square root of the sum of squared coordinate differences.
     */
    public static function euclideanDistance($p1, $p2)
    {
        $len = count($p1);
        $acum = 0;
        for ($i = 0; $i < $len; $i++) {
            $acum += ($p1[$i] - $p2[$i]) ** 2;
        }
        return sqrt($acum);
    }

    /**
     * Misspelled historical name of euclideanDistance(), kept so existing
     * callers continue to work.
     *
     * @deprecated Use euclideanDistance() instead.
     *
     * @param array $p1 First point.
     * @param array $p2 Second point.
     *
     * @return float Euclidean distance between $p1 and $p2.
     */
    public static function eclideanDistance($p1, $p2)
    {
        return self::euclideanDistance($p1, $p2);
    }
}
134