1 | <?php |
||
2 | /** |
||
3 | * |
||
4 | * (c) Ruben Dorado <[email protected]> |
||
5 | * |
||
6 | * For the full copyright and license information, please view the LICENSE |
||
7 | * file that was distributed with this source code. |
||
8 | */ |
||
9 | namespace SiteAnalyzer; |
||
10 | |||
11 | use Exception; |
||
12 | |||
13 | /** |
||
14 | * class ML |
||
15 | * |
||
16 | * @package SiteAnalyzer |
||
17 | * @author Ruben Dorado <[email protected]> |
||
18 | * @copyright 2018 Ruben Dorado |
||
19 | * @license http://www.opensource.org/licenses/MIT The MIT License |
||
20 | */ |
||
class ML
{
    /**
     * Groups the given points into clusters using k-means.
     *
     * The first $nclusters points of $data seed the centroids. Each iteration
     * assigns every point to its nearest centroid (Euclidean distance, ties go
     * to the lowest cluster index) and then moves each centroid to the mean of
     * the points assigned to it. A cluster that receives no points keeps its
     * previous centroid. Iteration stops as soon as the assignment is the same
     * as in the previous iteration, or after $maxiter iterations.
     *
     * @param array $data      0-indexed list of points; each point is a 0-indexed
     *                         list of numbers. The dimension is taken from $data[0].
     * @param int   $nclusters Number of clusters, between 1 and count($data).
     * @param int   $maxiter   Maximum number of iterations (defaults to 100).
     *
     * @return array A single-element array whose only item is the list of
     *               cluster indexes, one per point, in the order of $data.
     *
     * @throws \Exception If $data is empty or $nclusters is out of range.
     */
    public static function kmeans($data, $nclusters, $maxiter = 100)
    {
        $npoints = count($data);
        if ($npoints <= 0) {
            throw new \Exception("Not enough data. ");
        }
        // Seeding reads $data[0..$nclusters-1], so the cluster count has to fit the data.
        if ($nclusters < 1 || $nclusters > $npoints) {
            throw new \Exception("The number of clusters must be between 1 and the number of points. ");
        }

        $ndimensions = count($data[0]);
        $centroids = [];
        for ($i = 0; $i < $nclusters; $i++) {
            $centroids[] = $data[$i];
        }

        $resp = [];
        for ($niter = 0; $niter < $maxiter; $niter++) {
            // Assign each one of the points to its nearest centroid.
            $nresp = [];
            for ($j = 0; $j < $npoints; $j++) {
                $nresp[] = self::nearestCentroid($data[$j], $centroids);
            }

            // Converged: nothing moved, so recomputing the centroids cannot change the result.
            if ($nresp === $resp) {
                break;
            }
            $resp = $nresp;

            // Recalculate the centroids as the mean of their assigned points.
            $previous = $centroids;
            $sums = self::initCentroids($nclusters, $ndimensions, function () {
                return 0;
            });
            $counts = array_fill(0, $nclusters, 0);
            for ($j = 0; $j < $npoints; $j++) {
                // Matrix::sumArray is defined outside this file; presumably an
                // element-wise sum of the two arrays -- confirm against Matrix.
                $sums[$resp[$j]] = Matrix::sumArray($sums[$resp[$j]], $data[$j]);
                $counts[$resp[$j]]++;
            }
            $centroids = self::normalizeCentroids($sums, $counts);

            // An empty cluster has no mean; keep it where it was instead of
            // collapsing it onto the origin.
            foreach ($counts as $i => $count) {
                if ($count === 0) {
                    $centroids[$i] = $previous[$i];
                }
            }
        }

        return [$resp];
    }

    /**
     * Returns the index of the centroid closest to the given point.
     *
     * Ties are resolved in favour of the lowest index. Index 0 is the fallback
     * when no distance compares as smaller (for example when distances are NAN).
     *
     * @param array $point     Coordinates of the point.
     * @param array $centroids Non-empty 0-indexed list of centroid coordinates.
     *
     * @return int Index into $centroids.
     */
    private static function nearestCentroid($point, $centroids)
    {
        $best = 0;
        $bdist = self::eclideanDistance($point, $centroids[0]);
        $total = count($centroids);
        for ($i = 1; $i < $total; $i++) {
            $ndist = self::eclideanDistance($point, $centroids[$i]);
            if ($bdist > $ndist) {
                $bdist = $ndist;
                $best = $i;
            }
        }
        return $best;
    }

    /**
     * Turns per-cluster coordinate sums into means.
     *
     * Every coordinate of row i is divided by $counts[$i]. Rows whose count is
     * zero (or missing) are returned unchanged so that no division by zero is
     * attempted. The number of coordinates processed per row is taken from the
     * first row.
     *
     * @param array $centroids 0-indexed list of coordinate-sum rows.
     * @param array $counts    Number of points accumulated into each row.
     *
     * @return array 0-indexed list of rows with the same layout as $centroids;
     *               an empty array when $centroids is empty.
     */
    public static function normalizeCentroids($centroids, $counts)
    {
        $resp = [];
        $n = count($centroids);
        if ($n === 0) {
            return $resp;
        }
        $d = count($centroids[0]);
        for ($i = 0; $i < $n; $i++) {
            $count = isset($counts[$i]) ? $counts[$i] : 0;
            $tmp = [];
            for ($j = 0; $j < $d; $j++) {
                $tmp[] = ($count == 0) ? $centroids[$i][$j] : $centroids[$i][$j] / $count;
            }
            $resp[] = $tmp;
        }
        return $resp;
    }

    /**
     * Builds $nclusters centroids of $ndimensions coordinates each.
     *
     * @param int      $nclusters   Number of centroids to create.
     * @param int      $ndimensions Number of coordinates per centroid.
     * @param callable $fvalue      Called without arguments once per coordinate;
     *                              its return value becomes that coordinate.
     *
     * @return array 0-indexed list of $nclusters rows with $ndimensions values each.
     */
    public static function initCentroids($nclusters, $ndimensions, $fvalue)
    {
        $resp = [];
        for ($i = 0; $i < $nclusters; $i++) {
            $centroid = [];
            for ($d = 0; $d < $ndimensions; $d++) {
                $centroid[] = $fvalue();
            }
            $resp[] = $centroid;
        }
        return $resp;
    }

    /**
     * Computes the Euclidean distance between two points.
     *
     * The method name keeps its original spelling because it is public API.
     * The number of coordinates compared is count($p1); $p2 is read at the
     * same indexes.
     *
     * @param array $p1 0-indexed list of numbers.
     * @param array $p2 0-indexed list of numbers, at least as long as $p1.
     *
     * @return float Square root of the summed squared coordinate differences.
     */
    public static function eclideanDistance($p1, $p2)
    {
        $len = count($p1);
        $acum = 0;
        for ($i = 0; $i < $len; $i++) {
            $acum += ($p1[$i] - $p2[$i]) ** 2;
        }
        return sqrt($acum);
    }
}
||
134 |