Completed
Push — 0.7.dev ( e45971...bb7559 )
by Andrei
54s
created

ema_visualizer.__draw_ellipses()   A

Complexity

Conditions 3

Size

Total Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 13
rs 9.4285
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
4
@details Implementation based on article:
5
         - 
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2017
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
    
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
    
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
import numpy;
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
31
from pyclustering.cluster import cluster_visualizer;
32
from pyclustering.utils import pi, calculate_ellipse_description;
33
34
import matplotlib.pyplot as plt;
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
35
from matplotlib import patches;
0 ignored issues
show
Configuration introduced by
The import matplotlib could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
36
37
38
39
def gaussian(data, mean, covariance):
    """!
    @brief Evaluates a Gaussian-shaped density with the given mean and covariance at every point of data.

    @param[in] data (array_like): Points to evaluate, one point per row; every point must have the same dimension.
    @param[in] mean (array_like): Centre of the distribution (same dimension as a point).
    @param[in] covariance (array_like): Covariance matrix (a scalar or 1x1 matrix for one-dimensional data).

    @return (list) One density value per point, in the order of data. Empty list if data is empty.

    """
    # Coerce the inputs so that plain Python lists are accepted as well as numpy arrays.
    points = numpy.asarray(data, dtype=float)
    center = numpy.asarray(mean, dtype=float)
    spread = numpy.asarray(covariance, dtype=float)

    if len(points) == 0:
        return []

    dimension = float(len(points[0]))

    # Pseudo-inverse keeps the multi-dimensional case defined for singular covariance matrices.
    if dimension != 1.0:
        inv_variance = numpy.linalg.pinv(spread)
    else:
        inv_variance = 1.0 / spread

    # NOTE(review): the normalisation uses the matrix norm of the covariance, not its
    # determinant as in the textbook multivariate normal density - kept as is on purpose,
    # confirm whether this is intended.
    divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(spread))
    right_const = 1.0 / divider

    result = []
    for point in points:
        mean_delta = point - center
        exponent = -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta))
        result.append(right_const * numpy.exp(exponent))

    return result
58
59
60
61
class ema_observer:
    """!
    @brief Accumulates the means, covariances and clusters passed to notify(), one entry per call.

    """

    def __init__(self):
        """!
        @brief Creates an observer with empty history.

        """
        self.__means_evolution = []
        self.__covariances_evolution = []
        self.__clusters_evolution = []


    def get_iterations(self):
        """!
        @return (uint) Amount of recorded notifications.

        """
        # Every notify() call appends exactly one entry to each history list,
        # so the length of any of them is the amount of recorded steps.
        return len(self.__means_evolution)


    def get_means(self):
        """!
        @return (list) Recorded means, one entry per notification.

        """
        return self.__means_evolution


    def get_covariances(self):
        """!
        @return (list) Recorded covariances, one entry per notification.

        """
        return self.__covariances_evolution


    def get_clusters(self):
        """!
        @return (list) Recorded clusters, one entry per notification.

        """
        return self.__clusters_evolution


    def notify(self, means, covariances, clusters):
        """!
        @brief Stores one snapshot of the algorithm state.

        @param[in] means (list): Means to record.
        @param[in] covariances (list): Covariances to record.
        @param[in] clusters (list): Clusters to record.

        """
        self.__means_evolution.append(means)
        self.__covariances_evolution.append(covariances)
        self.__clusters_evolution.append(clusters)
84
85
86
87
class ema_visualizer:
    """!
    @brief Helpers that display clusters and, for two-dimensional samples, an ellipse per cluster.

    """

    @staticmethod
    def show_clusters(clusters, sample, covariances, means, display=True):
        """!
        @brief Draws clusters using cluster_visualizer and adds ellipses when points of the sample are two-dimensional.

        @param[in] clusters (list): Clusters passed to cluster_visualizer.append_clusters().
        @param[in] sample (list): Data the clusters refer to; dimension is taken from the first point.
        @param[in] covariances (list): Covariances, indexed in the same order as clusters.
        @param[in] means (list): Means, indexed in the same order as clusters.
        @param[in] display (bool): If True then plt.show() is called before returning.

        @return Figure object returned by cluster_visualizer.show().

        """
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        # Showing is postponed so that ellipses can still be added to the figure.
        figure = visualizer.show(display=False)

        if len(sample[0]) == 2:
            ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)

        if display is True:
            plt.show()

        return figure


    @staticmethod
    def __draw_ellipses(figure, visualizer, clusters, covariances, means):
        """!
        @brief Adds one ellipse per cluster to the first axes of the figure.

        @details Ellipse geometry comes from calculate_ellipse_description() applied to the cluster
                 covariance; its centre is the first two coordinates of the cluster mean and its
                 colour is requested from the visualizer for the same cluster index.

        """
        ax = figure.get_axes()[0]

        for index in range(len(clusters)):
            angle, width, height = calculate_ellipse_description(covariances[index])
            color = visualizer.get_cluster_color(index, 0)

            ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color)


    @staticmethod
    def __draw_ellipse(ax, x, y, angle, width, height, color):
        """!
        @brief Adds a single filled, semi-transparent ellipse patch centred at (x, y) to the axes.

        """
        ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=angle, linewidth=2, fill=True, zorder=2, color=color)
        ax.add_patch(ellipse)
122
123
124
class ema:
    """!
    @brief Expectation-Maximization clustering that models every cluster with gaussian().

    @details process() alternates expectation and maximization steps until the change of the
             log-likelihood is not greater than the tolerance or a covariance with zero norm
             appears. Results are available via get_clusters(), get_centers() and get_covariances().

    """

    def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001):
        """!
        @brief Prepares the algorithm state.

        @param[in] data (array_like): Input points, one point per row.
        @param[in] amount_clusters (uint): Initial amount of clusters.
        @param[in] means (list): Initial means; if None, distinct random points of data are used.
        @param[in] variances (list): Initial covariances; if None, they are derived from the covariance of data.
        @param[in] observer (object): Optional object with notify(means, covariances, clusters) method
                    (for example, ema_observer) that is called after each iteration.
        @param[in] tolerance (double): Processing stops when the absolute change of the log-likelihood
                    is not greater than this value.

        """
        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters
        self.__tolerance = tolerance
        self.__observer = observer

        self.__means = means
        if means is None:
            self.__means = self.__get_random_means(data, amount_clusters)

        self.__variances = variances
        if variances is None:
            self.__variances = self.__get_random_covariances(data, amount_clusters)

        # __rc[c][p] - responsibility of cluster c for point p; __pic[c] - weight of cluster c;
        # __gaussians[c][p] - value of gaussian() of cluster c at point p.
        self.__rc = [[0.0] * len(self.__data) for _ in range(amount_clusters)]
        self.__pic = [1.0] * amount_clusters
        self.__clusters = []
        self.__gaussians = [[] for _ in range(amount_clusters)]
        self.__stop = False


    def process(self):
        """!
        @brief Runs expectation and maximization steps until a stop condition is met and stores the clusters.

        """
        self.__clusters = None

        # Start values only need to differ by more than any sensible tolerance
        # so that the first iteration is always performed.
        previous_likelihood = -200000
        current_likelihood = -100000

        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance):
            self.__expectation_step()
            self.__maximization_step()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_condition()

            self.__notify()

        self.__clusters = self.__extract_clusters()


    def get_clusters(self):
        """!
        @return (list) Clusters as lists of point indexes (empty clusters are omitted);
                 an empty list before process() has been called.

        """
        return self.__clusters


    def get_centers(self):
        """!
        @return (list) Current means of the clusters.

        """
        return self.__means


    def get_covariances(self):
        """!
        @return (list) Current covariances of the clusters.

        """
        return self.__variances


    def __notify(self):
        """!
        @brief Passes current means, covariances and clusters to the observer, if there is one.

        """
        if self.__observer is not None:
            clusters = self.__extract_clusters()
            self.__observer.notify(self.__means, self.__variances, clusters)


    def __extract_clusters(self):
        """!
        @brief Assigns every point to the cluster with the largest responsibility for it.

        @return (list) Non-empty clusters, each one is a list of point indexes.

        """
        clusters = [[] for _ in range(self.__amount_clusters)]
        for index_point in range(len(self.__data)):
            candidates = [(index_cluster, self.__rc[index_cluster][index_point])
                          for index_cluster in range(self.__amount_clusters)]

            index_winner = max(candidates, key=lambda candidate: candidate[1])[0]
            clusters[index_winner].append(index_point)

        return [cluster for cluster in clusters if len(cluster) > 0]


    def __log_likelihood(self):
        """!
        @return (double) Sum over points of the logarithm of the weighted sum of cluster gaussians.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            likelihood += numpy.log(particle)

        return likelihood


    def __probabilities(self, index_cluster, index_point):
        """!
        @return (double) Weighted gaussian of the cluster at the point divided by the sum of
                 weighted gaussians of all clusters at that point.

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider


    def __expectation_step(self):
        """!
        @brief Recalculates gaussians of every cluster and then responsibilities of every cluster for every point.

        """
        for index in range(self.__amount_clusters):
            self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index])

        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)


    def __maximization_step(self):
        """!
        @brief Recalculates weights, means and covariances; clusters whose responsibilities sum to zero are removed.

        """
        pic, means, variances = [], [], []
        surviving_rc, surviving_gaussians = [], []

        for index_cluster in range(self.__amount_clusters):
            rc = self.__rc[index_cluster]
            mc = numpy.sum(rc)

            # A cluster that is responsible for nothing cannot be updated - drop it.
            if mc == 0.0:
                continue

            mean = self.__update_mean(rc, mc)

            pic.append(mc / len(self.__data))
            means.append(mean)
            variances.append(self.__update_covariance(mean, rc, mc))

            # Keep per-cluster rows aligned with the compacted parameter lists, otherwise
            # index-based reads after a removal would mix rows of different clusters.
            surviving_rc.append(rc)
            surviving_gaussians.append(self.__gaussians[index_cluster])

        self.__pic = pic
        self.__means = means
        self.__variances = variances
        self.__rc = surviving_rc
        self.__gaussians = surviving_gaussians
        self.__amount_clusters = len(means)


    def __get_stop_condition(self):
        """!
        @return (bool) True if the norm of any covariance is exactly zero, otherwise False.

        """
        for covariance in self.__variances:
            if numpy.linalg.norm(covariance) == 0.0:
                return True

        return False


    def __update_covariance(self, means, rc, mc):
        """!
        @brief Calculates responsibility-weighted covariance around the specified mean.

        @param[in] means (array_like): Mean of the cluster.
        @param[in] rc (list): Responsibilities of the cluster for every point.
        @param[in] mc (double): Sum of the responsibilities.

        """
        covariance = 0.0
        for index_point in range(len(self.__data)):
            # Row vector, so that transpose-times-itself gives the outer product.
            deviation = numpy.array([self.__data[index_point] - means])
            covariance += rc[index_point] * deviation.T.dot(deviation)

        return covariance / mc


    def __update_mean(self, rc, mc):
        """!
        @brief Calculates responsibility-weighted mean of the data.

        @param[in] rc (list): Responsibilities of the cluster for every point.
        @param[in] mc (double): Sum of the responsibilities.

        """
        mean = 0.0
        for index_point in range(len(self.__data)):
            mean += rc[index_point] * self.__data[index_point]

        return mean / mc


    def __get_random_covariances(self, data, amount):
        """!
        @brief Creates initial covariances: covariance of data minus a random scalar, one per cluster.

        @param[in] data (array_like): Input points, one point per row.
        @param[in] amount (uint): Amount of covariances to create.

        """
        covariances = []
        covariance_appendixes = []
        data_covariance = numpy.cov(data, rowvar=False)
        scale = numpy.min(data_covariance) * 0.5

        for _ in range(amount):
            random_appendix = scale * numpy.random.random()

            # Re-draw until the value is unique. With zero scale every draw is zero,
            # so uniqueness is impossible and retrying would never finish.
            while (scale != 0.0) and (random_appendix in covariance_appendixes):
                random_appendix = scale * numpy.random.random()

            covariance_appendixes.append(random_appendix)
            covariances.append(data_covariance - random_appendix)

        return covariances


    def __get_random_means(self, data, amount):
        """!
        @brief Chooses distinct random points of data as initial means.

        @param[in] data (array_like): Input points, one point per row.
        @param[in] amount (uint): Amount of means to choose.

        @exception ValueError if more means are requested than there are points.

        """
        if amount > len(data):
            raise ValueError("Amount of clusters (%d) exceeds amount of points (%d)." % (amount, len(data)))

        means = []
        mean_indexes = []
        for _ in range(amount):
            random_index = numpy.random.randint(0, len(data))
            while random_index in mean_indexes:
                random_index = numpy.random.randint(0, len(data))

            mean_indexes.append(random_index)
            means.append(numpy.array(data[random_index]))

        return means