ema_visualizer.__draw_ellipses() - Code Metrics - Inspection of "#16: Corrections for EMA issues." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( e45971...bb7559 )

by Andrei

created 2017-09-16 22:43 UTC

ema_visualizer.__draw_ellipses() A

↳ Parent: ema_visualizer

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	13
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
@details Implementation based on article:
         - 

@authors Andrei Novikov ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy;
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from pyclustering.cluster import cluster_visualizer;
from pyclustering.utils import pi, calculate_ellipse_description;

import matplotlib.pyplot as plt;
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
from matplotlib import patches;
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3



def gaussian(data, mean, covariance):
    """!
    @brief Evaluates a Gaussian-shaped function with the given mean and covariance at every point of the data.

    @param[in] data (array_like): Points to evaluate; the length of the first point defines the dimension.
    @param[in] mean (numpy.array): Mean that is subtracted from every point.
    @param[in] covariance (numpy.array|float): Covariance matrix (or variance when the dimension is 1).

    @return (list) One value per point, in the order of the input; empty list when there are no points.

    """
    # Nothing to evaluate - avoid indexing the first point of an empty collection.
    if len(data) == 0:
        return []

    dimension = float(len(data[0]))

    # Pseudo-inverse keeps the computation defined for singular covariance matrices.
    if dimension != 1.0:
        inv_variance = numpy.linalg.pinv(covariance)
    else:
        inv_variance = 1.0 / covariance

    # NOTE(review): the normalisation uses the (Frobenius) norm of the covariance; the textbook
    # multivariate normal density uses its determinant instead - confirm which one is intended.
    divider = (numpy.pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance))

    # A zero covariance yields a zero divider; use an explicit infinity instead of dividing by zero.
    right_const = 1.0 / divider if divider != 0.0 else float('inf')

    result = []
    for point in data:
        mean_delta = point - mean
        exponent = -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta))
        result.append(right_const * numpy.exp(exponent))

    return result



class ema_observer:
    """!
    @brief Stores the history of means, covariances and clusters that is reported through 'notify()'.
    @details Every 'notify()' call appends one snapshot; the getters return the accumulated histories
              in the order in which the snapshots were reported.

    """

    def __init__(self):
        """!
        @brief Creates an observer with empty histories.

        """
        self.__means_evolution = []
        self.__covariances_evolution = []
        self.__clusters_evolution = []


    def get_iterations(self):
        """!
        @return (uint) Amount of snapshots that have been reported so far.

        """
        # One entry is appended to every history per notification, so any of them gives the count.
        return len(self.__means_evolution)


    def get_means(self):
        """!
        @return (list) Reported means, one entry per notification.

        """
        return self.__means_evolution


    def get_covariances(self):
        """!
        @return (list) Reported covariances, one entry per notification.

        """
        return self.__covariances_evolution


    def get_clusters(self):
        """!
        @return (list) Reported clusters, one entry per notification.

        """
        return self.__clusters_evolution


    def notify(self, means, covariances, clusters):
        """!
        @brief Appends one snapshot to the stored histories.

        @param[in] means (list): Means to store.
        @param[in] covariances (list): Covariances to store.
        @param[in] clusters (list): Clusters to store.

        """
        self.__means_evolution.append(means)
        self.__covariances_evolution.append(covariances)
        self.__clusters_evolution.append(clusters)



class ema_visualizer:
    """!
    @brief Displays clusters together with one ellipse per cluster built from its mean and covariance.

    """

    @staticmethod
    def show_clusters(clusters, sample, covariances, means, display = True):
        """!
        @brief Draws the clusters and, for two-dimensional samples, an ellipse for every cluster.

        @param[in] clusters (list): Clusters that are passed to 'cluster_visualizer' together with the sample.
        @param[in] sample (list): Input points; ellipses are drawn only when the first point has two coordinates.
        @param[in] covariances (list): Covariance per cluster, indexed like 'clusters'.
        @param[in] means (list): Mean per cluster, indexed like 'clusters'.
        @param[in] display (bool): If True then 'plt.show()' is called before returning.

        @return Figure object returned by 'cluster_visualizer.show()'.

        """
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)

        # Rendering is postponed so that the ellipses can be added to the figure first.
        figure = visualizer.show(display=False)

        if len(sample[0]) == 2:
            ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means)

        if display is True:
            plt.show()

        return figure


    @staticmethod
    def __draw_ellipses(figure, visualizer, clusters, covariances, means):
        """!
        @brief Adds one ellipse per cluster to the first axes of the figure.

        @param[in] figure: Figure whose first axes receives the ellipses.
        @param[in] visualizer: Visualizer that provides the color of every cluster.
        @param[in] clusters (list): Clusters; only their amount is used here.
        @param[in] covariances (list): Covariance per cluster.
        @param[in] means (list): Mean per cluster; the first two coordinates are the ellipse center.

        """
        ax = figure.get_axes()[0]

        for index in range(len(clusters)):
            angle, width, height = calculate_ellipse_description(covariances[index])
            color = visualizer.get_cluster_color(index, 0)

            ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color)


    @staticmethod
    def __draw_ellipse(ax, x, y, angle, width, height, color):
        """!
        @brief Adds a single semi-transparent filled ellipse to the axes.

        @param[in] ax: Axes that receives the patch.
        @param[in] x (double): X coordinate of the ellipse center.
        @param[in] y (double): Y coordinate of the ellipse center.
        @param[in] angle (double): Rotation that is passed to 'patches.Ellipse'.
        @param[in] width (double): Width that is passed to 'patches.Ellipse'.
        @param[in] height (double): Height that is passed to 'patches.Ellipse'.
        @param[in] color: Color of the ellipse.

        """
        ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=angle, linewidth=2, fill=True, zorder=2, color=color)
        ax.add_patch(ellipse)


class ema:
    """!
    @brief Expectation-Maximization algorithm (EMA) for cluster analysis.
    @details Every cluster is described by a mean, a covariance and a mixture weight. 'process()' alternates
              expectation and maximization steps until the log-likelihood changes by no more than the tolerance
              or the norm of some covariance becomes zero.

    """

    def __init__(self, data, amount_clusters, means=None, variances=None, observer=None, tolerance=0.00001):
        """!
        @brief Prepares the algorithm; no clustering is performed until 'process()' is called.

        @param[in] data (list): Input points, each point is a sequence of coordinates.
        @param[in] amount_clusters (uint): Amount of clusters to look for.
        @param[in] means (list): Initial means; random points of the data are used when None.
        @param[in] variances (list): Initial covariances; randomly perturbed data covariance is used when None.
        @param[in] observer (ema_observer): Object whose 'notify(means, covariances, clusters)' is called
                    after every iteration; no notifications when None.
        @param[in] tolerance (double): The loop stops when the change of the log-likelihood between two
                    iterations is not greater than this value.

        """
        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters
        self.__tolerance = tolerance
        self.__observer = observer

        self.__means = means
        if means is None:
            self.__means = self.__get_random_means(data, amount_clusters)

        self.__variances = variances
        if variances is None:
            self.__variances = self.__get_random_covariances(data, amount_clusters)

        # rc[cluster][point] - responsibility of the cluster for the point.
        self.__rc = [[0.0] * len(self.__data) for _ in range(amount_clusters)]
        # pic[cluster] - mixture weight of the cluster.
        self.__pic = [1.0] * amount_clusters
        self.__clusters = []
        # gaussians[cluster][point] - value of the cluster's gaussian at the point.
        self.__gaussians = [[] for _ in range(amount_clusters)]
        self.__stop = False


    def process(self):
        """!
        @brief Runs expectation and maximization steps until convergence and extracts the clusters.
        @details The observer, if any, is notified after every iteration.

        """
        self.__clusters = None

        # Sentinel values that guarantee that the loop is entered at least once.
        previous_likelihood = -200000
        current_likelihood = -100000

        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance):
            self.__expectation_step()
            self.__maximization_step()

            self.__notify()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_condition()

        self.__clusters = self.__extract_clusters()


    def get_clusters(self):
        """!
        @return (list) Clusters as lists of point indexes; empty list before the first 'process()' call.

        """
        return self.__clusters


    def get_centers(self):
        """!
        @return (list) Current mean of every cluster.

        """
        return self.__means


    def get_covariances(self):
        """!
        @return (list) Current covariance of every cluster.

        """
        return self.__variances


    def __notify(self):
        """!
        @brief Reports the current means, covariances and clusters to the observer when one is set.

        """
        if self.__observer is not None:
            clusters = self.__extract_clusters()
            self.__observer.notify(self.__means, self.__variances, clusters)


    def __extract_clusters(self):
        """!
        @brief Assigns every point to the cluster with the highest responsibility for it.

        @return (list) Non-empty clusters, each is a list of point indexes.

        """
        clusters = [[] for _ in range(self.__amount_clusters)]
        for index_point in range(len(self.__data)):
            # On ties the cluster with the lowest index wins.
            index_winner = max(range(self.__amount_clusters),
                               key=lambda index_cluster: self.__rc[index_cluster][index_point])
            clusters[index_winner].append(index_point)

        # NOTE(review): dropping empty clusters shifts cluster indexes relative to the means and
        # covariances returned by the getters - confirm that callers do not rely on matching indexes.
        return [cluster for cluster in clusters if len(cluster) > 0]


    def __log_likelihood(self):
        """!
        @brief Sums the logarithm of the weighted gaussian mixture over all points.

        @return (double) Log-likelihood based on the weights and the stored gaussian values.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            likelihood += numpy.log(particle)

        return likelihood


    def __probabilities(self, index_cluster, index_point):
        """!
        @brief Calculates the responsibility of a cluster for a point.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] index_point (uint): Index of the point.

        @return (double) Weighted gaussian of the cluster divided by the sum of weighted gaussians of all clusters.

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider


    def __expectation_step(self):
        """!
        @brief Recalculates gaussian values and responsibilities for the current means and covariances.

        """
        for index in range(self.__amount_clusters):
            self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index])

        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)


    def __maximization_step(self):
        """!
        @brief Recalculates weights, means and covariances from the responsibilities.
        @details Clusters whose total responsibility is zero are removed. Their rows are removed from the
                  responsibilities and gaussian values as well, so that all per-cluster containers stay
                  aligned by index.

        """
        weights, means, covariances = [], [], []
        kept_rc, kept_gaussians = [], []

        for index_cluster in range(self.__amount_clusters):
            rc = self.__rc[index_cluster]
            mc = numpy.sum(rc)

            # No point supports the cluster - it cannot be updated and is dropped.
            if mc == 0.0:
                continue

            mean = self.__update_mean(rc, mc)

            weights.append(mc / len(self.__data))
            means.append(mean)
            covariances.append(self.__update_covariance(mean, rc, mc))

            kept_rc.append(rc)
            kept_gaussians.append(self.__gaussians[index_cluster])

        self.__pic = weights
        self.__means = means
        self.__variances = covariances
        self.__rc = kept_rc
        self.__gaussians = kept_gaussians
        self.__amount_clusters = len(means)


    def __get_stop_condition(self):
        """!
        @return (bool) True when the norm of at least one covariance is zero, otherwise False.

        """
        for covariance in self.__variances:
            if numpy.linalg.norm(covariance) == 0.0:
                return True

        return False


    def __update_covariance(self, means, rc, mc):
        """!
        @brief Calculates the responsibility-weighted covariance of the data around the given mean.

        @param[in] means (numpy.array): Mean of the cluster.
        @param[in] rc (list): Responsibility of the cluster for every point.
        @param[in] mc (double): Sum of the responsibilities.

        @return (numpy.array) Weighted sum of outer products of deviations divided by 'mc'.

        """
        deviations = self.__data - means
        weighted_deviations = deviations * numpy.asarray(rc)[:, numpy.newaxis]

        return weighted_deviations.T.dot(deviations) / mc


    def __update_mean(self, rc, mc):
        """!
        @brief Calculates the responsibility-weighted mean of the data.

        @param[in] rc (list): Responsibility of the cluster for every point.
        @param[in] mc (double): Sum of the responsibilities.

        @return (numpy.array) Weighted sum of the points divided by 'mc'.

        """
        return numpy.dot(numpy.asarray(rc), self.__data) / mc


    def __get_random_covariances(self, data, amount):
        """!
        @brief Creates initial covariances by subtracting distinct random values from the data covariance.

        @param[in] data (list): Input points.
        @param[in] amount (uint): Amount of covariances to create.

        @return (list) Initial covariances, one per cluster.

        """
        covariances = []
        covariance_appendixes = []
        data_covariance = numpy.cov(data, rowvar=False)

        for _ in range(amount):
            random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random()
            # Every cluster gets its own perturbation value.
            while random_appendix in covariance_appendixes:
                random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random()

            covariance_appendixes.append(random_appendix)
            covariances.append(data_covariance - random_appendix)

        return covariances


    def __get_random_means(self, data, amount):
        """!
        @brief Chooses distinct random points of the data as initial means.

        @param[in] data (list): Input points.
        @param[in] amount (uint): Amount of means to choose.

        @return (list) Initial means, one per cluster.

        @exception ValueError when more means are requested than there are points.

        """
        # Distinct indexes cannot be drawn in this case - the search below would never finish.
        if amount > len(data):
            raise ValueError("Amount of clusters (%d) exceeds amount of points (%d)." % (amount, len(data)))

        means = []
        mean_indexes = []
        for _ in range(amount):
            random_index = numpy.random.randint(0, len(data))
            while random_index in mean_indexes:
                random_index = numpy.random.randint(0, len(data))

            mean_indexes.append(random_index)
            means.append(numpy.array(data[random_index]))

        return means

Push — 0.7.dev ( e45971...bb7559 )

ema_visualizer.__draw_ellipses() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			"""!
2
3			@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
4			@details Implementation based on article:
5			-
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2017
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			import numpy;
			0 ignored issues – show Configuration introduced 2017-09-13 16:23 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
30
31			from pyclustering.cluster import cluster_visualizer;
32			from pyclustering.utils import pi, calculate_ellipse_description;
33
34			import matplotlib.pyplot as plt;
			0 ignored issues – show Configuration introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report The import `matplotlib.pyplot` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
35			from matplotlib import patches;
			0 ignored issues – show Configuration introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report The import `matplotlib` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
36
37
38
39			def gaussian(data, mean, covariance):
40			dimension = float(len(data[0]));
41
42			if (dimension != 1.0):
43			inv_variance = numpy.linalg.pinv(covariance);
44			else:
45			inv_variance = 1.0 / covariance;
46
47			divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance));
48			right_const = 1.0 / divider;
49
50			result = [];
51
52			for point in data:
53			mean_delta = point - mean;
54			point_gaussian = right_const * numpy.exp( -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)) );
55			result.append(point_gaussian);
56
57			return result;
58
59
60
61			class ema_observer:
62			def __init__(self):
63			self.__means_evolution = [];
64			self.__covariances_evolution = [];
65			self.__clusters_evolution = [];
66
67
68			def get_iterations(self):
69			return len(self.__means);
			0 ignored issues – show Bug introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report The Instance of `ema_observer` does not seem to have a member named `__means`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
70
71
72			def get_means(self):
73			return self.__means_evolution;
74
75
76			def get_covariances(self):
77			return self.__covariances_evolution;
78
79
80			def notify(self, means, covariances, clusters):
81			self.__means_evolution.append(means);
82			self.__covariances_evolution.append(covariances);
83			self.__clusters_evolution.append(clusters);
84
85
86
87			class ema_visualizer:
88			@staticmethod
89			def show_clusters(clusters, sample, covariances, means, display = True):
90			visualizer = cluster_visualizer();
91			visualizer.append_clusters(clusters, sample);
92			figure = visualizer.show(display = False);
93
94			if (len(sample[0]) == 2):
95			ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means);
96
97			if (display is True):
98			plt.show();
99
100			return figure;
101
102
103			@staticmethod
104			def __draw_ellipses(figure, visualizer, clusters, covariances, means):
105			print(len(clusters));
106			print([len(cluster) for cluster in clusters]);
107			print(clusters);
108
109			ax = figure.get_axes()[0];
110
111			for index in range(len(clusters)):
112			angle, width, height = calculate_ellipse_description(covariances[index]);
113			color = visualizer.get_cluster_color(index, 0);
114
115			ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color);
116
117
118			@staticmethod
119			def __draw_ellipse(ax, x, y, angle, width, height, color):
120			ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=angle, linewidth=2, fill=True, zorder=2, color=color);
121			ax.add_patch(ellipse);
122
123
124			class ema:
125			def __init__(self, data, amount_clusters, means = None, variances = None, observer = None, tolerance = 0.00001):
126			self.__data = numpy.array(data);
127			self.__amount_clusters = amount_clusters;
128			self.__tolerance = tolerance;
129			self.__observer = observer;
130
131			self.__means = means;
132			if (means is None):
133			self.__means = self.__get_random_means(data, amount_clusters);
134
135			self.__variances = variances;
136			if (variances is None):
137			self.__variances = self.__get_random_covariances(data, amount_clusters);
138
139			self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ];
140			self.__pic = [1.0] * amount_clusters;
141			self.__clusters = [];
142			self.__gaussians = [ [] for _ in range(amount_clusters) ];
143			self.__stop = False;
144
145
146			def process(self):
147			self.__clusters = None;
148
149			previous_likelihood = -200000;
150			current_likelihood = -100000;
151
152			while( (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) ):
153			self.__expectation_step();
154			self.__maximization_step();
155
156			previous_likelihood = current_likelihood;
157			current_likelihood = self.__log_likelihood();
158			self.__stop = self.__get_stop_condition();
159
160			self.__clusters = self.__extract_clusters();
161
162
163			def get_clusters(self):
164			return self.__clusters;
165
166
167			def get_centers(self):
168			return self.__means;
169
170
171			def get_covariances(self):
172			return self.__variances;
173
174
175			def __notify(self):
176			if (self.__observer is not None):
177			clusters = self.__extract_clusters();
178			self.__notify(self.__means, self.__variances, clusters);
			0 ignored issues – show Bug introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report There seem to be too many positional arguments for this method call. Loading history...
179
180
181			def __extract_clusters(self):
182			clusters = [ [] for _ in range(self.__amount_clusters) ];
183			for index_point in range(len(self.__data)):
184			candidates = [];
185			for index_cluster in range(self.__amount_clusters):
186			candidates.append((index_cluster, self.__rc[index_cluster][index_point]));
187
188			index_winner = max(candidates, key = lambda candidate : candidate[1])[0];
189			clusters[index_winner].append(index_point);
190
191			clusters = [ cluster for cluster in clusters if len(cluster) > 0 ];
192			return clusters;
193
194
195			def __log_likelihood(self):
196			likelihood = 0.0;
197
198			for index_point in range(len(self.__data)):
199			particle = 0.0;
200			for index_cluster in range(self.__amount_clusters):
201			particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point];
202
203			likelihood += numpy.log(particle);
204
205			return likelihood;
206
207
208			def __probabilities(self, index_cluster, index_point):
209			divider = 0.0;
210			for i in range(self.__amount_clusters):
211			divider += self.__pic[i] * self.__gaussians[i][index_point];
212
213			rc = self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider;
214			return rc;
215
216
217			def __expectation_step(self):
218			for index in range(self.__amount_clusters):
219			self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index]);
220
221			for index_cluster in range(self.__amount_clusters):
222			for index_point in range(len(self.__data)):
223			self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point);
224
225
226			def __maximization_step(self):
227			self.__pic = [];
228			self.__means = [];
229			self.__variances = [];
230
231			amount_impossible_clusters = 0;
232
233			for index_cluster in range(self.__amount_clusters):
234			mc = numpy.sum(self.__rc[index_cluster]);
235
236			if (mc == 0.0):
237			amount_impossible_clusters += 1;
238			continue;
239
240			self.__pic.append( mc / len(self.__data) );
241			self.__means.append( self.__update_mean(self.__rc[index_cluster], mc) );
242			self.__variances.append( self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc) );
243
244			self.__amount_clusters -= amount_impossible_clusters;
245
246
247			def __get_stop_condition(self):
248			for covariance in self.__variances:
249			if (numpy.linalg.norm(covariance) == 0.0):
250			return True;
251
252			return False;
253
254
255			def __update_covariance(self, means, rc, mc):
256			covariance = 0.0;
257			for index_point in range(len(self.__data)):
258			deviation = numpy.array( [ self.__data[index_point] - means ]);
259			covariance += rc[index_point] * deviation.T.dot(deviation);
260
261			covariance = covariance / mc;
262			return covariance;
263
264
265			def __update_mean(self, rc, mc):
266			mean = 0.0;
267			for index_point in range(len(self.__data)):
268			mean += rc[index_point] * self.__data[index_point];
269
270			mean = mean / mc;
271			return mean;
272
273
274			def __get_random_covariances(self, data, amount):
275			covariances = [];
276			covariance_appendixes = [];
277			data_covariance = numpy.cov(data, rowvar = False);
278			for _ in range(amount):
279			random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random();
280			while(random_appendix in covariance_appendixes):
281			random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random();
282
283			covariance_appendixes.append(random_appendix)
284			covariances.append(data_covariance - random_appendix);
285
286			return covariances;
287
288
289			def __get_random_means(self, data, amount):
290			means = [];
291			mean_indexes = [];
292			for _ in range(amount):
293			random_index = numpy.random.randint(0, len(data));
294			while(random_index in mean_indexes):
295			random_index = numpy.random.randint(0, len(data));
296
297			mean_indexes.append(random_index);
298			means.append(numpy.array(data[random_index]));
299
300			return means;

annoviko / pyclustering

Push — 0.7.dev ( e45971...bb7559 )

ema_visualizer.__draw_ellipses() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files