ema_initializer.__initialize_random() - Code Metrics - Inspection of "#16: EMA observer and visualizer implementation." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( 9bc136...42a05b )

by Andrei

created 2017-09-19 14:50 UTC

ema_initializer.__initialize_random() A

↳ Parent: ema_initializer

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	17
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
@details Implementation based on article:
         - 

@authors Andrei Novikov ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import random;

from pyclustering.cluster import cluster_visualizer;
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer;
from pyclustering.cluster.kmeans import kmeans;

from pyclustering.utils import pi, calculate_ellipse_description;

from enum import IntEnum;

import matplotlib.pyplot as plt;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import matplotlib.animation as animation;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
from matplotlib import patches;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3



def gaussian(data, mean, covariance):
    dimension = float(len(data[0]));
 
    if (dimension != 1.0):
        inv_variance = numpy.linalg.pinv(covariance);
    else:
        inv_variance = 1.0 / covariance;
    
    divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance));
    right_const = 1.0 / divider;
     
    result = [];
     
    for point in data:
        mean_delta = point - mean;
        point_gaussian = right_const * numpy.exp( -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)) );
        result.append(point_gaussian);
     
    return result;



class ema_init_type(IntEnum):
    RANDOM_INITIALIZATION = 0;
    KMEANS_INITIALIZATION = 1;



class ema_initializer():
    def __init__(self, sample, amount):
        self.__sample = sample;
        self.__amount = amount;


    def initialize(self, init_type = ema_init_type.KMEANS_INITIALIZATION):
        if (init_type == ema_init_type.KMEANS_INITIALIZATION):
            return self.__initialize_kmeans();
        
        elif (init_type == ema_init_type.RANDOM_INITIALIZATION):
            return self.__initialize_random();
        
        raise NameError("Unknown type of EM algorithm initialization is specified.");


    def __initialize_random(self):
        initial_means = [];
        initial_covariance = [];
        
        for _ in range(self.__amount):
            mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ];
            while (mean in initial_means):
                mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ];
            
            initial_means.append(mean);
            
            covariance = numpy.cov(self.__sample, rowvar = False);
            covariance = numpy.divide(covariance, self.__amount + 1);
        
            initial_covariance.append(covariance);
        
        return initial_means, initial_covariance;


    def __initialize_kmeans(self):
        initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize();
        kmeans_instance = kmeans(self.__sample, initial_centers, ccore = True);
        kmeans_instance.process();
        
        means = kmeans_instance.get_centers();
        
        covariances = [];
        initial_clusters = kmeans_instance.get_clusters();
        for initial_cluster in initial_clusters:
            cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ];
            covariances.append(numpy.cov(cluster_sample, rowvar = False));
        
        return means, covariances;



class ema_observer:
    def __init__(self):
        self.__means_evolution = [];
        self.__covariances_evolution = [];
        self.__clusters_evolution = [];


    def __len__(self):
        return len(self.__means_evolution);


    def get_iterations(self):
        return len(self.__means_evolution);


    def get_evolution_means(self):
        return self.__means_evolution;


    def get_evolution_covariances(self):
        return self.__covariances_evolution;


    def get_evolution_clusters(self):
        return self.__clusters_evolution;


    def notify(self, means, covariances, clusters):
        self.__means_evolution.append(means);
        self.__covariances_evolution.append(covariances);
        self.__clusters_evolution.append(clusters);



class ema_visualizer:
    @staticmethod
    def show_clusters(clusters, sample, covariances, means, figure = None, display = True):
        visualizer = cluster_visualizer();
        visualizer.append_clusters(clusters, sample);
        
        if (figure is None):
            figure = visualizer.show(display = False);
        else:
            visualizer.show(figure = figure, display = False);
        
        if (len(sample[0]) == 2):
            ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means);

        if (display is True): 
            plt.show();

        return figure;


    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 1, save_movie = None):
        figure = plt.figure();
        
        def init_frame():
            return frame_generation(0);
        
        def frame_generation(index_iteration):
            figure.clf();
            
            figure.suptitle("Expectation maximixation algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold');
            
            clusters = observer.get_evolution_clusters()[index_iteration];
            covariances = observer.get_evolution_covariances()[index_iteration];
            means = observer.get_evolution_means()[index_iteration];
            
            ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False);
            figure.subplots_adjust(top = 0.85);
            
            return [ figure.gca() ];

        iterations = len(observer);
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000);

        if (save_movie is not None):
            cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500);
        else:
            plt.show();


    @staticmethod
    def __draw_ellipses(figure, visualizer, clusters, covariances, means):
        print(len(clusters));
        print([len(cluster) for cluster in clusters]);
        print(clusters);
        
        ax = figure.get_axes()[0];
        
        for index in range(len(clusters)):
            angle, width, height = calculate_ellipse_description(covariances[index]);
            color = visualizer.get_cluster_color(index, 0);
            
            ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color);


    @staticmethod
    def __draw_ellipse(ax, x, y, angle, width, height, color):
        ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=angle, linewidth=2, fill=True, zorder=2, color=color);
        ax.add_patch(ellipse);



class ema:
    def __init__(self, data, amount_clusters, means = None, variances = None, observer = None, tolerance = 0.00001, iterations = 100):
        self.__data = numpy.array(data);
        self.__amount_clusters = amount_clusters;
        self.__tolerance = tolerance;
        self.__iterations = iterations;
        self.__observer = observer;
        
        self.__means = means;
        self.__variances = variances;
        
        if ((means is None) or (variances is None)):
            self.__means, self.__variances = self.__get_initial_parameters(data, amount_clusters);
        
        self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ];
        self.__pic = [1.0] * amount_clusters;
        self.__clusters = [];
        self.__gaussians = [ [] for _ in range(amount_clusters) ];
        self.__stop = False;


    def process(self):
        self.__clusters = None;
        
        previous_likelihood = -200000;
        current_likelihood = -100000;
        
        current_iteration = 0;
        while( (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations) ):
            self.__expectation_step();
            self.__maximization_step();
            
            previous_likelihood = current_likelihood;
            current_likelihood = self.__log_likelihood();
            self.__stop = self.__get_stop_condition();
            
            current_iteration += 1;

            if (self.__observer is not None):
                self.__observer.notify(self.__means, self.__variances, self.__extract_clusters());
        
        self.__clusters = self.__extract_clusters();


    def get_clusters(self):
        return self.__clusters;


    def get_centers(self):
        return self.__means;


    def get_covariances(self):
        return self.__variances;


    def __notify(self):
        if (self.__observer is not None):
            clusters = self.__extract_clusters();
            self.__notify(self.__means, self.__variances, clusters);



    def __extract_clusters(self):
        clusters = [ [] for _ in range(self.__amount_clusters) ];
        for index_point in range(len(self.__data)):
            candidates = [];
            for index_cluster in range(self.__amount_clusters):
                candidates.append((index_cluster, self.__rc[index_cluster][index_point]));
            
            index_winner = max(candidates, key = lambda candidate : candidate[1])[0];
            clusters[index_winner].append(index_point);
        
        return clusters;


    def __log_likelihood(self):
        likelihood = 0.0;
        
        for index_point in range(len(self.__data)):
            particle = 0.0;
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point];
            
            likelihood += numpy.log(particle);
        
        return likelihood;


    def __probabilities(self, index_cluster, index_point):
        divider = 0.0;
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point];
        
        rc = self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider;
        return rc;


    def __expectation_step(self):
        self.__gaussians = [ [] for _ in range(self.__amount_clusters) ];
        for index in range(self.__amount_clusters):
            self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index]);
        
        self.__rc = [ [0.0] * len(self.__data) for _ in range(self.__amount_clusters) ];
        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point);


    def __maximization_step(self):
        self.__pic = [];
        self.__means = [];
        self.__variances = [];
        
        amount_impossible_clusters = 0;
        
        for index_cluster in range(self.__amount_clusters):
            mc = numpy.sum(self.__rc[index_cluster]);
            
            if (mc == 0.0):
                amount_impossible_clusters += 1;
                continue;
            
            self.__pic.append( mc / len(self.__data) );
            self.__means.append( self.__update_mean(self.__rc[index_cluster], mc) );
            self.__variances.append( self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc) );
        
        self.__amount_clusters -= amount_impossible_clusters;


    def __get_stop_condition(self):
        for covariance in self.__variances:
            if (numpy.linalg.norm(covariance) == 0.0):
                return True;
        
        return False;


    def __update_covariance(self, means, rc, mc):
        covariance = 0.0;
        for index_point in range(len(self.__data)):
            deviation = numpy.array( [ self.__data[index_point] - means ]);
            covariance += rc[index_point] * deviation.T.dot(deviation);
        
        covariance = covariance / mc;
        return covariance;


    def __update_mean(self, rc, mc):
        mean = 0.0;
        for index_point in range(len(self.__data)):
            mean += rc[index_point] * self.__data[index_point];
        
        mean = mean / mc;
        return mean;


    def __get_initial_parameters(self, sample, amount_clusters):
        return ema_initializer(sample, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION);

Push — 0.7.dev ( 9bc136...42a05b )

ema_initializer.__initialize_random() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing init.py files

1. Missing Dependencies

2. Missing init.py files

1. Missing Dependencies

2. Missing init.py files

1. Missing Dependencies

2. Missing init.py files

1			"""!
2
3			@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
4			@details Implementation based on article:
5			-
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2017
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			import numpy;
			0 ignored issues – show Configuration introduced 2017-09-19 14:51 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
30			import random;
31
32			from pyclustering.cluster import cluster_visualizer;
33			from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer;
34			from pyclustering.cluster.kmeans import kmeans;
35
36			from pyclustering.utils import pi, calculate_ellipse_description;
37
38			from enum import IntEnum;
39
40			import matplotlib.pyplot as plt;
			0 ignored issues – show Configuration introduced 2017-09-19 14:51 UTC by Report Bug Copy Issue Report The import `matplotlib.pyplot` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
41			import matplotlib.animation as animation;
			0 ignored issues – show Configuration introduced 2017-09-19 14:51 UTC by Report Bug Copy Issue Report The import `matplotlib.animation` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
42			from matplotlib import patches;
			0 ignored issues – show Configuration introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report The import `matplotlib` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
43
44
45
46			def gaussian(data, mean, covariance):
47			dimension = float(len(data[0]));
48
49			if (dimension != 1.0):
50			inv_variance = numpy.linalg.pinv(covariance);
51			else:
52			inv_variance = 1.0 / covariance;
53
54			divider = (pi * 2.0) ** (dimension / 2.0) * numpy.sqrt(numpy.linalg.norm(covariance));
55			right_const = 1.0 / divider;
56
57			result = [];
58
59			for point in data:
60			mean_delta = point - mean;
61			point_gaussian = right_const * numpy.exp( -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)) );
62			result.append(point_gaussian);
63
64			return result;
65
66
67
68			class ema_init_type(IntEnum):
69			RANDOM_INITIALIZATION = 0;
70			KMEANS_INITIALIZATION = 1;
71
72
73
74			class ema_initializer():
75			def __init__(self, sample, amount):
76			self.__sample = sample;
77			self.__amount = amount;
78
79
80			def initialize(self, init_type = ema_init_type.KMEANS_INITIALIZATION):
81			if (init_type == ema_init_type.KMEANS_INITIALIZATION):
82			return self.__initialize_kmeans();
83
84			elif (init_type == ema_init_type.RANDOM_INITIALIZATION):
85			return self.__initialize_random();
86
87			raise NameError("Unknown type of EM algorithm initialization is specified.");
88
89
90			def __initialize_random(self):
91			initial_means = [];
92			initial_covariance = [];
93
94			for _ in range(self.__amount):
95			mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ];
96			while (mean in initial_means):
97			mean = self.__sample[ random.randint(0, len(self.__sample)) - 1 ];
98
99			initial_means.append(mean);
100
101			covariance = numpy.cov(self.__sample, rowvar = False);
102			covariance = numpy.divide(covariance, self.__amount + 1);
103
104			initial_covariance.append(covariance);
105
106			return initial_means, initial_covariance;
107
108
109			def __initialize_kmeans(self):
110			initial_centers = kmeans_plusplus_initializer(self.__sample, self.__amount).initialize();
111			kmeans_instance = kmeans(self.__sample, initial_centers, ccore = True);
112			kmeans_instance.process();
113
114			means = kmeans_instance.get_centers();
115
116			covariances = [];
117			initial_clusters = kmeans_instance.get_clusters();
118			for initial_cluster in initial_clusters:
119			cluster_sample = [ self.__sample[index_point] for index_point in initial_cluster ];
120			covariances.append(numpy.cov(cluster_sample, rowvar = False));
121
122			return means, covariances;
123
124
125
126			class ema_observer:
127			def __init__(self):
128			self.__means_evolution = [];
129			self.__covariances_evolution = [];
130			self.__clusters_evolution = [];
131
132
133			def __len__(self):
134			return len(self.__means_evolution);
135
136
137			def get_iterations(self):
138			return len(self.__means_evolution);
139
140
141			def get_evolution_means(self):
142			return self.__means_evolution;
143
144
145			def get_evolution_covariances(self):
146			return self.__covariances_evolution;
147
148
149			def get_evolution_clusters(self):
150			return self.__clusters_evolution;
151
152
153			def notify(self, means, covariances, clusters):
154			self.__means_evolution.append(means);
155			self.__covariances_evolution.append(covariances);
156			self.__clusters_evolution.append(clusters);
157
158
159
160			class ema_visualizer:
161			@staticmethod
162			def show_clusters(clusters, sample, covariances, means, figure = None, display = True):
163			visualizer = cluster_visualizer();
164			visualizer.append_clusters(clusters, sample);
165
166			if (figure is None):
167			figure = visualizer.show(display = False);
168			else:
169			visualizer.show(figure = figure, display = False);
170
171			if (len(sample[0]) == 2):
172			ema_visualizer.__draw_ellipses(figure, visualizer, clusters, covariances, means);
173
174			if (display is True):
175			plt.show();
176
177			return figure;
178
179
180			@staticmethod
181			def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 1, save_movie = None):
182			figure = plt.figure();
183
184			def init_frame():
185			return frame_generation(0);
186
187			def frame_generation(index_iteration):
188			figure.clf();
189
190			figure.suptitle("Expectation maximixation algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold');
191
192			clusters = observer.get_evolution_clusters()[index_iteration];
193			covariances = observer.get_evolution_covariances()[index_iteration];
194			means = observer.get_evolution_means()[index_iteration];
195
196			ema_visualizer.show_clusters(clusters, data, covariances, means, figure, False);
197			figure.subplots_adjust(top = 0.85);
198
199			return [ figure.gca() ];
200
201			iterations = len(observer);
202			cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000);
203
204			if (save_movie is not None):
205			cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500);
206			else:
207			plt.show();
208
209
210			@staticmethod
211			def __draw_ellipses(figure, visualizer, clusters, covariances, means):
212			print(len(clusters));
213			print([len(cluster) for cluster in clusters]);
214			print(clusters);
215
216			ax = figure.get_axes()[0];
217
218			for index in range(len(clusters)):
219			angle, width, height = calculate_ellipse_description(covariances[index]);
220			color = visualizer.get_cluster_color(index, 0);
221
222			ema_visualizer.__draw_ellipse(ax, means[index][0], means[index][1], angle, width, height, color);
223
224
225			@staticmethod
226			def __draw_ellipse(ax, x, y, angle, width, height, color):
227			ellipse = patches.Ellipse((x, y), width, height, alpha=0.2, angle=angle, linewidth=2, fill=True, zorder=2, color=color);
228			ax.add_patch(ellipse);
229
230
231
232			class ema:
233			def __init__(self, data, amount_clusters, means = None, variances = None, observer = None, tolerance = 0.00001, iterations = 100):
234			self.__data = numpy.array(data);
235			self.__amount_clusters = amount_clusters;
236			self.__tolerance = tolerance;
237			self.__iterations = iterations;
238			self.__observer = observer;
239
240			self.__means = means;
241			self.__variances = variances;
242
243			if ((means is None) or (variances is None)):
244			self.__means, self.__variances = self.__get_initial_parameters(data, amount_clusters);
245
246			self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ];
247			self.__pic = [1.0] * amount_clusters;
248			self.__clusters = [];
249			self.__gaussians = [ [] for _ in range(amount_clusters) ];
250			self.__stop = False;
251
252
253			def process(self):
254			self.__clusters = None;
255
256			previous_likelihood = -200000;
257			current_likelihood = -100000;
258
259			current_iteration = 0;
260			while( (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > self.__tolerance) and (current_iteration < self.__iterations) ):
261			self.__expectation_step();
262			self.__maximization_step();
263
264			previous_likelihood = current_likelihood;
265			current_likelihood = self.__log_likelihood();
266			self.__stop = self.__get_stop_condition();
267
268			current_iteration += 1;
269
270			if (self.__observer is not None):
271			self.__observer.notify(self.__means, self.__variances, self.__extract_clusters());
272
273			self.__clusters = self.__extract_clusters();
274
275
276			def get_clusters(self):
277			return self.__clusters;
278
279
280			def get_centers(self):
281			return self.__means;
282
283
284			def get_covariances(self):
285			return self.__variances;
286
287
288			def __notify(self):
289			if (self.__observer is not None):
290			clusters = self.__extract_clusters();
291			self.__notify(self.__means, self.__variances, clusters);
			0 ignored issues – show Bug introduced 2017-09-16 22:44 UTC by Report Bug Copy Issue Report There seem to be too many positional arguments for this method call. Loading history...
292
293
294			def __extract_clusters(self):
295			clusters = [ [] for _ in range(self.__amount_clusters) ];
296			for index_point in range(len(self.__data)):
297			candidates = [];
298			for index_cluster in range(self.__amount_clusters):
299			candidates.append((index_cluster, self.__rc[index_cluster][index_point]));
300
301			index_winner = max(candidates, key = lambda candidate : candidate[1])[0];
302			clusters[index_winner].append(index_point);
303
304			return clusters;
305
306
307			def __log_likelihood(self):
308			likelihood = 0.0;
309
310			for index_point in range(len(self.__data)):
311			particle = 0.0;
312			for index_cluster in range(self.__amount_clusters):
313			particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point];
314
315			likelihood += numpy.log(particle);
316
317			return likelihood;
318
319
320			def __probabilities(self, index_cluster, index_point):
321			divider = 0.0;
322			for i in range(self.__amount_clusters):
323			divider += self.__pic[i] * self.__gaussians[i][index_point];
324
325			rc = self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider;
326			return rc;
327
328
329			def __expectation_step(self):
330			self.__gaussians = [ [] for _ in range(self.__amount_clusters) ];
331			for index in range(self.__amount_clusters):
332			self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index]);
333
334			self.__rc = [ [0.0] * len(self.__data) for _ in range(self.__amount_clusters) ];
335			for index_cluster in range(self.__amount_clusters):
336			for index_point in range(len(self.__data)):
337			self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point);
338
339
340			def __maximization_step(self):
341			self.__pic = [];
342			self.__means = [];
343			self.__variances = [];
344
345			amount_impossible_clusters = 0;
346
347			for index_cluster in range(self.__amount_clusters):
348			mc = numpy.sum(self.__rc[index_cluster]);
349
350			if (mc == 0.0):
351			amount_impossible_clusters += 1;
352			continue;
353
354			self.__pic.append( mc / len(self.__data) );
355			self.__means.append( self.__update_mean(self.__rc[index_cluster], mc) );
356			self.__variances.append( self.__update_covariance(self.__means[-1], self.__rc[index_cluster], mc) );
357
358			self.__amount_clusters -= amount_impossible_clusters;
359
360
361			def __get_stop_condition(self):
362			for covariance in self.__variances:
363			if (numpy.linalg.norm(covariance) == 0.0):
364			return True;
365
366			return False;
367
368
369			def __update_covariance(self, means, rc, mc):
370			covariance = 0.0;
371			for index_point in range(len(self.__data)):
372			deviation = numpy.array( [ self.__data[index_point] - means ]);
373			covariance += rc[index_point] * deviation.T.dot(deviation);
374
375			covariance = covariance / mc;
376			return covariance;
377
378
379			def __update_mean(self, rc, mc):
380			mean = 0.0;
381			for index_point in range(len(self.__data)):
382			mean += rc[index_point] * self.__data[index_point];
383
384			mean = mean / mc;
385			return mean;
386
387
388			def __get_initial_parameters(self, sample, amount_clusters):
389			return ema_initializer(sample, amount_clusters).initialize(ema_init_type.KMEANS_INITIALIZATION);

annoviko / pyclustering

Push — 0.7.dev ( 9bc136...42a05b )

ema_initializer.__initialize_random() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing init.py files

2. Missing init.py files

2. Missing init.py files

2. Missing init.py files