ema - Code Metrics - Inspection of "#13: EM algorithm implementation." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( 619631...fe456d )

by Andrei

created 2017-09-12 08:16 UTC

ema B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	152
Duplicated Lines	0 %

Importance

Changes

Metric	Value
dl	0
loc	152
rs	8.2857
c	0
b	0
f	0
wmc	39

14 Methods

Rating	Name	Size	Complexity
A	get_centers()	2	1
A	__update_mean()	7	2
A	process()	14	4
B	__init__()	17	5
B	get_clusters()	14	5
A	__get_random_means()	12	3
A	__expectation_step()	8	4
A	__maximization_step()	8	2
A	__probabilities()	7	2
A	get_covariances()	2	1
A	__log_likelihood()	11	3
A	__update_covariance()	8	2
A	__get_random_covariances()	8	2
A	__get_stop_flag()	7	3

"""!

@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
@details Implementation based on article:
         - 

@authors Andrei Novikov ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy;
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from pyclustering.utils import pi;

import matplotlib.pyplot as plt;
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
from _operator import index



def gaussian(data, mean = None, covariance = None):
    """!
    @brief Evaluates the multivariate normal density at every point of the data.
    @details The density is
             exp(-0.5 * (x - mean)^T * covariance^-1 * (x - mean)) / sqrt((2 * pi)^d * det(covariance)),
             where d is the amount of features taken from the first point.

    @param[in] data (array_like): Points, one per row, each with the same amount of features.
    @param[in] mean (array_like): Mean of the distribution; when None the per-feature mean of the data is used.
    @param[in] covariance (array_like): Covariance matrix (d x d); when None it is estimated from the data
                with features stored in columns.

    @return (list) Density value for each point, in the order of the input points.

    @note The covariance matrix is inverted, so a singular matrix raises numpy.linalg.LinAlgError.
          A matrix with a non-positive determinant yields NaN/inf values.

    """
    points = numpy.asarray(data, dtype=float)
    dimension = len(points[0])

    if mean is None:
        # Per-feature mean: without 'axis=0' numpy collapses all features into one scalar.
        mean = numpy.mean(points, axis=0)
    else:
        mean = numpy.asarray(mean, dtype=float)

    if covariance is None:
        covariance = numpy.cov(points, rowvar=False)

    # One-dimensional data gives a 0-d covariance; promote it to 1x1 so it can be inverted.
    covariance = numpy.atleast_2d(numpy.asarray(covariance, dtype=float))

    inv_covariance = numpy.linalg.inv(covariance)

    # Normalisation uses the determinant of the covariance, not a matrix norm.
    right_const = 1.0 / ((numpy.pi * 2.0) ** (dimension / 2.0) * numpy.linalg.det(covariance) ** 0.5)

    # Squared Mahalanobis distance of every point, computed row-wise.
    deltas = points - mean
    distances = numpy.sum(deltas.dot(inv_covariance) * deltas, axis=1)

    return list(right_const * numpy.exp(-0.5 * distances))


class ema:
    """!
    @brief Expectation-Maximization clustering that fits a mixture of Gaussians to the data.
    @details Every cluster is described by a mean and a covariance matrix. The algorithm alternates
             the expectation step (responsibility of each cluster for each point) and the maximization
             step (re-estimation of weights, means and covariances) until the log-likelihood stops changing.

    Example:
    @code
        ema_instance = ema(sample, 3)
        ema_instance.process()
        clusters = ema_instance.get_clusters()
    @endcode

    """

    def __init__(self, data, amount_clusters, means = None, variances = None):
        """!
        @brief Stores the data and prepares initial parameters of the mixture.

        @param[in] data (array_like): Points, one per row.
        @param[in] amount_clusters (uint): Amount of Gaussians (clusters) to fit.
        @param[in] means (list): Initial mean for each cluster; randomly chosen data points when None.
        @param[in] variances (list): Initial covariance matrix for each cluster; derived from the
                    covariance of the whole data when None.

        """
        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters

        # User-supplied parameters are copied: the maximization step overwrites them element by element
        # and must not modify the caller's containers.
        if means is None:
            self.__means = self.__get_random_means(data, amount_clusters)
        else:
            self.__means = [numpy.array(mean) for mean in means]

        if variances is None:
            self.__variances = self.__get_random_covariances(data, amount_clusters)
        else:
            self.__variances = [numpy.array(variance) for variance in variances]

        self.__rc = [[0.0] * len(self.__data) for _ in range(amount_clusters)]   # responsibilities [cluster][point]
        self.__pic = [1.0] * amount_clusters                                       # mixture weights
        self.__clusters = []
        self.__gaussians = [[] for _ in range(amount_clusters)]                   # densities [cluster][point]
        self.__stop = False


    def process(self):
        """!
        @brief Runs expectation and maximization steps until a stop condition is reached.
        @details Iterations stop when the log-likelihood changes by no more than 0.00001, when it becomes
                 non-negative, or when the stop flag is raised by the covariance check.

        @see get_clusters()
        @see get_centers()
        @see get_covariances()

        """
        self.__clusters = None

        # Artificial starting values that only guarantee the first iteration is executed.
        previous_likelihood = -10000500
        current_likelihood = -10000000

        # NOTE(review): the 'current_likelihood < 0.0' condition is kept from the original; a log-likelihood
        # of a continuous density may legitimately be positive - confirm whether this stop is intended.
        while ((self.__stop is False) and
               (abs(previous_likelihood - current_likelihood) > 0.00001) and
               (current_likelihood < 0.0)):
            self.__expectation_step()
            self.__maximization_step()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_flag()


    def get_clusters(self):
        """!
        @brief Returns clusters as lists of point indexes.
        @details Each point is assigned to exactly one cluster - the one with the highest responsibility
                 for the point. The result is cached until the next call of process().

        @return (list) List of clusters, each cluster is a list of indexes of points from the input data.

        """
        if self.__clusters is not None:
            return self.__clusters

        self.__clusters = [[] for _ in range(self.__amount_clusters)]
        if not self.__clusters:
            return self.__clusters

        for index_point in range(len(self.__data)):
            # Winner-takes-all: a fixed threshold would drop points whose best responsibility is small
            # and could place a point into several clusters at once.
            index_winner = max(range(self.__amount_clusters),
                               key=lambda index_cluster: self.__rc[index_cluster][index_point])
            self.__clusters[index_winner].append(index_point)

        return self.__clusters


    def get_centers(self):
        """!
        @brief Returns current means of the clusters.

        @return (list) Mean of each cluster.

        """
        return self.__means


    def get_covariances(self):
        """!
        @brief Returns current covariance matrices of the clusters.

        @return (list) Covariance matrix of each cluster.

        """
        return self.__variances


    def __log_likelihood(self):
        """!
        @brief Calculates log-likelihood of the data for current weights and cached densities.

        @return (float) Sum over points of the logarithm of the weighted density mixture.

        """
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            likelihood += numpy.log(particle)

        return likelihood


    def __probabilities(self, index_cluster, index_point):
        """!
        @brief Calculates responsibility of the cluster for the point.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] index_point (uint): Index of the point.

        @return (float) Weighted density of the cluster at the point divided by the sum of
                 weighted densities of all clusters at the point.

        """
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        return self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider


    def __expectation_step(self):
        """!
        @brief Expectation step: refreshes densities and responsibilities for current parameters.

        """
        for index_cluster in range(self.__amount_clusters):
            self.__gaussians[index_cluster] = gaussian(self.__data,
                                                       self.__means[index_cluster],
                                                       self.__variances[index_cluster])

        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)


    def __maximization_step(self):
        """!
        @brief Maximization step: re-estimates weight, mean and covariance of every cluster.

        """
        for index_cluster in range(self.__amount_clusters):
            mc = numpy.sum(self.__rc[index_cluster])

            self.__pic[index_cluster] = mc / len(self.__data)

            # The mean is updated first because the covariance is measured around the new mean.
            self.__means[index_cluster] = self.__update_mean(index_cluster, mc)
            self.__variances[index_cluster] = self.__update_covariance(index_cluster, mc)


    def __get_stop_flag(self):
        """!
        @brief Checks covariance matrices for a zero element in their first row.

        @return (bool) True if the smallest element of the first row of any covariance matrix equals zero.

        """
        # NOTE(review): only the first row is inspected, as in the original - presumably a guard against
        # a degenerate covariance; confirm the intended condition.
        for covariance in self.__variances:
            if min(covariance[0]) == 0:
                return True

        return False


    def __update_covariance(self, index_cluster, mc):
        """!
        @brief Calculates responsibility-weighted covariance matrix of the cluster.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] mc (float): Sum of responsibilities of the cluster over all points.

        @return (numpy.ndarray) Weighted sum of outer products of deviations from the cluster mean, divided by mc.

        """
        weights = numpy.asarray(self.__rc[index_cluster])
        deviations = self.__data - self.__means[index_cluster]

        # Sum over points of weight * deviation^T * deviation, done as one matrix product.
        return (deviations.T * weights).dot(deviations) / mc


    def __update_mean(self, index_cluster, mc):
        """!
        @brief Calculates responsibility-weighted mean of the cluster.

        @param[in] index_cluster (uint): Index of the cluster.
        @param[in] mc (float): Sum of responsibilities of the cluster over all points.

        @return (numpy.ndarray) Weighted sum of the points divided by mc.

        """
        weights = numpy.asarray(self.__rc[index_cluster])
        return weights.dot(self.__data) / mc


    def __get_random_covariances(self, data, amount):
        """!
        @brief Builds initial covariance matrices from the covariance of the whole data.

        @param[in] data (array_like): Points, one per row.
        @param[in] amount (uint): Amount of matrices to build.

        @return (list) Covariance of the data with a random scalar offset added to every element,
                 one matrix per cluster.

        """
        # NOTE(review): the offset is added to every element (not only the diagonal), so the result is
        # not guaranteed to stay positive definite - kept as in the original.
        covariances = []
        data_covariance = numpy.cov(data, rowvar=False)
        for _ in range(amount):
            random_appendix = numpy.min(data_covariance) * 0.2 * numpy.random.random()
            covariances.append(data_covariance + random_appendix)

        return covariances


    def __get_random_means(self, data, amount):
        """!
        @brief Picks random data points as initial means.

        @param[in] data (array_like): Points, one per row.
        @param[in] amount (uint): Amount of means to pick.

        @return (list) Chosen points as numpy arrays; point indexes are distinct while unused points remain.

        """
        means = []
        used_indexes = set()
        for _ in range(amount):
            random_index = numpy.random.randint(0, len(data))

            # Re-draw until an unused point is found; uniqueness is abandoned only when every point
            # has already been used, which prevents an endless loop.
            while (random_index in used_indexes) and (len(used_indexes) < len(data)):
                random_index = numpy.random.randint(0, len(data))

            used_indexes.add(random_index)
            means.append(numpy.array(data[random_index]))

        return means



# from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;
# from pyclustering.utils import read_sample;
#   
# # sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE9);
# # ema_instance = ema(sample, 2);
# 
# sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);
# ema_instance = ema(sample, 3);
# 
# ema_instance.process();
# clusters = ema_instance.get_clusters();
#   
# print(clusters);

Push — 0.7.dev ( 619631...fe456d )

ema B

Complexity

Size/Duplication

Importance

14 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			"""!
2
3			@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
4			@details Implementation based on article:
5			-
6
7			@authors Andrei Novikov ([email protected])
8			@date 2014-2017
9			@copyright GNU Public License
10
11			@cond GNU_PUBLIC_LICENSE
12			PyClustering is free software: you can redistribute it and/or modify
13			it under the terms of the GNU General Public License as published by
14			the Free Software Foundation, either version 3 of the License, or
15			(at your option) any later version.
16
17			PyClustering is distributed in the hope that it will be useful,
18			but WITHOUT ANY WARRANTY; without even the implied warranty of
19			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20			GNU General Public License for more details.
21
22			You should have received a copy of the GNU General Public License
23			along with this program. If not, see <http://www.gnu.org/licenses/>.
24			@endcond
25
26			"""
27
28
29			import numpy;
			0 ignored issues – show Configuration introduced 2017-09-11 12:46 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
30
31			from pyclustering.utils import pi;
32
33			import matplotlib.pyplot as plt;
			0 ignored issues – show Configuration introduced 2017-09-11 12:46 UTC by Report Bug Copy Issue Report The import `matplotlib.pyplot` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history... Unused Code introduced 2017-09-11 12:46 UTC by Report Bug Copy Issue Report Unused matplotlib.pyplot imported as plt Loading history...
34			from _operator import index
			0 ignored issues – show Unused Code introduced 2017-09-11 12:46 UTC by Report Bug Copy Issue Report Unused index imported from _operator Loading history...
35
36
37			def gaussian(data, mean = None, covariance = None):
38			dimension = len(data[0]);
39
40			if (mean is None):
41			mean = numpy.mean(data);
42
43			if (covariance is None):
44			covariance = numpy.cov(data, rowvar = False);
45
46			inv_variance = numpy.linalg.inv(covariance);
47			right_const = 1.0 / ( (pi * 2.0) ** (dimension / 2.0) * numpy.linalg.norm(covariance) ** 0.5 );
48
49			result = [];
50
51			for point in data:
52			mean_delta = point - mean;
53			point_gaussian = right_const * numpy.exp( -0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)) );
54			result.append(point_gaussian);
55
56			return result;
57
58
59			class ema:
60			def __init__(self, data, amount_clusters, means = None, variances = None):
61			self.__data = numpy.array(data);
62			self.__amount_clusters = amount_clusters;
63
64			self.__means = means;
65			if (means is None):
66			self.__means = self.__get_random_means(data, amount_clusters);
67
68			self.__variances = variances;
69			if (variances is None):
70			self.__variances = self.__get_random_covariances(data, amount_clusters);
71
72			self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ];
73			self.__pic = [1.0] * amount_clusters;
74			self.__clusters = [];
75			self.__gaussians = [ [] for _ in range(amount_clusters) ];
76			self.__stop = False;
77
78
79			def process(self):
80			self.__clusters = None;
81
82			previous_likelihood = -10000500;
83			current_likelihood = -10000000;
84
85			while((self.__stop is False) and (abs(numpy.min(previous_likelihood) - numpy.min(current_likelihood)) > 0.00001) and (current_likelihood < 0.0)):
86			self.__expectation_step();
87			self.__maximization_step();
88
89			previous_likelihood = current_likelihood;
90			current_likelihood = self.__log_likelihood();
91			self.__stop = self.__get_stop_flag();
92			print(previous_likelihood, current_likelihood);
93
94
95			def get_clusters(self):
96			if (self.__clusters is not None):
97			return self.__clusters;
98
99			self.__clusters= [];
100			for index_cluster in range(self.__amount_clusters):
101			cluster = [];
102			for index_point in range(len(self.__data)):
103			if (self.__rc[index_cluster][index_point] >= 0.5):
104			cluster.append(index_point);
105
106			self.__clusters.append(cluster);
107
108			return self.__clusters;
109
110
111			def get_centers(self):
112			return self.__means;
113
114
115			def get_covariances(self):
116			return self.__variances;
117
118
119			def __log_likelihood(self):
120			likelihood = 0.0;
121
122			for index_point in range(len(self.__data)):
123			particle = 0.0;
124			for index_cluster in range(self.__amount_clusters):
125			particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point];
126
127			likelihood += numpy.log(particle);
128
129			return likelihood;
130
131
132			def __probabilities(self, index_cluster, index_point):
133			divider = 0.0;
134			for i in range(self.__amount_clusters):
135			divider += self.__pic[i] * self.__gaussians[i][index_point];
136
137			rc = self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider;
138			return rc;
139
140
141			def __expectation_step(self):
142			for index in range(self.__amount_clusters):
			0 ignored issues – show Comprehensibility Bug introduced 2017-09-11 12:46 UTC by Report Bug Copy Issue Report `index` is re-defining a name which is already available in the outer-scope (previously defined on line `34`). It is generally a bad practice to shadow variables from the outer-scope. In most cases, this is done unintentionally and might lead to unexpected behavior: param = 5 class Foo: def __init__(self, param): # "param" would be flagged here self.param = param Loading history...
143			self.__gaussians[index] = gaussian(self.__data, self.__means[index], self.__variances[index]);
144
145			for index_cluster in range(self.__amount_clusters):
146			for index_point in range(len(self.__data)):
147			self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point);
148			print(self.__rc[index_cluster]);
149
150
151			def __maximization_step(self):
152			for index_cluster in range(self.__amount_clusters):
153			mc = numpy.sum(self.__rc[index_cluster]);
154
155			self.__pic[index_cluster] = mc / len(self.__data);
156			self.__means[index_cluster] = self.__update_mean(index_cluster, mc);
157
158			self.__variances[index_cluster] = self.__update_covariance(index_cluster, mc);
159
160
161			def __get_stop_flag(self):
162			for covariance in self.__variances:
163			print(covariance[0])
164			if (min(covariance[0]) == 0):
165			return True;
166
167			return False;
168
169
170			def __update_covariance(self, index_cluster, mc):
171			covariance = 0.0;
172			for index_point in range(len(self.__data)):
173			deviation = numpy.array( [ self.__data[index_point] - self.__means[index_cluster] ]);
174			covariance += self.__rc[index_cluster][index_point] * deviation.T.dot(deviation);
175
176			covariance = covariance / mc;
177			return covariance;
178
179
180			def __update_mean(self, index_cluster, mc):
181			mean = 0.0;
182			for index_point in range(len(self.__data)):
183			mean += self.__rc[index_cluster][index_point] * self.__data[index_point];
184
185			mean = mean / mc;
186			return mean;
187
188
189			def __get_random_covariances(self, data, amount):
190			covariances = [];
191			data_covariance = numpy.cov(data, rowvar = False);
192			for _ in range(amount):
193			random_appendix = numpy.min(data_covariance) * 0.2 * numpy.random.random();
194			covariances.append(data_covariance + random_appendix);
195
196			return covariances;
197
198
199			def __get_random_means(self, data, amount):
200			means = [];
201			mean_indexes = [];
202			for _ in range(amount):
203			random_index = numpy.random.randint(0, len(data));
204			while(random_index in mean_indexes):
205			mean_indexes.append(random_index);
206			random_index = numpy.random.randint(0, len(data));
207
208			means.append(numpy.array(data[random_index]));
209
210			return means;
211
212
213
214			# from pyclustering.samples.definitions import SIMPLE_SAMPLES, FCPS_SAMPLES;
215			# from pyclustering.utils import read_sample;
216			#
217			# # sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE9);
218			# # ema_instance = ema(sample, 2);
219			#
220			# sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE2);
221			# ema_instance = ema(sample, 3);
222			#
223			# ema_instance.process();
224			# clusters = ema_instance.get_clusters();
225			#
226			# print(clusters);

annoviko / pyclustering

Push — 0.7.dev ( 619631...fe456d )

ema B

Complexity

Size/Duplication

Importance

14 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files