Completed
Push — 0.7.dev (bfeadc...20409b)
by Andrei, created 01:28

ema (rating: B)

Complexity

Total Complexity: 40

Size/Duplication

Total Lines: 157
Duplicated Lines: 0%

Importance

Changes: 0

Metric   Value
dl       0
loc      157
rs       8.2608
c        0
b        0
f        0
wmc      40

15 Methods

Rating   Name   Duplication   Size   Complexity  
A get_centers() 0 2 1
A __update_mean() 0 7 2
A process() 0 15 4
A get_clusters() 0 2 1
B __init__() 0 17 5
A __get_random_means() 0 12 3
A __expectation_step() 0 7 4
A __maximization_step() 0 8 2
A __extract_clusters() 0 11 4
A __probabilities() 0 7 2
A get_covariances() 0 2 1
A __log_likelihood() 0 11 3
A __update_covariance() 0 8 2
A __get_stop_flag() 0 6 3
A __get_random_covariances() 0 13 3

How to fix: Complexity

Complex Class

Complex classes like ema often do a lot of different things. To break such a class down, we need to identify a cohesive component within the class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
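As an illustration, the two `__get_random_*` initializer methods of ema share a prefix and a purpose, so they are a natural candidate for Extract Class. The sketch below shows the shape of that refactoring on a stripped-down stand-in; the class and method names (`random_initializer`, `ema_like`, `get_random_indexes`) are invented for this example and are not part of pyclustering.

```python
# Hypothetical sketch of Extract Class applied to a class like ema:
# the random-initialization logic moves into a dedicated component.
import random


class random_initializer:
    """Cohesive component: produces distinct random point indexes."""

    def __init__(self, data):
        self.__data = data

    def get_random_indexes(self, amount):
        # Draw 'amount' distinct indexes, mirroring the retry loop
        # used by __get_random_means().
        indexes = []
        while len(indexes) < amount:
            candidate = random.randint(0, len(self.__data) - 1)
            if candidate not in indexes:
                indexes.append(candidate)
        return indexes


class ema_like:
    """The host class now delegates its initialization to the component."""

    def __init__(self, data, amount_clusters):
        initializer = random_initializer(data)
        indexes = initializer.get_random_indexes(amount_clusters)
        self.means = [data[index] for index in indexes]


algorithm = ema_like([[0.0], [1.0], [2.0], [3.0]], 2)
print(len(algorithm.means))   # two distinct starting means
```

The same move would carry `__get_random_covariances()` into the component as well, shrinking the wmc of ema without changing its public API.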

"""!

@brief Cluster analysis algorithm: Expectation-Maximization Algorithm (EMA).
@details Implementation based on article:
         - 

@authors Andrei Novikov ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy

from pyclustering.cluster import cluster_visualizer
from pyclustering.utils import pi

# matplotlib is reserved for the covariance-ellipse drawing planned in
# ema_visualizer (see the TODO there).
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse


def gaussian(data, mean, covariance):
    """Return the multivariate normal density of each point in 'data'."""
    dimension = len(data[0])

    if dimension != 1:
        inv_variance = numpy.linalg.inv(covariance)
        determinant = numpy.linalg.det(covariance)
    else:
        inv_variance = 1.0 / covariance
        determinant = covariance

    # Normalization constant 1 / ((2 * pi)^(d / 2) * |covariance|^0.5);
    # the determinant of the covariance matrix is required here, not its norm.
    right_const = 1.0 / ((pi * 2.0) ** (dimension / 2.0) * determinant ** 0.5)

    result = []

    for point in data:
        mean_delta = point - mean
        point_gaussian = right_const * numpy.exp(-0.5 * mean_delta.dot(inv_variance).dot(numpy.transpose(mean_delta)))
        result.append(point_gaussian)

    return result


class ema_observer:
    def __init__(self):
        self.__means = []
        self.__covariances = []

    def get_iterations(self):
        return len(self.__means)

    def get_means(self):
        return self.__means

    def get_covariances(self):
        return self.__covariances

    def notify(self, means, covariances):
        self.__means.append(means)
        self.__covariances.append(covariances)


class ema_visualizer:
    @staticmethod
    def show_clusters(clusters, sample, covariances):
        # 'covariances' is not used yet - it is needed for the ellipse
        # drawing below.
        visualizer = cluster_visualizer()
        visualizer.append_clusters(clusters, sample)
        figure = visualizer.show(display = False)

        # TODO: draw ellipses for each cluster using the covariance matrix.
        return figure


class ema:
    def __init__(self, data, amount_clusters, means = None, variances = None):
        self.__data = numpy.array(data)
        self.__amount_clusters = amount_clusters

        self.__means = means
        if means is None:
            self.__means = self.__get_random_means(data, amount_clusters)

        self.__variances = variances
        if variances is None:
            self.__variances = self.__get_random_covariances(data, amount_clusters)

        self.__rc = [ [0.0] * len(self.__data) for _ in range(amount_clusters) ]
        self.__pic = [1.0] * amount_clusters
        self.__clusters = []
        self.__gaussians = [ [] for _ in range(amount_clusters) ]
        self.__stop = False


    def process(self):
        self.__clusters = None

        previous_likelihood = -10000500
        current_likelihood = -10000000

        while (self.__stop is False) and (abs(previous_likelihood - current_likelihood) > 0.00001) and (current_likelihood < 0.0):
            self.__expectation_step()
            self.__maximization_step()

            previous_likelihood = current_likelihood
            current_likelihood = self.__log_likelihood()
            self.__stop = self.__get_stop_flag()

        self.__extract_clusters()


    def get_clusters(self):
        return self.__clusters


    def get_centers(self):
        return self.__means


    def get_covariances(self):
        return self.__variances


    def __extract_clusters(self):
        self.__clusters = []
        for index_cluster in range(self.__amount_clusters):
            cluster = []
            for index_point in range(len(self.__data)):
                if self.__rc[index_cluster][index_point] >= 0.5:
                    cluster.append(index_point)

            self.__clusters.append(cluster)

        return self.__clusters


    def __log_likelihood(self):
        likelihood = 0.0

        for index_point in range(len(self.__data)):
            particle = 0.0
            for index_cluster in range(self.__amount_clusters):
                particle += self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point]

            likelihood += numpy.log(particle)

        return likelihood


    def __probabilities(self, index_cluster, index_point):
        divider = 0.0
        for i in range(self.__amount_clusters):
            divider += self.__pic[i] * self.__gaussians[i][index_point]

        rc = self.__pic[index_cluster] * self.__gaussians[index_cluster][index_point] / divider
        return rc


    def __expectation_step(self):
        for index_cluster in range(self.__amount_clusters):
            self.__gaussians[index_cluster] = gaussian(self.__data, self.__means[index_cluster], self.__variances[index_cluster])

        for index_cluster in range(self.__amount_clusters):
            for index_point in range(len(self.__data)):
                self.__rc[index_cluster][index_point] = self.__probabilities(index_cluster, index_point)


    def __maximization_step(self):
        for index_cluster in range(self.__amount_clusters):
            mc = numpy.sum(self.__rc[index_cluster])

            self.__pic[index_cluster] = mc / len(self.__data)
            self.__means[index_cluster] = self.__update_mean(index_cluster, mc)

            self.__variances[index_cluster] = self.__update_covariance(index_cluster, mc)


    def __get_stop_flag(self):
        for covariance in self.__variances:
            if min(covariance[0]) == 0:
                return True

        return False


    def __update_covariance(self, index_cluster, mc):
        covariance = 0.0
        for index_point in range(len(self.__data)):
            deviation = numpy.array( [ self.__data[index_point] - self.__means[index_cluster] ] )
            covariance += self.__rc[index_cluster][index_point] * deviation.T.dot(deviation)

        covariance = covariance / mc
        return covariance


    def __update_mean(self, index_cluster, mc):
        mean = 0.0
        for index_point in range(len(self.__data)):
            mean += self.__rc[index_cluster][index_point] * self.__data[index_point]

        mean = mean / mc
        return mean


    def __get_random_covariances(self, data, amount):
        covariances = []
        covariance_appendixes = []
        data_covariance = numpy.cov(data, rowvar = False)
        for _ in range(amount):
            random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random()
            while random_appendix in covariance_appendixes:
                random_appendix = numpy.min(data_covariance) * 0.5 * numpy.random.random()

            covariance_appendixes.append(random_appendix)
            covariances.append(data_covariance - random_appendix)

        return covariances


    def __get_random_means(self, data, amount):
        means = []
        mean_indexes = []
        for _ in range(amount):
            random_index = numpy.random.randint(0, len(data))
            while random_index in mean_indexes:
                random_index = numpy.random.randint(0, len(data))

            mean_indexes.append(random_index)
            means.append(numpy.array(data[random_index]))

        return means
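
The E-step computed by __probabilities() forms, for each point, the posterior responsibility rc = pic_k * N_k(x) / sum_j(pic_j * N_j(x)); by construction these responsibilities sum to one over the clusters, which is what __extract_clusters() implicitly relies on when it thresholds at 0.5. A minimal self-contained check of that identity, using a toy one-dimensional two-cluster mixture (all numbers here are invented for illustration, not taken from the class above):

```python
import math


def gaussian_1d(x, mean, variance):
    # Univariate normal density, matching the 1-D branch of gaussian().
    return math.exp(-0.5 * (x - mean) ** 2 / variance) / math.sqrt(2.0 * math.pi * variance)


# Toy mixture: two clusters with equal priors (pic) and unit variances.
pic = [0.5, 0.5]
means = [0.0, 4.0]
variances = [1.0, 1.0]

point = 1.0
densities = [gaussian_1d(point, means[k], variances[k]) for k in range(2)]

# Same normalization as __probabilities(): divide each weighted density
# by the total mixture density at the point.
divider = sum(pic[k] * densities[k] for k in range(2))
rc = [pic[k] * densities[k] / divider for k in range(2)]

# Responsibilities are a normalized posterior: they sum to one.
print(round(sum(rc), 10))   # 1.0
```

Because at most one responsibility can exceed 0.5 under this normalization, the >= 0.5 threshold in __extract_clusters() assigns each point to at most one cluster.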