GeneticAlgorithm - Code Metrics - Inspection of "#360: The first step of implementation. Merge" - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( 941fee...d0e462 )

unknown

created 2017-09-06 17:09 UTC

GeneticAlgorithm B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	345
Duplicated Lines	13.33 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
dl	46
loc	345
rs	8.295
c	1
b	0
f	0
wmc	42

17 Methods

Rating	Name	Duplication	Size	Complexity
A	__init__()	0	19	1
A	get_chromosome_by_probability()	0	20	4
A	mutation()	0	18	3
A	crossover_a_pair()	0	9	3
A	calc_fitness_function()	22	22	3
A	get_crossover_mask()	0	14	1
A	set_last_value_to_one()	0	19	3
A	init_population()	0	7	1
A	calc_centers_for_chromosome()	0	12	2
A	get_best_chromosome()	0	15	1
B	calc_the_center()	0	25	4
B	clustering()	0	38	3
A	crossover()	0	23	2
B	get_uniform()	0	25	4
A	select()	0	21	2
B	calc_probability_vector()	24	24	3
A	get_centres()	0	12	2

How to fix Duplicated Code Complexity

"""!

@brief Clustering by Genetic Algorithm

@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""

import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3


class GeneticAlgorithm:
    """!
    @brief Class represents Genetic clustering algorithm

    """

    def __init__(self, data, count_clusters,  chromosome_count, population_count, count_mutation_gens=2):

        # Initialize random
        np.random.seed()

        # Clustering data
        self.data = data

        # Count clusters
        self.count_clusters = count_clusters

        # Home many chromosome in population
        self.chromosome_count = chromosome_count

        # How many populations
        self.population_count = population_count

        # Count mutation genes
        self.count_mutation_gens = count_mutation_gens

    def clustering(self):
        """

        :return:
        """

        # Initialize population
        chromosomes = self.init_population(self.count_clusters, len(self.data), self.chromosome_count)

        # Initialize the Best solution
        best_chromosome, best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)

        # Next population
        for _ in range(self.population_count):
            pass

            # Select
            chromosomes = self.select(chromosomes, self.data, self.count_clusters)

            # Crossover
            self.crossover(chromosomes)

            # Mutation
            self.mutation(chromosomes, self.count_clusters, self.count_mutation_gens)

            # Update the Best Solution
            new_best_chromosome, new_best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)

            print('new best_chromosome : ', new_best_chromosome)
            print('new best_ff : ', new_best_ff)

            if new_best_ff < best_ff:
                best_ff = new_best_ff
                best_chromosome = new_best_chromosome


        print('best_chromosome : ', best_chromosome)
        print('best_ff : ', best_ff)

    @staticmethod
    def mutation(chromosomes, count_clusters, count_gen_for_mutation):
        """  """

        # Count gens in Chromosome
        count_gens = len(chromosomes[0])

        #
        for _idx_chromosome in range(len(chromosomes)):

            #
            for _ in range(count_gen_for_mutation):

                # Get random gen
                gen_num = np.random.randint(count_gens)

                # Set random cluster
                chromosomes[_idx_chromosome][gen_num] = np.random.randint(count_clusters)

    @staticmethod
    def crossover(chromosomes):
        """  """

        # Get pairs to Crossover
        pairs_to_crossover = np.array(range(len(chromosomes)))

        # Set random pairs
        np.random.shuffle(pairs_to_crossover)

        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
        offset_in_pair = int(len(pairs_to_crossover) / 2)

        # For each pair
        for _idx in range(offset_in_pair):

            # Generate random mask for crossover
            crossover_mask = GeneticAlgorithm.get_crossover_mask(len(chromosomes[_idx]))

            # Crossover a pair
            GeneticAlgorithm.crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
                                              chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
                                              crossover_mask)

    @staticmethod
    def crossover_a_pair(chromosome_1, chromosome_2, mask):
        """  """

        for _idx in range(len(chromosome_1)):

            if mask[_idx] == 1:
                # Swap values
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]

    @staticmethod
    def get_crossover_mask(mask_length):
        """  """

        # Initialize mask
        mask = np.zeros(mask_length)

        # Set a half of array to 1
        mask[:int(int(mask_length) / 2)] = 1

        # Random shuffle
        np.random.shuffle(mask)

        return mask

    @staticmethod
    def select(chromosomes, data, count_clusters):
        """  """

        # Calc centers
        centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)

        # Calc fitness functions
        fitness = GeneticAlgorithm.calc_fitness_function(centres, data)

        # Calc probability vector
        probabilities = GeneticAlgorithm.calc_probability_vector(fitness)

        # Select P chromosomes with probabilities
        new_chromosomes = np.zeros(chromosomes.shape)

        # Selecting
        for _idx in range(len(chromosomes)):
            new_chromosomes[_idx] = chromosomes[GeneticAlgorithm.get_uniform(probabilities)]

        return new_chromosomes

    @staticmethod
    def set_last_value_to_one(probabilities):
        """!
        @brief Update the last same probabilities to one.
        @details All values of probability list equals to the last element are set to 1.
        """

        # Start from the last elem
        back_idx = - 1

        # All values equal to the last elem should be set to 1
        last_val = probabilities[back_idx]

        # for all elements or if a elem not equal to the last elem
        for _idx in range(-1, -len(probabilities) - 1):

            if probabilities[back_idx] == last_val:
                probabilities[back_idx] = 1
            else:
                break

    @staticmethod
    def get_uniform(probabilities):
        """!
        @brief Returns index in probabilities.

        @param[in] probabilities (list): List with segments in increasing sequence with val in [0, 1],
                   for example, [0 0.1 0.2 0.3 1.0].
        """

        # Initialize return value
        res_idx = None

        # Get random num in range [0, 1)
        random_num = np.random.rand()

        # Find segment with  val1 < random_num < val2
        for _idx in range(len(probabilities)):
            if random_num < probabilities[_idx]:
                res_idx = _idx
                break

        if res_idx is None:
            raise AttributeError("'probabilities' should contain 1 as the end of last segment(s)")

        return res_idx

    @staticmethod
    def get_chromosome_by_probability(probabilities):
        """ """

        # Initialize return value
        res_idx = None

        # Get uniform random in [0, 1)
        random_num = np.random.rand()

        # Find element with  val1 < random_num < val2
        for _idx in range(len(probabilities)):
            if random_num < probabilities[_idx]:
                res_idx = _idx
                break

        if res_idx is None:
            raise AttributeError("List 'probabilities' should contain 1 as the end of last segment(s)")

        return res_idx

    @staticmethod

    def calc_probability_vector(fitness):
        """  """

        if len(fitness) == 0:
            raise AttributeError("Has no any fitness functions.")

        # Initialize vector
        prob = np.zeros(len(fitness))

        # Get min element
        min_elem = np.min(fitness)

        # Initialize first element
        prob[0] = fitness[0] - min_elem

        # Accumulate values in probability vector
        for _idx in range(1, len(fitness)):
            prob[_idx] = prob[_idx - 1] + fitness[_idx] - min_elem

        # Normalize
        prob /= np.sum(fitness - min_elem)

        return prob

    @staticmethod
    def init_population(count_clusters, count_data, chromosome_count):
        """ Returns first population as a uniform random choice """

        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))

        return population

    @staticmethod
    def get_best_chromosome(chromosomes, data, count_clusters):
        """  """

        # Calc centers
        centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)

        # Calc Fitness functions
        fitness_function = GeneticAlgorithm.calc_fitness_function(centres, data)

        # Index of the best chromosome
        best_chromosome_idx = fitness_function.argmin()

        # Get chromosome with the best fitness function
        return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]

    @staticmethod

    def calc_fitness_function(centres, data):
        """  """

        # Get count of chromosomes and clusters
        count_chromosome = len(centres)
        count_clusters = len(centres[0])

        # Initialize fitness function values
        fitness_function = np.zeros(count_chromosome)

        # Calc fitness function for each chromosome
        for _idx_chromosome in range(count_chromosome):

            # Calc for each cluster in a chromosome
            for _idx_center in range(count_clusters):
                fitness_function[_idx_chromosome] += np.linalg.norm(data - centres[_idx_chromosome][_idx_center])

            # Normalize fitness function
            fitness_function[_idx_chromosome] /= count_clusters

        return fitness_function

    @staticmethod
    def get_centres(chromosomes, data, count_clusters):
        """ """

        # Initialize centres
        centres = np.zeros((len(chromosomes), count_clusters, len(data[0])))

        # Calc centers for next chromosome
        for _idx in range(len(chromosomes)):
            centres[_idx] = GeneticAlgorithm.calc_centers_for_chromosome(chromosomes[_idx], data, count_clusters)

        return centres

    @staticmethod
    def calc_centers_for_chromosome(chromosome, data, count_clusters):
        """ """

        # Initialize centers
        centers = np.zeros((count_clusters, len(data[0])))

        # Next cluster
        for _idx_cluster in range(count_clusters):
            centers[_idx_cluster] = GeneticAlgorithm.calc_the_center(chromosome, data, _idx_cluster)

        return centers

    @staticmethod
    def calc_the_center(chromosome, data, cluster_num):
        """ """

        # Initialize center
        center = np.zeros(len(data[0]))

        # Get count data in clusters
        count_data_in_cluster = np.sum(chromosome)

        # If has no data in cluster
        if count_data_in_cluster == 0:
            return center

        # Next data point
        for _idx in range(len(chromosome)):

            # If data associated with current cluster
            if chromosome[_idx] == cluster_num:
                center += data[_idx]

        # Normalize center
        center /= count_data_in_cluster

        return center


# --------------------------  Unit tests  -----------------------------------

# # Count Clusters and Data points
# COUNT_CHROMOSOMES = 4
# COUNT_CLUSTERS = 4
# COUNT_DATA_POINTS = 10
# DATA_DIMENSION = 2
#
# # Chromosome for test
# test_chromosomes = np.random.randint(COUNT_CLUSTERS, size=(COUNT_CHROMOSOMES, COUNT_DATA_POINTS))
#
# # Data points
# test_data = np.random.rand(COUNT_DATA_POINTS, DATA_DIMENSION)
#
# # Current cluster
# test_cluster_num = 2
#
# test_center = GeneticAlgorithm.get_centres(test_chromosomes, test_data, test_cluster_num)
#
# test_fitness = GeneticAlgorithm.calc_fitness_function(test_center, test_data)
#
# print('chromosome : ', test_chromosomes)
# print('data : ', test_data)
# print('center : ', test_center)
#
# print('center shape : ', test_center.shape)
#
# print('subtract : ', test_data - test_center[0][0])
#
# print('test_fitness : ', test_fitness)
#
# a = np.zeros(10)
# a[0] = -1
# a[1] = -1
#
# print('test_fitness min: ', GeneticAlgorithm.get_best_chromosome(test_chromosomes, test_data, COUNT_CLUSTERS))
#
# GeneticAlgorithm.crossover(test_chromosomes)


COUNT_CHROMOSOMES = 20
COUNT_CLUSTERS = 4
COUNT_POPULATIONS = 20
COUNT_MUTATIONS_GEN = 2

data_set_1 = []
data_set_1.extend([[0, 0], [1, 0], [0, 1], [1, 1]])
data_set_1.extend([[5, 0], [6, 0], [5, 1], [6, 1]])
data_set_1.extend([[0, 5], [1, 5], [0, 6], [1, 6]])
data_set_1.extend([[4, 4], [7, 4], [4, 7], [7, 7]])

test_data_2 = np.array(data_set_1)

GeneticAlgorithm(data=test_data_2,
                 count_clusters=COUNT_CLUSTERS,
                 chromosome_count=COUNT_CHROMOSOMES,
                 population_count=COUNT_POPULATIONS,
                 count_mutation_gens=COUNT_MUTATIONS_GEN).clustering()


Push — 0.7.dev ( 941fee...d0e462 )

GeneticAlgorithm B

Complexity

Size/Duplication

Importance

17 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complex Class

1. Missing Dependencies

2. Missing init.py files

1		"""!
2
3		@brief Clustering by Genetic Algorithm
4
5		@date 2014-2017
6		@copyright GNU Public License
7
8		@cond GNU_PUBLIC_LICENSE
9		PyClustering is free software: you can redistribute it and/or modify
10		it under the terms of the GNU General Public License as published by
11		the Free Software Foundation, either version 3 of the License, or
12		(at your option) any later version.
13
14		PyClustering is distributed in the hope that it will be useful,
15		but WITHOUT ANY WARRANTY; without even the implied warranty of
16		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17		GNU General Public License for more details.
18
19		You should have received a copy of the GNU General Public License
20		along with this program. If not, see <http://www.gnu.org/licenses/>.
21		@endcond
22
23		"""
24
25		import numpy as np
		0 ignored issues – show Configuration introduced 2017-09-06 17:10 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
26
27
28		class GeneticAlgorithm:
29		"""!
30		@brief Class represents Genetic clustering algorithm
31
32		"""
33
34		def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2):
35
36		# Initialize random
37		np.random.seed()
38
39		# Clustering data
40		self.data = data
41
42		# Count clusters
43		self.count_clusters = count_clusters
44
45		# Home many chromosome in population
46		self.chromosome_count = chromosome_count
47
48		# How many populations
49		self.population_count = population_count
50
51		# Count mutation genes
52		self.count_mutation_gens = count_mutation_gens
53
54		def clustering(self):
55		"""
56
57		:return:
58		"""
59
60		# Initialize population
61		chromosomes = self.init_population(self.count_clusters, len(self.data), self.chromosome_count)
62
63		# Initialize the Best solution
64		best_chromosome, best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)
65
66		# Next population
67		for _ in range(self.population_count):
68		pass
69
70		# Select
71		chromosomes = self.select(chromosomes, self.data, self.count_clusters)
72
73		# Crossover
74		self.crossover(chromosomes)
75
76		# Mutation
77		self.mutation(chromosomes, self.count_clusters, self.count_mutation_gens)
78
79		# Update the Best Solution
80		new_best_chromosome, new_best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)
81
82		print('new best_chromosome : ', new_best_chromosome)
83		print('new best_ff : ', new_best_ff)
84
85		if new_best_ff < best_ff:
86		best_ff = new_best_ff
87		best_chromosome = new_best_chromosome
88
89
90		print('best_chromosome : ', best_chromosome)
91		print('best_ff : ', best_ff)
92
93		@staticmethod
94		def mutation(chromosomes, count_clusters, count_gen_for_mutation):
95		""" """
96
97		# Count gens in Chromosome
98		count_gens = len(chromosomes[0])
99
100		#
101		for _idx_chromosome in range(len(chromosomes)):
102
103		#
104		for _ in range(count_gen_for_mutation):
105
106		# Get random gen
107		gen_num = np.random.randint(count_gens)
108
109		# Set random cluster
110		chromosomes[_idx_chromosome][gen_num] = np.random.randint(count_clusters)
111
112		@staticmethod
113		def crossover(chromosomes):
114		""" """
115
116		# Get pairs to Crossover
117		pairs_to_crossover = np.array(range(len(chromosomes)))
118
119		# Set random pairs
120		np.random.shuffle(pairs_to_crossover)
121
122		# Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. \| P1, P2, ...] crossover between V<->P)
123		offset_in_pair = int(len(pairs_to_crossover) / 2)
124
125		# For each pair
126		for _idx in range(offset_in_pair):
127
128		# Generate random mask for crossover
129		crossover_mask = GeneticAlgorithm.get_crossover_mask(len(chromosomes[_idx]))
130
131		# Crossover a pair
132		GeneticAlgorithm.crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
133		chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
134		crossover_mask)
135
136		@staticmethod
137		def crossover_a_pair(chromosome_1, chromosome_2, mask):
138		""" """
139
140		for _idx in range(len(chromosome_1)):
141
142		if mask[_idx] == 1:
143		# Swap values
144		chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
145
146		@staticmethod
147		def get_crossover_mask(mask_length):
148		""" """
149
150		# Initialize mask
151		mask = np.zeros(mask_length)
152
153		# Set a half of array to 1
154		mask[:int(int(mask_length) / 2)] = 1
155
156		# Random shuffle
157		np.random.shuffle(mask)
158
159		return mask
160
161		@staticmethod
162		def select(chromosomes, data, count_clusters):
163		""" """
164
165		# Calc centers
166		centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)
167
168		# Calc fitness functions
169		fitness = GeneticAlgorithm.calc_fitness_function(centres, data)
170
171		# Calc probability vector
172		probabilities = GeneticAlgorithm.calc_probability_vector(fitness)
173
174		# Select P chromosomes with probabilities
175		new_chromosomes = np.zeros(chromosomes.shape)
176
177		# Selecting
178		for _idx in range(len(chromosomes)):
179		new_chromosomes[_idx] = chromosomes[GeneticAlgorithm.get_uniform(probabilities)]
180
181		return new_chromosomes
182
183		@staticmethod
184		def set_last_value_to_one(probabilities):
185		"""!
186		@brief Update the last same probabilities to one.
187		@details All values of probability list equals to the last element are set to 1.
188		"""
189
190		# Start from the last elem
191		back_idx = - 1
192
193		# All values equal to the last elem should be set to 1
194		last_val = probabilities[back_idx]
195
196		# for all elements or if a elem not equal to the last elem
197		for _idx in range(-1, -len(probabilities) - 1):
		0 ignored issues – show Unused Code introduced 2017-09-01 15:07 UTC by Report Bug Copy Issue Report The variable `_idx` seems to be unused. Loading history...
198		if probabilities[back_idx] == last_val:
199		probabilities[back_idx] = 1
200		else:
201		break
202
203		@staticmethod
204		def get_uniform(probabilities):
205		"""!
206		@brief Returns index in probabilities.
207
208		@param[in] probabilities (list): List with segments in increasing sequence with val in [0, 1],
209		for example, [0 0.1 0.2 0.3 1.0].
210		"""
211
212		# Initialize return value
213		res_idx = None
214
215		# Get random num in range [0, 1)
216		random_num = np.random.rand()
217
218		# Find segment with val1 < random_num < val2
219		for _idx in range(len(probabilities)):
220		if random_num < probabilities[_idx]:
221		res_idx = _idx
222		break
223
224		if res_idx is None:
225		raise AttributeError("'probabilities' should contain 1 as the end of last segment(s)")
226
227		return res_idx
228
229		@staticmethod
230		def get_chromosome_by_probability(probabilities):
231		""" """
232
233		# Initialize return value
234		res_idx = None
235
236		# Get uniform random in [0, 1)
237		random_num = np.random.rand()
238
239		# Find element with val1 < random_num < val2
240		for _idx in range(len(probabilities)):
241		if random_num < probabilities[_idx]:
242		res_idx = _idx
243		break
244
245		if res_idx is None:
246		raise AttributeError("List 'probabilities' should contain 1 as the end of last segment(s)")
247
248		return res_idx
249
250	View Code Duplication	@staticmethod
		1 ignored issue – show Duplication introduced 2017-09-06 17:10 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
251		def calc_probability_vector(fitness):
252		""" """
253
254		if len(fitness) == 0:
255		raise AttributeError("Has no any fitness functions.")
256
257		# Initialize vector
258		prob = np.zeros(len(fitness))
259
260		# Get min element
261		min_elem = np.min(fitness)
262
263		# Initialize first element
264		prob[0] = fitness[0] - min_elem
265
266		# Accumulate values in probability vector
267		for _idx in range(1, len(fitness)):
268		prob[_idx] = prob[_idx - 1] + fitness[_idx] - min_elem
269
270		# Normalize
271		prob /= np.sum(fitness - min_elem)
272
273		return prob
274
275		@staticmethod
276		def init_population(count_clusters, count_data, chromosome_count):
277		""" Returns first population as a uniform random choice """
278
279		population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
280
281		return population
282
283		@staticmethod
284		def get_best_chromosome(chromosomes, data, count_clusters):
285		""" """
286
287		# Calc centers
288		centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)
289
290		# Calc Fitness functions
291		fitness_function = GeneticAlgorithm.calc_fitness_function(centres, data)
292
293		# Index of the best chromosome
294		best_chromosome_idx = fitness_function.argmin()
295
296		# Get chromosome with the best fitness function
297		return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]
298
299	View Code Duplication	@staticmethod
		1 ignored issue – show Duplication introduced 2017-09-06 17:10 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
300		def calc_fitness_function(centres, data):
301		""" """
302
303		# Get count of chromosomes and clusters
304		count_chromosome = len(centres)
305		count_clusters = len(centres[0])
306
307		# Initialize fitness function values
308		fitness_function = np.zeros(count_chromosome)
309
310		# Calc fitness function for each chromosome
311		for _idx_chromosome in range(count_chromosome):
312
313		# Calc for each cluster in a chromosome
314		for _idx_center in range(count_clusters):
315		fitness_function[_idx_chromosome] += np.linalg.norm(data - centres[_idx_chromosome][_idx_center])
316
317		# Normalize fitness function
318		fitness_function[_idx_chromosome] /= count_clusters
319
320		return fitness_function
321
322		@staticmethod
323		def get_centres(chromosomes, data, count_clusters):
324		""" """
325
326		# Initialize centres
327		centres = np.zeros((len(chromosomes), count_clusters, len(data[0])))
328
329		# Calc centers for next chromosome
330		for _idx in range(len(chromosomes)):
331		centres[_idx] = GeneticAlgorithm.calc_centers_for_chromosome(chromosomes[_idx], data, count_clusters)
332
333		return centres
334
335		@staticmethod
336		def calc_centers_for_chromosome(chromosome, data, count_clusters):
337		""" """
338
339		# Initialize centers
340		centers = np.zeros((count_clusters, len(data[0])))
341
342		# Next cluster
343		for _idx_cluster in range(count_clusters):
344		centers[_idx_cluster] = GeneticAlgorithm.calc_the_center(chromosome, data, _idx_cluster)
345
346		return centers
347
348		@staticmethod
349		def calc_the_center(chromosome, data, cluster_num):
350		""" """
351
352		# Initialize center
353		center = np.zeros(len(data[0]))
354
355		# Get count data in clusters
356		count_data_in_cluster = np.sum(chromosome)
357
358		# If has no data in cluster
359		if count_data_in_cluster == 0:
360		return center
361
362		# Next data point
363		for _idx in range(len(chromosome)):
364
365		# If data associated with current cluster
366		if chromosome[_idx] == cluster_num:
367		center += data[_idx]
368
369		# Normalize center
370		center /= count_data_in_cluster
371
372		return center
373
374
375		# -------------------------- Unit tests -----------------------------------
376
377		# # Count Clusters and Data points
378		# COUNT_CHROMOSOMES = 4
379		# COUNT_CLUSTERS = 4
380		# COUNT_DATA_POINTS = 10
381		# DATA_DIMENSION = 2
382		#
383		# # Chromosome for test
384		# test_chromosomes = np.random.randint(COUNT_CLUSTERS, size=(COUNT_CHROMOSOMES, COUNT_DATA_POINTS))
385		#
386		# # Data points
387		# test_data = np.random.rand(COUNT_DATA_POINTS, DATA_DIMENSION)
388		#
389		# # Current cluster
390		# test_cluster_num = 2
391		#
392		# test_center = GeneticAlgorithm.get_centres(test_chromosomes, test_data, test_cluster_num)
393		#
394		# test_fitness = GeneticAlgorithm.calc_fitness_function(test_center, test_data)
395		#
396		# print('chromosome : ', test_chromosomes)
397		# print('data : ', test_data)
398		# print('center : ', test_center)
399		#
400		# print('center shape : ', test_center.shape)
401		#
402		# print('subtract : ', test_data - test_center[0][0])
403		#
404		# print('test_fitness : ', test_fitness)
405		#
406		# a = np.zeros(10)
407		# a[0] = -1
408		# a[1] = -1
409		#
410		# print('test_fitness min: ', GeneticAlgorithm.get_best_chromosome(test_chromosomes, test_data, COUNT_CLUSTERS))
411		#
412		# GeneticAlgorithm.crossover(test_chromosomes)
413
414
415		COUNT_CHROMOSOMES = 20
416		COUNT_CLUSTERS = 4
417		COUNT_POPULATIONS = 20
418		COUNT_MUTATIONS_GEN = 2
419
420		data_set_1 = []
421		data_set_1.extend([[0, 0], [1, 0], [0, 1], [1, 1]])
422		data_set_1.extend([[5, 0], [6, 0], [5, 1], [6, 1]])
423		data_set_1.extend([[0, 5], [1, 5], [0, 6], [1, 6]])
424		data_set_1.extend([[4, 4], [7, 4], [4, 7], [7, 7]])
425
426		test_data_2 = np.array(data_set_1)
427
428		GeneticAlgorithm(data=test_data_2,
429		count_clusters=COUNT_CLUSTERS,
430		chromosome_count=COUNT_CHROMOSOMES,
431		population_count=COUNT_POPULATIONS,
432		count_mutation_gens=COUNT_MUTATIONS_GEN).clustering()
433

annoviko / pyclustering

Push — 0.7.dev ( 941fee...d0e462 )

GeneticAlgorithm B

Complexity

Size/Duplication

Importance

17 Methods

How to fix Duplicated Code Complexity

Duplicated Code

Complex Class

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing init.py files