GeneticAlgorithm - Code Metrics - Inspection of "#360 [pyclustering.cluster.cga] Clustering Genetic..." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( f0d749...68d2d8 )

unknown

created 2017-09-08 16:23 UTC

GeneticAlgorithm A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	222
Duplicated Lines	35.59 %

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
c	2
b	0
f	0
dl	79
loc	222
rs	10
wmc	23

10 Methods

Rating	Name	Duplication	Size	Complexity
B	__init__()	0	29	2
B	_calc_fitness_function()	34	34	5
A	_init_population()	0	7	1
A	_crossover()	23	23	2
B	clustering()	0	33	3
A	_get_crossover_mask()	0	14	1
A	_mutation()	22	22	3
A	_select()	0	21	2
A	_crossover_a_pair()	0	9	3
A	_get_best_chromosome()	0	15	1

How to fix Duplicated Code

"""!

@brief Clustering by Genetic Algorithm

@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""

import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import math


from pyclustering.cluster.ga_maths import GAMath


class GeneticAlgorithm:
    """!
    @brief Class represents Genetic clustering algorithm

    """

    def __init__(self, data, count_clusters,  chromosome_count, population_count, count_mutation_gens=2,
                 coeff_mutation_count=0.25):

        # Initialize random
        np.random.seed()

        # Clustering data
        if type(data) is list:
            self.data = np.array(data)
        else:
            self.data = data

        # Count clusters
        self.count_clusters = count_clusters

        # Home many chromosome in population
        self.chromosome_count = chromosome_count

        # How many populations
        self.population_count = population_count

        # Count mutation genes
        self.count_mutation_gens = count_mutation_gens

        # Crossover rate
        self.crossover_rate = 1.0

        # Count of chromosome for mutation (range [0, 1])
        self.coeff_mutation_count = coeff_mutation_count

    def clustering(self):
        """

        :return:
        """

        # Initialize population
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)

        # Initialize the Best solution
        best_chromosome, best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

        # Next population
        for _ in range(self.population_count):

            # Select
            chromosomes = self._select(chromosomes, self.data, self.count_clusters)

            # Crossover
            self._crossover(chromosomes)

            # Mutation
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)

            # Update the Best Solution
            new_best_chromosome, new_best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

            # Get best chromosome
            if new_best_ff < best_ff:
                best_ff = new_best_ff
                best_chromosome = new_best_chromosome

        return best_chromosome, best_ff

    @staticmethod
    def _select(chromosomes, data, count_clusters):
        """  """

        # Calc centers
        centres = GAMath.get_centres(chromosomes, data, count_clusters)

        # Calc fitness functions
        fitness = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)

        # Calc probability vector
        probabilities = GAMath.calc_probability_vector(fitness)

        # Select P chromosomes with probabilities
        new_chromosomes = np.zeros(chromosomes.shape)

        # Selecting
        for _idx in range(len(chromosomes)):
            new_chromosomes[_idx] = chromosomes[GAMath.get_uniform(probabilities)]

        return new_chromosomes

    @staticmethod

    def _crossover(chromosomes):
        """  """

        # Get pairs to Crossover
        pairs_to_crossover = np.array(range(len(chromosomes)))

        # Set random pairs
        np.random.shuffle(pairs_to_crossover)

        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
        offset_in_pair = int(len(pairs_to_crossover) / 2)

        # For each pair
        for _idx in range(offset_in_pair):

            # Generate random mask for crossover
            crossover_mask = GeneticAlgorithm._get_crossover_mask(len(chromosomes[_idx]))

            # Crossover a pair
            GeneticAlgorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
                                               chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
                                               crossover_mask)

    @staticmethod

    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
        """  """

        # Count gens in Chromosome
        count_gens = len(chromosomes[0])

        # Get random chromosomes for mutation
        random_idx_chromosomes = np.array(range(len(chromosomes)))
        np.random.shuffle(random_idx_chromosomes)

        #
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):

            #
            for _ in range(count_gen_for_mutation):

                # Get random gen
                gen_num = np.random.randint(count_gens)

                # Set random cluster
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)

    @staticmethod
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
        """  """

        for _idx in range(len(chromosome_1)):

            if mask[_idx] == 1:
                # Swap values
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]

    @staticmethod
    def _get_crossover_mask(mask_length):
        """  """

        # Initialize mask
        mask = np.zeros(mask_length)

        # Set a half of array to 1
        mask[:int(int(mask_length) / 6)] = 1

        # Random shuffle
        np.random.shuffle(mask)

        return mask

    @staticmethod
    def _init_population(count_clusters, count_data, chromosome_count):
        """ Returns first population as a uniform random choice """

        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))

        return population

    @staticmethod
    def _get_best_chromosome(chromosomes, data, count_clusters):
        """  """

        # Calc centers
        centres = GAMath.get_centres(chromosomes, data, count_clusters)

        # Calc Fitness functions
        fitness_function = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)

        # Index of the best chromosome
        best_chromosome_idx = fitness_function.argmin()

        # Get chromosome with the best fitness function
        return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]

    @staticmethod

    def _calc_fitness_function(centres, data, chromosomes):
        """  """

        # Get count of chromosomes and clusters
        count_chromosome = len(chromosomes)
        count_clusters = len(centres[0])

        # Initialize fitness function values
        fitness_function = np.zeros(count_chromosome)

        # Calc fitness function for each chromosome
        for _idx_chromosome in range(count_chromosome):

            # Calc fitness function for each cluster in a chromosome
            for _idx_cluster in range(count_clusters):

                # Initialize ff for a cluster
                ff_cluster = 0

                # For each input points
                for _idx_data in range(len(data)):

                    # If a data belong to current cluster
                    if chromosomes[_idx_chromosome][_idx_data] == _idx_cluster:

                        # Calc Manhattan distance
                        data_diff = data[_idx_data] - centres[_idx_chromosome][_idx_cluster]
                        ff_cluster += abs(data_diff[0]) + abs(data_diff[1])

                # Accumulate fitness function values
                fitness_function[_idx_chromosome] += ff_cluster

        return fitness_function


Push — 0.7.dev ( f0d749...68d2d8 )

GeneticAlgorithm A

Complexity

Size/Duplication

Importance

10 Methods

How to fix Duplicated Code

Duplicated Code

1. Missing Dependencies

2. Missing init.py files

1		"""!
2
3		@brief Clustering by Genetic Algorithm
4
5		@date 2014-2017
6		@copyright GNU Public License
7
8		@cond GNU_PUBLIC_LICENSE
9		PyClustering is free software: you can redistribute it and/or modify
10		it under the terms of the GNU General Public License as published by
11		the Free Software Foundation, either version 3 of the License, or
12		(at your option) any later version.
13
14		PyClustering is distributed in the hope that it will be useful,
15		but WITHOUT ANY WARRANTY; without even the implied warranty of
16		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17		GNU General Public License for more details.
18
19		You should have received a copy of the GNU General Public License
20		along with this program. If not, see <http://www.gnu.org/licenses/>.
21		@endcond
22
23		"""
24
25		import numpy as np
		0 ignored issues – show Configuration introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
26		import math
		0 ignored issues – show Unused Code introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report The import `math` seems to be unused. Loading history...
27
28		from pyclustering.cluster.ga_maths import GAMath
29
30
31		class GeneticAlgorithm:
32		"""!
33		@brief Class represents Genetic clustering algorithm
34
35		"""
36
37		def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
38		coeff_mutation_count=0.25):
39
40		# Initialize random
41		np.random.seed()
42
43		# Clustering data
44		if type(data) is list:
45		self.data = np.array(data)
46		else:
47		self.data = data
48
49		# Count clusters
50		self.count_clusters = count_clusters
51
52		# Home many chromosome in population
53		self.chromosome_count = chromosome_count
54
55		# How many populations
56		self.population_count = population_count
57
58		# Count mutation genes
59		self.count_mutation_gens = count_mutation_gens
60
61		# Crossover rate
62		self.crossover_rate = 1.0
63
64		# Count of chromosome for mutation (range [0, 1])
65		self.coeff_mutation_count = coeff_mutation_count
66
67		def clustering(self):
68		"""
69
70		:return:
71		"""
72
73		# Initialize population
74		chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
75
76		# Initialize the Best solution
77		best_chromosome, best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
78
79		# Next population
80		for _ in range(self.population_count):
81
82		# Select
83		chromosomes = self._select(chromosomes, self.data, self.count_clusters)
84
85		# Crossover
86		self._crossover(chromosomes)
87
88		# Mutation
89		self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
90
91		# Update the Best Solution
92		new_best_chromosome, new_best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
93
94		# Get best chromosome
95		if new_best_ff < best_ff:
96		best_ff = new_best_ff
97		best_chromosome = new_best_chromosome
98
99		return best_chromosome, best_ff
100
101		@staticmethod
102		def _select(chromosomes, data, count_clusters):
103		""" """
104
105		# Calc centers
106		centres = GAMath.get_centres(chromosomes, data, count_clusters)
107
108		# Calc fitness functions
109		fitness = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)
110
111		# Calc probability vector
112		probabilities = GAMath.calc_probability_vector(fitness)
113
114		# Select P chromosomes with probabilities
115		new_chromosomes = np.zeros(chromosomes.shape)
116
117		# Selecting
118		for _idx in range(len(chromosomes)):
119		new_chromosomes[_idx] = chromosomes[GAMath.get_uniform(probabilities)]
120
121		return new_chromosomes
122
123	View Code Duplication	@staticmethod
		0 ignored issues – show Duplication introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
124		def _crossover(chromosomes):
125		""" """
126
127		# Get pairs to Crossover
128		pairs_to_crossover = np.array(range(len(chromosomes)))
129
130		# Set random pairs
131		np.random.shuffle(pairs_to_crossover)
132
133		# Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. \| P1, P2, ...] crossover between V<->P)
134		offset_in_pair = int(len(pairs_to_crossover) / 2)
135
136		# For each pair
137		for _idx in range(offset_in_pair):
138
139		# Generate random mask for crossover
140		crossover_mask = GeneticAlgorithm._get_crossover_mask(len(chromosomes[_idx]))
141
142		# Crossover a pair
143		GeneticAlgorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
144		chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
145		crossover_mask)
146
147	View Code Duplication	@staticmethod
		0 ignored issues – show Duplication introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
148		def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
149		""" """
150
151		# Count gens in Chromosome
152		count_gens = len(chromosomes[0])
153
154		# Get random chromosomes for mutation
155		random_idx_chromosomes = np.array(range(len(chromosomes)))
156		np.random.shuffle(random_idx_chromosomes)
157
158		#
159		for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
160
161		#
162		for _ in range(count_gen_for_mutation):
163
164		# Get random gen
165		gen_num = np.random.randint(count_gens)
166
167		# Set random cluster
168		chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
169
170		@staticmethod
171		def _crossover_a_pair(chromosome_1, chromosome_2, mask):
172		""" """
173
174		for _idx in range(len(chromosome_1)):
175
176		if mask[_idx] == 1:
177		# Swap values
178		chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
179
180		@staticmethod
181		def _get_crossover_mask(mask_length):
182		""" """
183
184		# Initialize mask
185		mask = np.zeros(mask_length)
186
187		# Set a half of array to 1
188		mask[:int(int(mask_length) / 6)] = 1
189
190		# Random shuffle
191		np.random.shuffle(mask)
192
193		return mask
194
195		@staticmethod
196		def _init_population(count_clusters, count_data, chromosome_count):
197		""" Returns first population as a uniform random choice """
198
199		population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
200
201		return population
202
203		@staticmethod
204		def _get_best_chromosome(chromosomes, data, count_clusters):
205		""" """
206
207		# Calc centers
208		centres = GAMath.get_centres(chromosomes, data, count_clusters)
209
210		# Calc Fitness functions
211		fitness_function = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)
212
213		# Index of the best chromosome
214		best_chromosome_idx = fitness_function.argmin()
215
216		# Get chromosome with the best fitness function
217		return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]
218
219	View Code Duplication	@staticmethod
		1 ignored issue – show Duplication introduced 2017-09-06 17:10 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
220		def _calc_fitness_function(centres, data, chromosomes):
221		""" """
222
223		# Get count of chromosomes and clusters
224		count_chromosome = len(chromosomes)
225		count_clusters = len(centres[0])
226
227		# Initialize fitness function values
228		fitness_function = np.zeros(count_chromosome)
229
230		# Calc fitness function for each chromosome
231		for _idx_chromosome in range(count_chromosome):
232
233		# Calc fitness function for each cluster in a chromosome
234		for _idx_cluster in range(count_clusters):
235
236		# Initialize ff for a cluster
237		ff_cluster = 0
238
239		# For each input points
240		for _idx_data in range(len(data)):
241
242		# If a data belong to current cluster
243		if chromosomes[_idx_chromosome][_idx_data] == _idx_cluster:
244
245		# Calc Manhattan distance
246		data_diff = data[_idx_data] - centres[_idx_chromosome][_idx_cluster]
247		ff_cluster += abs(data_diff[0]) + abs(data_diff[1])
248
249		# Accumulate fitness function values
250		fitness_function[_idx_chromosome] += ff_cluster
251
252		return fitness_function
253

annoviko / pyclustering

Push — 0.7.dev ( f0d749...68d2d8 )

GeneticAlgorithm A

Complexity

Size/Duplication

Importance

10 Methods

How to fix Duplicated Code

Duplicated Code

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing init.py files