genetic_algorithm.process() - Code Metrics - Inspection of "Merge" - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( 20409b...5de18e )

unknown

created 2017-09-14 10:13 UTC

genetic_algorithm.process() A

↳ Parent: genetic_algorithm

Complexity

Conditions

Size

Total Lines

Duplication

Lines	23
Ratio	45.1 %

Importance

Changes

Metric	Value
cc	3
dl	23
loc	51
rs	9.4109
c	0
b	0
f	0

How to fix Long Method

"""!

@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).

@authors Aleksey Kukushkin ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""

import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import math

from pyclustering.cluster.ga_maths import ga_math
from pyclustering.cluster.ga_observer import genetic_algorithm_observer


class genetic_algorithm:
    """!
    @brief Class represents Genetic clustering algorithm.
    @details The searching capability of genetic algorithms is exploited in order to search for appropriate
             cluster centres.

    """

    def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
                 coeff_mutation_count=0.25, select_coeff=1.0, observer=genetic_algorithm_observer()):
        """!
        @brief Initialize genetic clustering algorithm for cluster analysis.
        
        @param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array
                    where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
        @param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
        @param[in] chromosome_count (uint): Amount of chromosomes in each population.
        @param[in] population_count (uint): Amount of populations.
        @param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
        @param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
                    thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
        @param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
                   math.exp(1 + fitness(chromosome) * select_coeff).
        
        """
        
        # Initialize random
        np.random.seed()

        # Clustering data
        if type(data) is list:
            self.data = np.array(data)
        else:
            self.data = data

        # Count clusters
        self.count_clusters = count_clusters

        # Home many chromosome in population
        self.chromosome_count = chromosome_count

        # How many populations
        self.population_count = population_count

        # Count mutation genes
        self.count_mutation_gens = count_mutation_gens

        # Crossover rate
        self.crossover_rate = 1.0

        # Count of chromosome for mutation (range [0, 1])
        self.coeff_mutation_count = coeff_mutation_count

        # Exponential coeff for selection
        self.select_coeff = select_coeff

        # Result of clustering : best chromosome
        self.result_clustering = {'best_chromosome': [],
                                  'best_fitness_function': 0.0}

        # Observer
        self.observer = observer

    def process(self):
        """!
        @brief Perform clustering procedure in line with rule of genetic clustering algorithm.
        
        @see get_clusters()
        
        """

        # Initialize population
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)

        # Initialize the Best solution
        best_chromosome, best_ff, first_fitness_functions \
            = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

        # Save best result into observer
        self.observer.collect_global_best(best_chromosome, best_ff)
        self.observer.collect_population_best(best_chromosome, best_ff)
        self.observer.collect_mean(first_fitness_functions)

        # Next population
        for _idx in range(self.population_count):


            # Select
            chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)

            # Crossover
            self._crossover(chromosomes)


            # Mutation
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)

            # Update the Best Solution
            new_best_chromosome, new_best_ff, fitness_functions \
                = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

            # Get best chromosome
            if new_best_ff < best_ff:
                best_ff = new_best_ff
                best_chromosome = new_best_chromosome

            # Save best result into observer
            self.observer.collect_global_best(best_chromosome, best_ff)
            self.observer.collect_population_best(new_best_chromosome, new_best_ff)
            self.observer.collect_mean(fitness_functions)

        # Save result
        self.result_clustering['best_chromosome'] = best_chromosome
        self.result_clustering['best_fitness_function'] = best_ff

        return best_chromosome, best_ff


    def get_observer(self):
        return self.observer

    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
        
        @return (list) List of allocated clusters.
        
        @see process()
        
        """

        return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)

    @staticmethod
    def _select(chromosomes, data, count_clusters, select_coeff):
        """!
        @brief Performs selection procedure where new chromosomes are calculated.
        
        @param[in] chromosomes (numpy.array): Chromosomes 
        
        """

        # Calc centers
        centres = ga_math.get_centres(chromosomes, data, count_clusters)

        # Calc fitness functions
        fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)

        for _idx in range(len(fitness)):
            fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)

        # Calc probability vector
        probabilities = ga_math.calc_probability_vector(fitness)

        # Select P chromosomes with probabilities
        new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)

        # Selecting
        for _idx in range(len(chromosomes)):
            new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]

        return new_chromosomes

    @staticmethod
    def _crossover(chromosomes):
        """!
        @brief Crossover procedure.
        
        """

        # Get pairs to Crossover
        pairs_to_crossover = np.array(range(len(chromosomes)))

        # Set random pairs
        np.random.shuffle(pairs_to_crossover)

        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
        offset_in_pair = int(len(pairs_to_crossover) / 2)

        # For each pair
        for _idx in range(offset_in_pair):

            # Generate random mask for crossover
            crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))

            # Crossover a pair
            genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
                                                chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
                                                crossover_mask)

    @staticmethod
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
        """!
        @brief Mutation procedure.
        
        """

        # Count gens in Chromosome
        count_gens = len(chromosomes[0])

        # Get random chromosomes for mutation
        random_idx_chromosomes = np.array(range(len(chromosomes)))
        np.random.shuffle(random_idx_chromosomes)

        #
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):

            #
            for _ in range(count_gen_for_mutation):

                # Get random gen
                gen_num = np.random.randint(count_gens)

                # Set random cluster
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)

    @staticmethod
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
        """!
        @brief Crossovers a pair of chromosomes.
        
        @param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
        @param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
        @param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
        
        """

        for _idx in range(len(chromosome_1)):

            if mask[_idx] == 1:
                # Swap values
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]

    @staticmethod
    def _get_crossover_mask(mask_length):
        """!
        @brief Crossover mask to crossover a pair of chromosomes.
        
        @param[in] mask_length (uint): Length of the mask.
        
        """

        # Initialize mask
        mask = np.zeros(mask_length)

        # Set a half of array to 1
        mask[:int(int(mask_length) / 6)] = 1

        # Random shuffle
        np.random.shuffle(mask)

        return mask

    @staticmethod
    def _init_population(count_clusters, count_data, chromosome_count):
        """!
        @brief Returns first population as a uniform random choice.
        
        @param[in] count_clusters (uint):
        @param[in] count_data (uint):
        @param[in] chromosome_count (uint): 
        
        """

        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))

        return population

    @staticmethod
    def _get_best_chromosome(chromosomes, data, count_clusters):
        """!
        @brief 
        
        """

        # Calc centers
        centres = ga_math.get_centres(chromosomes, data, count_clusters)

        # Calc Fitness functions
        fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)

        # Index of the best chromosome
        best_chromosome_idx = fitness_functions.argmin()

        # Get chromosome with the best fitness function
        return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions

    @staticmethod
    def _calc_fitness_function(centres, data, chromosomes):
        """!
        @brief 
        
        """

        # Get count of chromosomes and clusters
        count_chromosome = len(chromosomes)

        # Initialize fitness function values
        fitness_function = np.zeros(count_chromosome)

        # Calc fitness function for each chromosome
        for _idx_chromosome in range(count_chromosome):

            # Get centers for a selected chromosome
            centres_data = np.zeros(data.shape)

            # Fill data centres
            for _idx in range(len(data)):
                centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]

            # Get City Block distance for a chromosome
            fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))

        return fitness_function


Push — 0.7.dev ( 20409b...5de18e )

genetic_algorithm.process() A

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

1. Missing Dependencies

2. Missing init.py files

1		"""!
2
3		@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).
4
5		@authors Aleksey Kukushkin ([email protected])
6		@date 2014-2017
7		@copyright GNU Public License
8
9		@cond GNU_PUBLIC_LICENSE
10		PyClustering is free software: you can redistribute it and/or modify
11		it under the terms of the GNU General Public License as published by
12		the Free Software Foundation, either version 3 of the License, or
13		(at your option) any later version.
14
15		PyClustering is distributed in the hope that it will be useful,
16		but WITHOUT ANY WARRANTY; without even the implied warranty of
17		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18		GNU General Public License for more details.
19
20		You should have received a copy of the GNU General Public License
21		along with this program. If not, see <http://www.gnu.org/licenses/>.
22		@endcond
23
24		"""
25
26		import numpy as np
		0 ignored issues – show Configuration introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
27		import math
28
29		from pyclustering.cluster.ga_maths import ga_math
30		from pyclustering.cluster.ga_observer import genetic_algorithm_observer
31
32
33		class genetic_algorithm:
34		"""!
35		@brief Class represents Genetic clustering algorithm.
36		@details The searching capability of genetic algorithms is exploited in order to search for appropriate
37		cluster centres.
38
39		"""
40
41		def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
42		coeff_mutation_count=0.25, select_coeff=1.0, observer=genetic_algorithm_observer()):
43		"""!
44		@brief Initialize genetic clustering algorithm for cluster analysis.
45
46		@param[in] data (numpy.array\|list): Input data for clustering that is represented by two dimensional array
47		where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
48		@param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
49		@param[in] chromosome_count (uint): Amount of chromosomes in each population.
50		@param[in] population_count (uint): Amount of populations.
51		@param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
52		@param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
53		thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
54		@param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
55		math.exp(1 + fitness(chromosome) * select_coeff).
56
57		"""
58
59		# Initialize random
60		np.random.seed()
61
62		# Clustering data
63		if type(data) is list:
64		self.data = np.array(data)
65		else:
66		self.data = data
67
68		# Count clusters
69		self.count_clusters = count_clusters
70
71		# Home many chromosome in population
72		self.chromosome_count = chromosome_count
73
74		# How many populations
75		self.population_count = population_count
76
77		# Count mutation genes
78		self.count_mutation_gens = count_mutation_gens
79
80		# Crossover rate
81		self.crossover_rate = 1.0
82
83		# Count of chromosome for mutation (range [0, 1])
84		self.coeff_mutation_count = coeff_mutation_count
85
86		# Exponential coeff for selection
87		self.select_coeff = select_coeff
88
89		# Result of clustering : best chromosome
90		self.result_clustering = {'best_chromosome': [],
91		'best_fitness_function': 0.0}
92
93		# Observer
94		self.observer = observer
95
96		def process(self):
97		"""!
98		@brief Perform clustering procedure in line with rule of genetic clustering algorithm.
99
100		@see get_clusters()
101
102		"""
103
104		# Initialize population
105		chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
106
107		# Initialize the Best solution
108		best_chromosome, best_ff, first_fitness_functions \
109		= self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
110
111		# Save best result into observer
112		self.observer.collect_global_best(best_chromosome, best_ff)
113		self.observer.collect_population_best(best_chromosome, best_ff)
114		self.observer.collect_mean(first_fitness_functions)
115
116		# Next population
117		for _idx in range(self.population_count):
		0 ignored issues – show Unused Code introduced 2017-09-14 10:15 UTC by Report Bug Copy Issue Report The variable `_idx` seems to be unused. Loading history...
118
119		# Select
120		chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)
121
122		# Crossover
123	View Code Duplication	self._crossover(chromosomes)
		0 ignored issues – show Duplication introduced 2017-09-14 10:15 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
124
125		# Mutation
126		self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
127
128		# Update the Best Solution
129		new_best_chromosome, new_best_ff, fitness_functions \
130		= self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
131
132		# Get best chromosome
133		if new_best_ff < best_ff:
134		best_ff = new_best_ff
135		best_chromosome = new_best_chromosome
136
137		# Save best result into observer
138		self.observer.collect_global_best(best_chromosome, best_ff)
139		self.observer.collect_population_best(new_best_chromosome, new_best_ff)
140		self.observer.collect_mean(fitness_functions)
141
142		# Save result
143		self.result_clustering['best_chromosome'] = best_chromosome
144		self.result_clustering['best_fitness_function'] = best_ff
145
146		return best_chromosome, best_ff
147	View Code Duplication
		0 ignored issues – show Duplication introduced 2017-09-13 16:23 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
148		def get_observer(self):
149		return self.observer
150
151		def get_clusters(self):
152		"""!
153		@brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
154
155		@return (list) List of allocated clusters.
156
157		@see process()
158
159		"""
160
161		return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)
162
163		@staticmethod
164		def _select(chromosomes, data, count_clusters, select_coeff):
165		"""!
166		@brief Performs selection procedure where new chromosomes are calculated.
167
168		@param[in] chromosomes (numpy.array): Chromosomes
169
170		"""
171
172		# Calc centers
173		centres = ga_math.get_centres(chromosomes, data, count_clusters)
174
175		# Calc fitness functions
176		fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
177
178		for _idx in range(len(fitness)):
179		fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)
180
181		# Calc probability vector
182		probabilities = ga_math.calc_probability_vector(fitness)
183
184		# Select P chromosomes with probabilities
185		new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)
186
187		# Selecting
188		for _idx in range(len(chromosomes)):
189		new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]
190
191		return new_chromosomes
192
193		@staticmethod
194		def _crossover(chromosomes):
195		"""!
196		@brief Crossover procedure.
197
198		"""
199
200		# Get pairs to Crossover
201		pairs_to_crossover = np.array(range(len(chromosomes)))
202
203		# Set random pairs
204		np.random.shuffle(pairs_to_crossover)
205
206		# Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. \| P1, P2, ...] crossover between V<->P)
207		offset_in_pair = int(len(pairs_to_crossover) / 2)
208
209		# For each pair
210		for _idx in range(offset_in_pair):
211
212		# Generate random mask for crossover
213		crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))
214
215		# Crossover a pair
216		genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
217		chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
218		crossover_mask)
219
220		@staticmethod
221		def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
222		"""!
223		@brief Mutation procedure.
224
225		"""
226
227		# Count gens in Chromosome
228		count_gens = len(chromosomes[0])
229
230		# Get random chromosomes for mutation
231		random_idx_chromosomes = np.array(range(len(chromosomes)))
232		np.random.shuffle(random_idx_chromosomes)
233
234		#
235		for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
236
237		#
238		for _ in range(count_gen_for_mutation):
239
240		# Get random gen
241		gen_num = np.random.randint(count_gens)
242
243		# Set random cluster
244		chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
245
246		@staticmethod
247		def _crossover_a_pair(chromosome_1, chromosome_2, mask):
248		"""!
249		@brief Crossovers a pair of chromosomes.
250
251		@param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
252		@param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
253		@param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
254
255		"""
256
257		for _idx in range(len(chromosome_1)):
258
259		if mask[_idx] == 1:
260		# Swap values
261		chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
262
263		@staticmethod
264		def _get_crossover_mask(mask_length):
265		"""!
266		@brief Crossover mask to crossover a pair of chromosomes.
267
268		@param[in] mask_length (uint): Length of the mask.
269
270		"""
271
272		# Initialize mask
273		mask = np.zeros(mask_length)
274
275		# Set a half of array to 1
276		mask[:int(int(mask_length) / 6)] = 1
277
278		# Random shuffle
279		np.random.shuffle(mask)
280
281		return mask
282
283		@staticmethod
284		def _init_population(count_clusters, count_data, chromosome_count):
285		"""!
286		@brief Returns first population as a uniform random choice.
287
288		@param[in] count_clusters (uint):
289		@param[in] count_data (uint):
290		@param[in] chromosome_count (uint):
291
292		"""
293
294		population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
295
296		return population
297
298		@staticmethod
299		def _get_best_chromosome(chromosomes, data, count_clusters):
300		"""!
301		@brief
302
303		"""
304
305		# Calc centers
306		centres = ga_math.get_centres(chromosomes, data, count_clusters)
307
308		# Calc Fitness functions
309		fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
310
311		# Index of the best chromosome
312		best_chromosome_idx = fitness_functions.argmin()
313
314		# Get chromosome with the best fitness function
315		return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions
316
317		@staticmethod
318		def _calc_fitness_function(centres, data, chromosomes):
319		"""!
320		@brief
321
322		"""
323
324		# Get count of chromosomes and clusters
325		count_chromosome = len(chromosomes)
326
327		# Initialize fitness function values
328		fitness_function = np.zeros(count_chromosome)
329
330		# Calc fitness function for each chromosome
331		for _idx_chromosome in range(count_chromosome):
332
333		# Get centers for a selected chromosome
334		centres_data = np.zeros(data.shape)
335
336		# Fill data centres
337		for _idx in range(len(data)):
338		centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]
339
340		# Get City Block distance for a chromosome
341		fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))
342
343		return fitness_function
344

annoviko / pyclustering

Push — 0.7.dev ( 20409b...5de18e )

genetic_algorithm.process() A

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing init.py files