Completed
Push — 0.7.dev ( 20409b...5de18e )
by
unknown
02:36
created

genetic_algorithm.process()   A

Complexity

Conditions 3

Size

Total Lines 51

Duplication

Lines 23
Ratio 45.1 %

Importance

Changes 0
Metric Value
cc 3
dl 23
loc 51
rs 9.4109
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
"""!
2
3
@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).
4
5
@authors Aleksey Kukushkin ([email protected])
6
@date 2014-2017
7
@copyright GNU Public License
8
9
@cond GNU_PUBLIC_LICENSE
10
    PyClustering is free software: you can redistribute it and/or modify
11
    it under the terms of the GNU General Public License as published by
12
    the Free Software Foundation, either version 3 of the License, or
13
    (at your option) any later version.
14
15
    PyClustering is distributed in the hope that it will be useful,
16
    but WITHOUT ANY WARRANTY; without even the implied warranty of
17
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
    GNU General Public License for more details.
19
20
    You should have received a copy of the GNU General Public License
21
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
@endcond
23
24
"""
25
26
import numpy as np
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
27
import math
28
29
from pyclustering.cluster.ga_maths import ga_math
30
from pyclustering.cluster.ga_observer import genetic_algorithm_observer
31
32
33
class genetic_algorithm:
34
    """!
35
    @brief Class represents Genetic clustering algorithm.
36
    @details The searching capability of genetic algorithms is exploited in order to search for appropriate
37
             cluster centres.
38
39
    """
40
41
    def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
42
                 coeff_mutation_count=0.25, select_coeff=1.0, observer=genetic_algorithm_observer()):
43
        """!
44
        @brief Initialize genetic clustering algorithm for cluster analysis.
45
        
46
        @param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array
47
                    where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
48
        @param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
49
        @param[in] chromosome_count (uint): Amount of chromosomes in each population.
50
        @param[in] population_count (uint): Amount of populations.
51
        @param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
52
        @param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
53
                    thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
54
        @param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
55
                   math.exp(1 + fitness(chromosome) * select_coeff).
56
        
57
        """
58
        
59
        # Initialize random
60
        np.random.seed()
61
62
        # Clustering data
63
        if type(data) is list:
64
            self.data = np.array(data)
65
        else:
66
            self.data = data
67
68
        # Count clusters
69
        self.count_clusters = count_clusters
70
71
        # Home many chromosome in population
72
        self.chromosome_count = chromosome_count
73
74
        # How many populations
75
        self.population_count = population_count
76
77
        # Count mutation genes
78
        self.count_mutation_gens = count_mutation_gens
79
80
        # Crossover rate
81
        self.crossover_rate = 1.0
82
83
        # Count of chromosome for mutation (range [0, 1])
84
        self.coeff_mutation_count = coeff_mutation_count
85
86
        # Exponential coeff for selection
87
        self.select_coeff = select_coeff
88
89
        # Result of clustering : best chromosome
90
        self.result_clustering = {'best_chromosome': [],
91
                                  'best_fitness_function': 0.0}
92
93
        # Observer
94
        self.observer = observer
95
96
    def process(self):
97
        """!
98
        @brief Perform clustering procedure in line with rule of genetic clustering algorithm.
99
        
100
        @see get_clusters()
101
        
102
        """
103
104
        # Initialize population
105
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
106
107
        # Initialize the Best solution
108
        best_chromosome, best_ff, first_fitness_functions \
109
            = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
110
111
        # Save best result into observer
112
        self.observer.collect_global_best(best_chromosome, best_ff)
113
        self.observer.collect_population_best(best_chromosome, best_ff)
114
        self.observer.collect_mean(first_fitness_functions)
115
116
        # Next population
117
        for _idx in range(self.population_count):
0 ignored issues
show
Unused Code introduced by
The variable _idx seems to be unused.
Loading history...
118
119
            # Select
120
            chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)
121
122
            # Crossover
123 View Code Duplication
            self._crossover(chromosomes)
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
124
125
            # Mutation
126
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
127
128
            # Update the Best Solution
129
            new_best_chromosome, new_best_ff, fitness_functions \
130
                = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
131
132
            # Get best chromosome
133
            if new_best_ff < best_ff:
134
                best_ff = new_best_ff
135
                best_chromosome = new_best_chromosome
136
137
            # Save best result into observer
138
            self.observer.collect_global_best(best_chromosome, best_ff)
139
            self.observer.collect_population_best(new_best_chromosome, new_best_ff)
140
            self.observer.collect_mean(fitness_functions)
141
142
        # Save result
143
        self.result_clustering['best_chromosome'] = best_chromosome
144
        self.result_clustering['best_fitness_function'] = best_ff
145
146
        return best_chromosome, best_ff
147 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
148
    def get_observer(self):
149
        return self.observer
150
151
    def get_clusters(self):
152
        """!
153
        @brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
154
        
155
        @return (list) List of allocated clusters.
156
        
157
        @see process()
158
        
159
        """
160
161
        return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)
162
163
    @staticmethod
164
    def _select(chromosomes, data, count_clusters, select_coeff):
165
        """!
166
        @brief Performs selection procedure where new chromosomes are calculated.
167
        
168
        @param[in] chromosomes (numpy.array): Chromosomes 
169
        
170
        """
171
172
        # Calc centers
173
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
174
175
        # Calc fitness functions
176
        fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
177
178
        for _idx in range(len(fitness)):
179
            fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)
180
181
        # Calc probability vector
182
        probabilities = ga_math.calc_probability_vector(fitness)
183
184
        # Select P chromosomes with probabilities
185
        new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)
186
187
        # Selecting
188
        for _idx in range(len(chromosomes)):
189
            new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]
190
191
        return new_chromosomes
192
193
    @staticmethod
194
    def _crossover(chromosomes):
195
        """!
196
        @brief Crossover procedure.
197
        
198
        """
199
200
        # Get pairs to Crossover
201
        pairs_to_crossover = np.array(range(len(chromosomes)))
202
203
        # Set random pairs
204
        np.random.shuffle(pairs_to_crossover)
205
206
        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
207
        offset_in_pair = int(len(pairs_to_crossover) / 2)
208
209
        # For each pair
210
        for _idx in range(offset_in_pair):
211
212
            # Generate random mask for crossover
213
            crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))
214
215
            # Crossover a pair
216
            genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
217
                                                chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
218
                                                crossover_mask)
219
220
    @staticmethod
221
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
222
        """!
223
        @brief Mutation procedure.
224
        
225
        """
226
227
        # Count gens in Chromosome
228
        count_gens = len(chromosomes[0])
229
230
        # Get random chromosomes for mutation
231
        random_idx_chromosomes = np.array(range(len(chromosomes)))
232
        np.random.shuffle(random_idx_chromosomes)
233
234
        #
235
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
236
237
            #
238
            for _ in range(count_gen_for_mutation):
239
240
                # Get random gen
241
                gen_num = np.random.randint(count_gens)
242
243
                # Set random cluster
244
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
245
246
    @staticmethod
247
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
248
        """!
249
        @brief Crossovers a pair of chromosomes.
250
        
251
        @param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
252
        @param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
253
        @param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
254
        
255
        """
256
257
        for _idx in range(len(chromosome_1)):
258
259
            if mask[_idx] == 1:
260
                # Swap values
261
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
262
263
    @staticmethod
264
    def _get_crossover_mask(mask_length):
265
        """!
266
        @brief Crossover mask to crossover a pair of chromosomes.
267
        
268
        @param[in] mask_length (uint): Length of the mask.
269
        
270
        """
271
272
        # Initialize mask
273
        mask = np.zeros(mask_length)
274
275
        # Set a half of array to 1
276
        mask[:int(int(mask_length) / 6)] = 1
277
278
        # Random shuffle
279
        np.random.shuffle(mask)
280
281
        return mask
282
283
    @staticmethod
284
    def _init_population(count_clusters, count_data, chromosome_count):
285
        """!
286
        @brief Returns first population as a uniform random choice.
287
        
288
        @param[in] count_clusters (uint):
289
        @param[in] count_data (uint):
290
        @param[in] chromosome_count (uint): 
291
        
292
        """
293
294
        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
295
296
        return population
297
298
    @staticmethod
299
    def _get_best_chromosome(chromosomes, data, count_clusters):
300
        """!
301
        @brief 
302
        
303
        """
304
305
        # Calc centers
306
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
307
308
        # Calc Fitness functions
309
        fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
310
311
        # Index of the best chromosome
312
        best_chromosome_idx = fitness_functions.argmin()
313
314
        # Get chromosome with the best fitness function
315
        return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions
316
317
    @staticmethod
318
    def _calc_fitness_function(centres, data, chromosomes):
319
        """!
320
        @brief 
321
        
322
        """
323
324
        # Get count of chromosomes and clusters
325
        count_chromosome = len(chromosomes)
326
327
        # Initialize fitness function values
328
        fitness_function = np.zeros(count_chromosome)
329
330
        # Calc fitness function for each chromosome
331
        for _idx_chromosome in range(count_chromosome):
332
333
            # Get centers for a selected chromosome
334
            centres_data = np.zeros(data.shape)
335
336
            # Fill data centres
337
            for _idx in range(len(data)):
338
                centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]
339
340
            # Get City Block distance for a chromosome
341
            fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))
342
343
        return fitness_function
344