Completed
Push — 0.7.dev ( 5de18e...94d991 )
by Andrei
50s
created

ga_observer.collect_population_best()   A

Complexity

Conditions 2

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).
4
5
@authors Andrey Novikov, Aleksey Kukushkin ([email protected])
6
@date 2014-2017
7
@copyright GNU Public License
8
9
@cond GNU_PUBLIC_LICENSE
10
    PyClustering is free software: you can redistribute it and/or modify
11
    it under the terms of the GNU General Public License as published by
12
    the Free Software Foundation, either version 3 of the License, or
13
    (at your option) any later version.
14
15
    PyClustering is distributed in the hope that it will be useful,
16
    but WITHOUT ANY WARRANTY; without even the implied warranty of
17
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
    GNU General Public License for more details.
19
20
    You should have received a copy of the GNU General Public License
21
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
@endcond
23
24
"""
25
26
27
import numpy as np;
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
28
import math;
29
30
import matplotlib.pyplot as plt;
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
31
import matplotlib.animation as animation;
0 ignored issues
show
Configuration introduced by
The import matplotlib.animation could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
32
33
from pyclustering.cluster import cluster_visualizer;
34
from pyclustering.cluster.ga_maths import ga_math;
35
36
37
38
class ga_observer:
39
    """!
40
41
    """
42
43
    def __init__(self, need_global_best=False, need_population_best=False, need_mean_ff=False):
44
        """ """
45
46
        # Global best chromosome and fitness function for each population
47
        self._global_best_result = {'chromosome': [], 'fitness_function': []};
48
49
        # Best chromosome and fitness function on a population
50
        self._best_population_result = {'chromosome': [], 'fitness_function': []};
51
52
        # Mean fitness function on each population
53
        self._mean_ff_result = [];
54
55
        # Flags to collect
56
        self._need_global_best = need_global_best;
57
        self._need_population_best = need_population_best;
58
        self._need_mean_ff = need_mean_ff;
59
60
61
    def __len__(self):
62
        return len(self._global_best_result['chromosome']);
63
64
65
    def collect_global_best(self, best_chromosome, best_fitness_function):
66
        """ """
67
68
        if not self._need_global_best:
69
            return;
70
71
        self._global_best_result['chromosome'].append(best_chromosome);
72
        self._global_best_result['fitness_function'].append(best_fitness_function);
73
74
75
    def collect_population_best(self, best_chromosome, best_fitness_function):
76
        """ """
77
78
        if not self._need_population_best:
79
            return;
80
81
        self._best_population_result['chromosome'].append(best_chromosome);
82
        self._best_population_result['fitness_function'].append(best_fitness_function);
83
84
85
    def collect_mean(self, fitness_functions):
86
        """ """
87
88
        if not self._need_mean_ff:
89
            return;
90
91
        self._mean_ff_result.append(np.mean(fitness_functions));
92
93
94
    def get_global_best(self):
95
        return self._global_best_result;
96
97
98
    def get_population_best(self):
99
        return self._best_population_result;
100
101
102
    def get_mean_fitness_function(self):
103
        return self._mean_ff_result;
104
105
106
107
class ga_visualizer:
108
    @staticmethod
109
    def show_evolution(observer, start_iteration = 0, stop_iteration = None, ax = None, display = True):
110
        if (ax is None):
111
            _, ax = plt.subplots(1);
112
            ax.set_title("Evolution");
113
        
114
        if (stop_iteration is None):
115
            stop_iteration = len(observer);
116
        
117
        line_best, = ax.plot(observer.get_global_best()['fitness_function'][start_iteration:stop_iteration], 'r');
118
        line_current, = ax.plot(observer.get_population_best()['fitness_function'][start_iteration:stop_iteration], 'k');
119
        line_mean, = ax.plot(observer.get_mean_fitness_function()[start_iteration:stop_iteration], 'c');
120
121
        ax.set_xlabel("Iteration");
122
        ax.set_ylabel("Fitness function");
123 View Code Duplication
        ax.legend([line_best, line_current, line_mean], ["The best pop.", "Cur. best pop.", "Average"], prop={'size': 10});
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
124
        ax.grid();
125
126
        print(start_iteration, stop_iteration);
127
        if (display is True):
128
            plt.show();
129
130
131
    @staticmethod
132
    def show_clusters(data, observer, marker = '.', markersize = None):
133
        figure = plt.figure();
134
        ax1 = figure.add_subplot(121);
135
        
136
        clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][-1]);
137
        
138
        visualizer = cluster_visualizer(1, 2);
139
        visualizer.append_clusters(clusters, data, 0, marker, markersize);
140
        visualizer.show(figure, display = False);
141
        
142
        ga_visualizer.show_evolution(observer, 0, None, ax1, True);
143
144
145
    @staticmethod
146
    def animate_cluster_allocation(data, observer, animation_velocity = 75, save_movie = None):
147 View Code Duplication
        figure = plt.figure();
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
148
        
149
        def init_frame():
150
            return frame_generation(0);
151
152
        def frame_generation(index_iteration):
153
            figure.clf();
154
            
155
            figure.suptitle("Clustering genetic algorithm (iteration: " + str(index_iteration) +")", fontsize = 20, fontweight = 'bold');
156
            
157
            visualizer = cluster_visualizer(4, 2, ["Population #" + str(index_iteration), "The best population"]);
158
            
159
            local_minimum_clusters = ga_math.get_clusters_representation(observer.get_population_best()['chromosome'][index_iteration]);
160
            visualizer.append_clusters(local_minimum_clusters, data, 0);
161
            
162
            global_minimum_clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][index_iteration]);
163
            visualizer.append_clusters(global_minimum_clusters, data, 1);
164
            
165
            ax1 = plt.subplot2grid((2, 2), (1, 0), colspan = 2);
166
            ga_visualizer.show_evolution(observer, 0, index_iteration + 1, ax1, False);
167
            
168
            visualizer.show(figure, shift = 0, display = False);
169
            figure.subplots_adjust(top = 0.85);
170
            
171
            return [ figure.gca() ];
172
        
173
        iterations = len(observer);
174
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000);
175
176
        if (save_movie is not None):
177
#             plt.rcParams['animation.ffmpeg_path'] = 'D:\\Program Files\\ffmpeg-3.3.1-win64-static\\bin\\ffmpeg.exe';
178
#             ffmpeg_writer = animation.FFMpegWriter(fps = 15);
179
#             cluster_animation.save(save_movie, writer = ffmpeg_writer);
180
            cluster_animation.save(save_movie, writer = 'ffmpeg', fps = 15, bitrate = 1500);
181
        else:
182
            plt.show();
183
184
185
class genetic_algorithm:
186
    """!
187
    @brief Class represents Genetic clustering algorithm.
188
    @details The searching capability of genetic algorithms is exploited in order to search for appropriate
189
             cluster centres.
190
191
    """
192
193
    def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
194
                 coeff_mutation_count=0.25, select_coeff=1.0, observer=ga_observer()):
195
        """!
196
        @brief Initialize genetic clustering algorithm for cluster analysis.
197
        
198
        @param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array
199
                    where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
200
        @param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
201
        @param[in] chromosome_count (uint): Amount of chromosomes in each population.
202
        @param[in] population_count (uint): Amount of populations.
203
        @param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
204
        @param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
205
                    thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
206
        @param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
207
                   math.exp(1 + fitness(chromosome) * select_coeff).
208
        
209
        """
210
        
211
        # Initialize random
212
        np.random.seed()
213
214
        # Clustering data
215
        if type(data) is list:
216
            self.data = np.array(data)
217
        else:
218
            self.data = data
219
220
        # Count clusters
221
        self.count_clusters = count_clusters
222
223
        # Home many chromosome in population
224
        self.chromosome_count = chromosome_count
225
226
        # How many populations
227
        self.population_count = population_count
228
229
        # Count mutation genes
230
        self.count_mutation_gens = count_mutation_gens
231
232
        # Crossover rate
233
        self.crossover_rate = 1.0
234
235
        # Count of chromosome for mutation (range [0, 1])
236
        self.coeff_mutation_count = coeff_mutation_count
237
238
        # Exponential coeff for selection
239
        self.select_coeff = select_coeff
240
241
        # Result of clustering : best chromosome
242
        self.result_clustering = {'best_chromosome': [],
243
                                  'best_fitness_function': 0.0}
244
245
        # Observer
246
        self.observer = observer
247
248
    def process(self):
249
        """!
250
        @brief Perform clustering procedure in line with rule of genetic clustering algorithm.
251
        
252
        @see get_clusters()
253
        
254
        """
255
256
        # Initialize population
257
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
258
259
        # Initialize the Best solution
260
        best_chromosome, best_ff, first_fitness_functions \
261
            = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
262
263
        # Save best result into observer
264
        self.observer.collect_global_best(best_chromosome, best_ff)
265
        self.observer.collect_population_best(best_chromosome, best_ff)
266
        self.observer.collect_mean(first_fitness_functions)
267
268
        # Next population
269
        for _idx in range(self.population_count):
0 ignored issues
show
Unused Code introduced by
The variable _idx seems to be unused.
Loading history...
270
271
            # Select
272
            chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)
273
274
            # Crossover
275
            self._crossover(chromosomes)
276
277
            # Mutation
278
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
279
280
            # Update the Best Solution
281
            new_best_chromosome, new_best_ff, fitness_functions \
282
                = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
283
284
            # Get best chromosome
285
            if new_best_ff < best_ff:
286
                best_ff = new_best_ff
287
                best_chromosome = new_best_chromosome
288
289
            # Save best result into observer
290
            self.observer.collect_global_best(best_chromosome, best_ff)
291
            self.observer.collect_population_best(new_best_chromosome, new_best_ff)
292
            self.observer.collect_mean(fitness_functions)
293
294
        # Save result
295
        self.result_clustering['best_chromosome'] = best_chromosome
296
        self.result_clustering['best_fitness_function'] = best_ff
297
298
        return best_chromosome, best_ff
299
300
301
    def get_observer(self):
302
        """!
303
        @brief Returns genetic algorithm observer.
304
        
305
        """
306
        return self.observer
307
308
309
    def get_clusters(self):
310
        """!
311
        @brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
312
        
313
        @return (list) List of allocated clusters.
314
        
315
        @see process()
316
        
317
        """
318
319
        return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)
320
321
322
    @staticmethod
323
    def _select(chromosomes, data, count_clusters, select_coeff):
324
        """!
325
        @brief Performs selection procedure where new chromosomes are calculated.
326
        
327
        @param[in] chromosomes (numpy.array): Chromosomes 
328
        
329
        """
330
331
        # Calc centers
332
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
333
334
        # Calc fitness functions
335
        fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
336
337
        for _idx in range(len(fitness)):
338
            fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)
339
340
        # Calc probability vector
341
        probabilities = ga_math.calc_probability_vector(fitness)
342
343
        # Select P chromosomes with probabilities
344
        new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)
345
346
        # Selecting
347
        for _idx in range(len(chromosomes)):
348
            new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]
349
350
        return new_chromosomes
351
352
353
    @staticmethod
354
    def _crossover(chromosomes):
355
        """!
356
        @brief Crossover procedure.
357
        
358
        """
359
360
        # Get pairs to Crossover
361
        pairs_to_crossover = np.array(range(len(chromosomes)))
362
363
        # Set random pairs
364
        np.random.shuffle(pairs_to_crossover)
365
366
        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
367
        offset_in_pair = int(len(pairs_to_crossover) / 2)
368
369
        # For each pair
370
        for _idx in range(offset_in_pair):
371
372
            # Generate random mask for crossover
373
            crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))
374
375
            # Crossover a pair
376
            genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
377
                                                chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
378
                                                crossover_mask)
379
380
381
    @staticmethod
382
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
383
        """!
384
        @brief Mutation procedure.
385
        
386
        """
387
388
        # Count gens in Chromosome
389
        count_gens = len(chromosomes[0])
390
391
        # Get random chromosomes for mutation
392
        random_idx_chromosomes = np.array(range(len(chromosomes)))
393
        np.random.shuffle(random_idx_chromosomes)
394
395
        #
396
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
397
398
            #
399
            for _ in range(count_gen_for_mutation):
400
401
                # Get random gen
402
                gen_num = np.random.randint(count_gens)
403
404
                # Set random cluster
405
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
406
407
408
    @staticmethod
409
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
410
        """!
411
        @brief Crossovers a pair of chromosomes.
412
        
413
        @param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
414
        @param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
415
        @param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
416
        
417
        """
418
419
        for _idx in range(len(chromosome_1)):
420
421
            if mask[_idx] == 1:
422
                # Swap values
423
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
424
425
426
    @staticmethod
427
    def _get_crossover_mask(mask_length):
428
        """!
429
        @brief Crossover mask to crossover a pair of chromosomes.
430
        
431
        @param[in] mask_length (uint): Length of the mask.
432
        
433
        """
434
435
        # Initialize mask
436
        mask = np.zeros(mask_length)
437
438
        # Set a half of array to 1
439
        mask[:int(int(mask_length) / 6)] = 1
440
441
        # Random shuffle
442
        np.random.shuffle(mask)
443
444
        return mask
445
446
447
    @staticmethod
448
    def _init_population(count_clusters, count_data, chromosome_count):
449
        """!
450
        @brief Returns first population as a uniform random choice.
451
        
452
        @param[in] count_clusters (uint):
453
        @param[in] count_data (uint):
454
        @param[in] chromosome_count (uint): 
455
        
456
        """
457
458
        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
459
460
        return population
461
462
463
    @staticmethod
464
    def _get_best_chromosome(chromosomes, data, count_clusters):
465
        """!
466
        @brief 
467
        
468
        """
469
470
        # Calc centers
471
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
472
473
        # Calc Fitness functions
474
        fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
475
476
        # Index of the best chromosome
477
        best_chromosome_idx = fitness_functions.argmin()
478
479
        # Get chromosome with the best fitness function
480
        return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions
481
482
483
    @staticmethod
484
    def _calc_fitness_function(centres, data, chromosomes):
485
        """!
486
        @brief 
487
        
488
        """
489
490
        # Get count of chromosomes and clusters
491
        count_chromosome = len(chromosomes)
492
493
        # Initialize fitness function values
494
        fitness_function = np.zeros(count_chromosome)
495
496
        # Calc fitness function for each chromosome
497
        for _idx_chromosome in range(count_chromosome):
498
499
            # Get centers for a selected chromosome
500
            centres_data = np.zeros(data.shape)
501
502
            # Fill data centres
503
            for _idx in range(len(data)):
504
                centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]
505
506
            # Get City Block distance for a chromosome
507
            fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))
508
509
        return fitness_function
510