1
|
|
|
"""! |
2
|
|
|
|
3
|
|
|
@brief Cluster analysis algorithm: Genetic clustering algorithm (GA). |
4
|
|
|
|
5
|
|
|
@authors Andrey Novikov, Aleksey Kukushkin ([email protected]) |
6
|
|
|
@date 2014-2017 |
7
|
|
|
@copyright GNU Public License |
8
|
|
|
|
9
|
|
|
@cond GNU_PUBLIC_LICENSE |
10
|
|
|
PyClustering is free software: you can redistribute it and/or modify |
11
|
|
|
it under the terms of the GNU General Public License as published by |
12
|
|
|
the Free Software Foundation, either version 3 of the License, or |
13
|
|
|
(at your option) any later version. |
14
|
|
|
|
15
|
|
|
PyClustering is distributed in the hope that it will be useful, |
16
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
17
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18
|
|
|
GNU General Public License for more details. |
19
|
|
|
|
20
|
|
|
You should have received a copy of the GNU General Public License |
21
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
@endcond |
23
|
|
|
|
24
|
|
|
""" |
25
|
|
|
|
26
|
|
|
|
27
|
|
|
import numpy as np; |
|
|
|
|
28
|
|
|
import math; |
29
|
|
|
|
30
|
|
|
import matplotlib.pyplot as plt; |
|
|
|
|
31
|
|
|
import matplotlib.animation as animation; |
|
|
|
|
32
|
|
|
|
33
|
|
|
from pyclustering.cluster import cluster_visualizer; |
34
|
|
|
from pyclustering.cluster.ga_maths import ga_math; |
35
|
|
|
|
36
|
|
|
|
37
|
|
|
|
38
|
|
|
class ga_observer: |
39
|
|
|
"""! |
40
|
|
|
@brief Genetic algorithm observer that is used to collect information about clustering process on each iteration. |
41
|
|
|
|
42
|
|
|
""" |
43
|
|
|
|
44
|
|
|
def __init__(self, need_global_best=False, need_population_best=False, need_mean_ff=False): |
45
|
|
|
"""! |
46
|
|
|
@brief Constructs genetic algorithm observer to collect specific information. |
47
|
|
|
|
48
|
|
|
@param[in] need_global_best (bool): If 'True' then the best chromosomes and its fitness function value (global optimum) for each iteration are stored. |
49
|
|
|
@param[in] need_population_best (bool): If 'True' then current (on each iteration) best chromosomes and its fitness function value (local optimum) are stored. |
50
|
|
|
@param[in] need_mean_ff (bool): If 'True' then average value of fitness function on each iteration is stored. |
51
|
|
|
|
52
|
|
|
""" |
53
|
|
|
|
54
|
|
|
# Global best chromosome and fitness function for each population |
55
|
|
|
self._global_best_result = {'chromosome': [], 'fitness_function': []}; |
56
|
|
|
|
57
|
|
|
# Best chromosome and fitness function on a population |
58
|
|
|
self._best_population_result = {'chromosome': [], 'fitness_function': []}; |
59
|
|
|
|
60
|
|
|
# Mean fitness function on each population |
61
|
|
|
self._mean_ff_result = []; |
62
|
|
|
|
63
|
|
|
# Flags to collect |
64
|
|
|
self._need_global_best = need_global_best; |
65
|
|
|
self._need_population_best = need_population_best; |
66
|
|
|
self._need_mean_ff = need_mean_ff; |
67
|
|
|
|
68
|
|
|
|
69
|
|
|
def __len__(self): |
70
|
|
|
"""! |
71
|
|
|
@brief Returns amount of iterations that genetic algorithm was observed. |
72
|
|
|
|
73
|
|
|
""" |
74
|
|
|
global_length = len(self._global_best_result['chromosome']); |
75
|
|
|
local_length = len(self._best_population_result['chromosome']); |
76
|
|
|
average_length = len(self._mean_ff_result); |
77
|
|
|
|
78
|
|
|
return max(global_length, local_length, average_length); |
79
|
|
|
|
80
|
|
|
|
81
|
|
|
def collect_global_best(self, best_chromosome, best_fitness_function): |
82
|
|
|
"""! |
83
|
|
|
@brief Stores the best chromosome and its fitness function's value. |
84
|
|
|
|
85
|
|
|
@param[in] best_chromosome (list): The best chromosome that were observed. |
86
|
|
|
@param[in] best_fitness_function (float): Fitness function value of the best chromosome. |
87
|
|
|
|
88
|
|
|
""" |
89
|
|
|
|
90
|
|
|
if not self._need_global_best: |
91
|
|
|
return; |
92
|
|
|
|
93
|
|
|
self._global_best_result['chromosome'].append(best_chromosome); |
94
|
|
|
self._global_best_result['fitness_function'].append(best_fitness_function); |
95
|
|
|
|
96
|
|
|
|
97
|
|
|
def collect_population_best(self, best_chromosome, best_fitness_function): |
98
|
|
|
"""! |
99
|
|
|
@brief Stores the best chromosome for current specific iteration and its fitness function's value. |
100
|
|
|
|
101
|
|
|
@param[in] best_chromosome (list): The best chromosome on specific iteration. |
102
|
|
|
@param[in] best_fitness_function (float): Fitness function value of the chromosome. |
103
|
|
|
|
104
|
|
|
""" |
105
|
|
|
|
106
|
|
|
if not self._need_population_best: |
107
|
|
|
return; |
108
|
|
|
|
109
|
|
|
self._best_population_result['chromosome'].append(best_chromosome); |
110
|
|
|
self._best_population_result['fitness_function'].append(best_fitness_function); |
111
|
|
|
|
112
|
|
|
|
113
|
|
|
def collect_mean(self, fitness_functions): |
114
|
|
|
"""! |
115
|
|
|
@brief Stores average value of fitness function among chromosomes on specific iteration. |
116
|
|
|
|
117
|
|
|
@param[in] fitness_functions (float): Average value of fitness functions among chromosomes. |
118
|
|
|
|
119
|
|
|
""" |
120
|
|
|
|
121
|
|
|
if not self._need_mean_ff: |
122
|
|
|
return; |
123
|
|
View Code Duplication |
|
|
|
|
|
124
|
|
|
self._mean_ff_result.append(np.mean(fitness_functions)); |
125
|
|
|
|
126
|
|
|
|
127
|
|
|
def get_global_best(self): |
128
|
|
|
"""! |
129
|
|
|
@return (dict) Returns dictionary with keys 'chromosome' and 'fitness_function' where evolution of the best chromosome |
130
|
|
|
and its fitness function's value (evolution of global optimum) are stored in lists. |
131
|
|
|
|
132
|
|
|
""" |
133
|
|
|
return self._global_best_result; |
134
|
|
|
|
135
|
|
|
|
136
|
|
|
def get_population_best(self): |
137
|
|
|
"""! |
138
|
|
|
@brief (dict) Returns dictionary with keys 'chromosome' and 'fitness_function' where evolution of the current best chromosome |
139
|
|
|
and its fitness function's value (evolution of local optimum) are stored in lists. |
140
|
|
|
|
141
|
|
|
""" |
142
|
|
|
return self._best_population_result; |
143
|
|
|
|
144
|
|
|
|
145
|
|
|
def get_mean_fitness_function(self): |
146
|
|
|
"""! |
147
|
|
View Code Duplication |
@brief (list) Returns fitness function's values on each iteration. |
|
|
|
|
148
|
|
|
|
149
|
|
|
""" |
150
|
|
|
return self._mean_ff_result; |
151
|
|
|
|
152
|
|
|
|
153
|
|
|
|
154
|
|
|
class ga_visualizer: |
155
|
|
|
"""! |
156
|
|
|
@brief Genetic algorithm visualizer is used to show clustering results that are specific for |
157
|
|
|
this particular algorithm: clusters, evolution of global and local optimum. |
158
|
|
|
@details The visualizer requires 'ga_observer' that collects evolution of clustering process in |
159
|
|
|
genetic algorithm. The observer is created by user and passed to genetic algorithm. There |
160
|
|
|
is usage example of the visualizer using the observer: |
161
|
|
|
@code |
162
|
|
|
# Read data for clustering |
163
|
|
|
sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1); |
164
|
|
|
|
165
|
|
|
# Create instance of observer that will collect all information: |
166
|
|
|
observer_instance = ga_observer(True, True, True); |
167
|
|
|
|
168
|
|
|
# Create genetic algorithm where observer will collect information: |
169
|
|
|
ga_instance = genetic_algorithm(data=sample, |
170
|
|
|
count_clusters=2, |
171
|
|
|
chromosome_count=20, |
172
|
|
|
population_count=20, |
173
|
|
|
count_mutation_gens=1, |
174
|
|
|
observer=observer_instance); |
175
|
|
|
|
176
|
|
|
# Start processing |
177
|
|
|
ga_instance.process(); |
178
|
|
|
|
179
|
|
|
# Show cluster using observer: |
180
|
|
|
ga_visualizer.show_clusters(sample, observer_instance); |
181
|
|
|
@endcode |
182
|
|
|
|
183
|
|
|
@see cluster_visualizer |
184
|
|
|
|
185
|
|
|
""" |
186
|
|
|
|
187
|
|
|
@staticmethod |
188
|
|
|
def show_evolution(observer, start_iteration = 0, stop_iteration = None, ax = None, display = True): |
189
|
|
|
"""! |
190
|
|
|
@brief Displays evolution of fitness function for the best chromosome, for the current best chromosome and |
191
|
|
|
average value among all chromosomes. |
192
|
|
|
|
193
|
|
|
@param[in] observer (ga_observer): Genetic algorithm observer that was used for collecting evolution in the algorithm and |
194
|
|
|
where whole required information for visualization is stored. |
195
|
|
|
@param[in] start_iteration (uint): Iteration from that evolution should be shown. |
196
|
|
|
@param[in] stop_iteration (uint): Iteration after that evolution shouldn't be shown. |
197
|
|
|
@param[in] ax (Axes): Canvas where evolution should be displayed. |
198
|
|
|
@param[in] display (bool): If 'True' then visualization of the evolution will be shown by the function. |
199
|
|
|
This argument should be 'False' if you want to add something else to the canvas and display it later. |
200
|
|
|
|
201
|
|
|
@return (Axis) Canvas where evolution was shown. |
202
|
|
|
|
203
|
|
|
""" |
204
|
|
|
|
205
|
|
|
if (ax is None): |
206
|
|
|
_, ax = plt.subplots(1); |
207
|
|
|
ax.set_title("Evolution"); |
208
|
|
|
|
209
|
|
|
if (stop_iteration is None): |
210
|
|
|
stop_iteration = len(observer); |
211
|
|
|
|
212
|
|
|
line_best, = ax.plot(observer.get_global_best()['fitness_function'][start_iteration:stop_iteration], 'r'); |
213
|
|
|
line_current, = ax.plot(observer.get_population_best()['fitness_function'][start_iteration:stop_iteration], 'k'); |
214
|
|
|
line_mean, = ax.plot(observer.get_mean_fitness_function()[start_iteration:stop_iteration], 'c'); |
215
|
|
|
|
216
|
|
|
if (start_iteration < (stop_iteration - 1)): |
217
|
|
|
ax.set_xlim([start_iteration, (stop_iteration - 1)]); |
218
|
|
|
|
219
|
|
|
ax.set_xlabel("Iteration"); |
220
|
|
|
ax.set_ylabel("Fitness function"); |
221
|
|
|
ax.legend([line_best, line_current, line_mean], ["The best pop.", "Cur. best pop.", "Average"], prop={'size': 10}); |
222
|
|
|
ax.grid(); |
223
|
|
|
|
224
|
|
|
if (display is True): |
225
|
|
|
plt.show(); |
226
|
|
|
|
227
|
|
|
return ax; |
228
|
|
|
|
229
|
|
|
|
230
|
|
|
@staticmethod |
231
|
|
|
def show_clusters(data, observer, marker = '.', markersize = None): |
232
|
|
|
"""! |
233
|
|
|
@brief Shows allocated clusters by the genetic algorithm. |
234
|
|
|
|
235
|
|
|
@param[in] data (list): Input data that was used for clustering process by the algorithm. |
236
|
|
|
@param[in] observer (ga_observer): Observer that was used for collection information about clustering process. |
237
|
|
|
@param[in] marker (char): Type of marker that should be used for object (point) representation. |
238
|
|
|
@param[in] markersize (uint): Size of the marker that is used for object (point) representation. |
239
|
|
|
|
240
|
|
|
@note If you have clusters instead of observer then 'cluster_visualizer' can be used for visualization purposes. |
241
|
|
|
|
242
|
|
|
@see cluster_visualizer |
243
|
|
|
|
244
|
|
|
""" |
245
|
|
|
|
246
|
|
|
figure = plt.figure(); |
247
|
|
|
ax1 = figure.add_subplot(121); |
248
|
|
|
|
249
|
|
|
clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][-1]); |
250
|
|
|
|
251
|
|
|
visualizer = cluster_visualizer(1, 2); |
252
|
|
|
visualizer.append_clusters(clusters, data, 0, marker, markersize); |
253
|
|
|
visualizer.show(figure, display = False); |
254
|
|
|
|
255
|
|
|
ga_visualizer.show_evolution(observer, 0, None, ax1, True); |
256
|
|
|
|
257
|
|
|
|
258
|
|
|
@staticmethod |
259
|
|
|
def animate_cluster_allocation(data, observer, animation_velocity = 75, movie_fps = 5, save_movie = None): |
260
|
|
|
"""! |
261
|
|
|
@brief Animate clustering process of genetic clustering algorithm. |
262
|
|
|
@details This method can be also used for rendering movie of clustering process and 'ffmpeg' is required for that purpuse. |
263
|
|
|
|
264
|
|
|
@param[in] data (list): Input data that was used for clustering process by the algorithm. |
265
|
|
|
@param[in] observer (ga_observer): Observer that was used for collection information about clustering process. |
266
|
|
|
Be sure that whole information was collected by the observer. |
267
|
|
|
@param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only). |
268
|
|
|
@param[in] movie_fps (uint): Defines frames per second (for rendering movie only). |
269
|
|
|
@param[in] save_movie (string): If it is specified then animation will be stored to file that is specified in this parameter. |
270
|
|
|
|
271
|
|
|
""" |
272
|
|
|
|
273
|
|
|
figure = plt.figure(); |
274
|
|
|
|
275
|
|
|
def init_frame(): |
276
|
|
|
return frame_generation(0); |
277
|
|
|
|
278
|
|
|
def frame_generation(index_iteration): |
279
|
|
|
figure.clf(); |
280
|
|
|
|
281
|
|
|
figure.suptitle("Clustering genetic algorithm (iteration: " + str(index_iteration) +")", fontsize = 18, fontweight = 'bold'); |
282
|
|
|
|
283
|
|
|
visualizer = cluster_visualizer(4, 2, ["The best pop. on step #" + str(index_iteration), "The best population"]); |
284
|
|
|
|
285
|
|
|
local_minimum_clusters = ga_math.get_clusters_representation(observer.get_population_best()['chromosome'][index_iteration]); |
286
|
|
|
visualizer.append_clusters(local_minimum_clusters, data, 0); |
287
|
|
|
|
288
|
|
|
global_minimum_clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][index_iteration]); |
289
|
|
|
visualizer.append_clusters(global_minimum_clusters, data, 1); |
290
|
|
|
|
291
|
|
|
ax1 = plt.subplot2grid((2, 2), (1, 0), colspan = 2); |
292
|
|
|
ga_visualizer.show_evolution(observer, 0, index_iteration + 1, ax1, False); |
293
|
|
|
|
294
|
|
|
visualizer.show(figure, shift = 0, display = False); |
295
|
|
|
figure.subplots_adjust(top = 0.85); |
296
|
|
|
|
297
|
|
|
return [ figure.gca() ]; |
298
|
|
|
|
299
|
|
|
iterations = len(observer); |
300
|
|
|
cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000); |
301
|
|
|
|
302
|
|
|
if (save_movie is not None): |
303
|
|
|
cluster_animation.save(save_movie, writer = 'ffmpeg', fps = movie_fps, bitrate = 1500); |
304
|
|
|
else: |
305
|
|
|
plt.show(); |
306
|
|
|
|
307
|
|
|
|
308
|
|
|
class genetic_algorithm: |
309
|
|
|
"""! |
310
|
|
|
@brief Class represents Genetic clustering algorithm. |
311
|
|
|
@details The searching capability of genetic algorithms is exploited in order to search for appropriate |
312
|
|
|
cluster centres. |
313
|
|
|
|
314
|
|
|
Example of clustering using genetic algorithm: |
315
|
|
|
@code |
316
|
|
|
# Read input data for clustering |
317
|
|
|
sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4); |
318
|
|
|
|
319
|
|
|
# Create genetic algorithm for clustering |
320
|
|
|
ga_instance = genetic_algorithm(data=sample, |
321
|
|
|
count_clusters=4, |
322
|
|
|
chromosome_count=100, |
323
|
|
|
population_count=200, |
324
|
|
|
count_mutation_gens=1); |
325
|
|
|
|
326
|
|
|
# Start clustering process |
327
|
|
|
ga_instance.process(); |
328
|
|
|
|
329
|
|
|
# Extract extracted clusters by the algorithm |
330
|
|
|
clusters = ga_instance.get_clusters(); |
331
|
|
|
print(clusters); |
332
|
|
|
@endcode |
333
|
|
|
|
334
|
|
|
There is an example of clustering results (fitness function evolution and allocated clusters) that were |
335
|
|
|
visualized by 'ga_visualizer': |
336
|
|
|
|
337
|
|
|
@image html ga_clustering_sample_simple_04.png |
338
|
|
|
|
339
|
|
|
@see ga_visualizer |
340
|
|
|
@see ga_observer |
341
|
|
|
|
342
|
|
|
""" |
343
|
|
|
|
344
|
|
|
def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2, |
345
|
|
|
coeff_mutation_count=0.25, select_coeff=1.0, observer=ga_observer()): |
346
|
|
|
"""! |
347
|
|
|
@brief Initialize genetic clustering algorithm for cluster analysis. |
348
|
|
|
|
349
|
|
|
@param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array |
350
|
|
|
where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]]. |
351
|
|
|
@param[in] count_clusters (uint): Amount of clusters that should be allocated in the data. |
352
|
|
|
@param[in] chromosome_count (uint): Amount of chromosomes in each population. |
353
|
|
|
@param[in] population_count (uint): Amount of populations. |
354
|
|
|
@param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step. |
355
|
|
|
@param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and |
356
|
|
|
thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'. |
357
|
|
|
@param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows: |
358
|
|
|
math.exp(1 + fitness(chromosome) * select_coeff). |
359
|
|
|
@param[in] observer (ga_observer): Observer that is used for collecting information of about clustering process on each step. |
360
|
|
|
|
361
|
|
|
""" |
362
|
|
|
|
363
|
|
|
# Initialize random |
364
|
|
|
np.random.seed() |
365
|
|
|
|
366
|
|
|
# Clustering data |
367
|
|
|
if type(data) is list: |
368
|
|
|
self._data = np.array(data) |
369
|
|
|
else: |
370
|
|
|
self._data = data |
371
|
|
|
|
372
|
|
|
# Count clusters |
373
|
|
|
self._count_clusters = count_clusters |
374
|
|
|
|
375
|
|
|
# Home many chromosome in population |
376
|
|
|
self._chromosome_count = chromosome_count |
377
|
|
|
|
378
|
|
|
# How many populations |
379
|
|
|
self._population_count = population_count |
380
|
|
|
|
381
|
|
|
# Count mutation genes |
382
|
|
|
self._count_mutation_gens = count_mutation_gens |
383
|
|
|
|
384
|
|
|
# Crossover rate |
385
|
|
|
self._crossover_rate = 1.0 |
386
|
|
|
|
387
|
|
|
# Count of chromosome for mutation (range [0, 1]) |
388
|
|
|
self._coeff_mutation_count = coeff_mutation_count |
389
|
|
|
|
390
|
|
|
# Exponential coeff for selection |
391
|
|
|
self._select_coeff = select_coeff |
392
|
|
|
|
393
|
|
|
# Result of clustering : best chromosome |
394
|
|
|
self._result_clustering = {'best_chromosome': [], |
395
|
|
|
'best_fitness_function': 0.0} |
396
|
|
|
|
397
|
|
|
# Observer |
398
|
|
|
self._observer = observer |
399
|
|
|
|
400
|
|
|
def process(self): |
401
|
|
|
"""! |
402
|
|
|
@brief Perform clustering procedure in line with rule of genetic clustering algorithm. |
403
|
|
|
|
404
|
|
|
@see get_clusters() |
405
|
|
|
|
406
|
|
|
""" |
407
|
|
|
|
408
|
|
|
# Initialize population |
409
|
|
|
chromosomes = self._init_population(self._count_clusters, len(self._data), self._chromosome_count) |
410
|
|
|
|
411
|
|
|
# Initialize the Best solution |
412
|
|
|
best_chromosome, best_ff, first_fitness_functions \ |
413
|
|
|
= self._get_best_chromosome(chromosomes, self._data, self._count_clusters) |
414
|
|
|
|
415
|
|
|
# Save best result into observer |
416
|
|
|
self._observer.collect_global_best(best_chromosome, best_ff) |
417
|
|
|
self._observer.collect_population_best(best_chromosome, best_ff) |
418
|
|
|
self._observer.collect_mean(first_fitness_functions) |
419
|
|
|
|
420
|
|
|
# Next population |
421
|
|
|
for _idx in range(self._population_count): |
|
|
|
|
422
|
|
|
|
423
|
|
|
# Select |
424
|
|
|
chromosomes = self._select(chromosomes, self._data, self._count_clusters, self._select_coeff) |
425
|
|
|
|
426
|
|
|
# Crossover |
427
|
|
|
self._crossover(chromosomes) |
428
|
|
|
|
429
|
|
|
# Mutation |
430
|
|
|
self._mutation(chromosomes, self._count_clusters, self._count_mutation_gens, self._coeff_mutation_count) |
431
|
|
|
|
432
|
|
|
# Update the Best Solution |
433
|
|
|
new_best_chromosome, new_best_ff, fitness_functions \ |
434
|
|
|
= self._get_best_chromosome(chromosomes, self._data, self._count_clusters) |
435
|
|
|
|
436
|
|
|
# Get best chromosome |
437
|
|
|
if new_best_ff < best_ff: |
438
|
|
|
best_ff = new_best_ff |
439
|
|
|
best_chromosome = new_best_chromosome |
440
|
|
|
|
441
|
|
|
# Save best result into observer |
442
|
|
|
self._observer.collect_global_best(best_chromosome, best_ff) |
443
|
|
|
self._observer.collect_population_best(new_best_chromosome, new_best_ff) |
444
|
|
|
self._observer.collect_mean(fitness_functions) |
445
|
|
|
|
446
|
|
|
# Save result |
447
|
|
|
self._result_clustering['best_chromosome'] = best_chromosome |
448
|
|
|
self._result_clustering['best_fitness_function'] = best_ff |
449
|
|
|
|
450
|
|
|
return best_chromosome, best_ff |
451
|
|
|
|
452
|
|
|
|
453
|
|
|
def get_observer(self): |
454
|
|
|
"""! |
455
|
|
|
@brief Returns genetic algorithm observer. |
456
|
|
|
|
457
|
|
|
""" |
458
|
|
|
return self._observer |
459
|
|
|
|
460
|
|
|
|
461
|
|
|
def get_clusters(self): |
462
|
|
|
"""! |
463
|
|
|
@brief Returns list of allocated clusters, each cluster contains indexes of objects from the data. |
464
|
|
|
|
465
|
|
|
@return (list) List of allocated clusters. |
466
|
|
|
|
467
|
|
|
@see process() |
468
|
|
|
|
469
|
|
|
""" |
470
|
|
|
|
471
|
|
|
return ga_math.get_clusters_representation(self._result_clustering['best_chromosome'], self._count_clusters) |
472
|
|
|
|
473
|
|
|
|
474
|
|
|
@staticmethod |
475
|
|
|
def _select(chromosomes, data, count_clusters, select_coeff): |
476
|
|
|
"""! |
477
|
|
|
@brief Performs selection procedure where new chromosomes are calculated. |
478
|
|
|
|
479
|
|
|
@param[in] chromosomes (numpy.array): Chromosomes |
480
|
|
|
|
481
|
|
|
""" |
482
|
|
|
|
483
|
|
|
# Calc centers |
484
|
|
|
centres = ga_math.get_centres(chromosomes, data, count_clusters) |
485
|
|
|
|
486
|
|
|
# Calc fitness functions |
487
|
|
|
fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes) |
488
|
|
|
|
489
|
|
|
for _idx in range(len(fitness)): |
490
|
|
|
fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff) |
491
|
|
|
|
492
|
|
|
# Calc probability vector |
493
|
|
|
probabilities = ga_math.calc_probability_vector(fitness) |
494
|
|
|
|
495
|
|
|
# Select P chromosomes with probabilities |
496
|
|
|
new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int) |
497
|
|
|
|
498
|
|
|
# Selecting |
499
|
|
|
for _idx in range(len(chromosomes)): |
500
|
|
|
new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)] |
501
|
|
|
|
502
|
|
|
return new_chromosomes |
503
|
|
|
|
504
|
|
|
|
505
|
|
|
@staticmethod |
506
|
|
|
def _crossover(chromosomes): |
507
|
|
|
"""! |
508
|
|
|
@brief Crossover procedure. |
509
|
|
|
|
510
|
|
|
""" |
511
|
|
|
|
512
|
|
|
# Get pairs to Crossover |
513
|
|
|
pairs_to_crossover = np.array(range(len(chromosomes))) |
514
|
|
|
|
515
|
|
|
# Set random pairs |
516
|
|
|
np.random.shuffle(pairs_to_crossover) |
517
|
|
|
|
518
|
|
|
# Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P) |
519
|
|
|
offset_in_pair = int(len(pairs_to_crossover) / 2) |
520
|
|
|
|
521
|
|
|
# For each pair |
522
|
|
|
for _idx in range(offset_in_pair): |
523
|
|
|
|
524
|
|
|
# Generate random mask for crossover |
525
|
|
|
crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx])) |
526
|
|
|
|
527
|
|
|
# Crossover a pair |
528
|
|
|
genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]], |
529
|
|
|
chromosomes[pairs_to_crossover[_idx + offset_in_pair]], |
530
|
|
|
crossover_mask) |
531
|
|
|
|
532
|
|
|
|
533
|
|
|
@staticmethod |
534
|
|
|
def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count): |
535
|
|
|
"""! |
536
|
|
|
@brief Mutation procedure. |
537
|
|
|
|
538
|
|
|
""" |
539
|
|
|
|
540
|
|
|
# Count gens in Chromosome |
541
|
|
|
count_gens = len(chromosomes[0]) |
542
|
|
|
|
543
|
|
|
# Get random chromosomes for mutation |
544
|
|
|
random_idx_chromosomes = np.array(range(len(chromosomes))) |
545
|
|
|
np.random.shuffle(random_idx_chromosomes) |
546
|
|
|
|
547
|
|
|
# |
548
|
|
|
for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)): |
549
|
|
|
|
550
|
|
|
# |
551
|
|
|
for _ in range(count_gen_for_mutation): |
552
|
|
|
|
553
|
|
|
# Get random gen |
554
|
|
|
gen_num = np.random.randint(count_gens) |
555
|
|
|
|
556
|
|
|
# Set random cluster |
557
|
|
|
chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters) |
558
|
|
|
|
559
|
|
|
|
560
|
|
|
@staticmethod |
561
|
|
|
def _crossover_a_pair(chromosome_1, chromosome_2, mask): |
562
|
|
|
"""! |
563
|
|
|
@brief Crossovers a pair of chromosomes. |
564
|
|
|
|
565
|
|
|
@param[in] chromosome_1 (numpy.array): The first chromosome for crossover. |
566
|
|
|
@param[in] chromosome_2 (numpy.array): The second chromosome for crossover. |
567
|
|
|
@param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped. |
568
|
|
|
|
569
|
|
|
""" |
570
|
|
|
|
571
|
|
|
for _idx in range(len(chromosome_1)): |
572
|
|
|
|
573
|
|
|
if mask[_idx] == 1: |
574
|
|
|
# Swap values |
575
|
|
|
chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx] |
576
|
|
|
|
577
|
|
|
|
578
|
|
|
@staticmethod |
579
|
|
|
def _get_crossover_mask(mask_length): |
580
|
|
|
"""! |
581
|
|
|
@brief Crossover mask to crossover a pair of chromosomes. |
582
|
|
|
|
583
|
|
|
@param[in] mask_length (uint): Length of the mask. |
584
|
|
|
|
585
|
|
|
""" |
586
|
|
|
|
587
|
|
|
# Initialize mask |
588
|
|
|
mask = np.zeros(mask_length) |
589
|
|
|
|
590
|
|
|
# Set a half of array to 1 |
591
|
|
|
mask[:int(int(mask_length) / 6)] = 1 |
592
|
|
|
|
593
|
|
|
# Random shuffle |
594
|
|
|
np.random.shuffle(mask) |
595
|
|
|
|
596
|
|
|
return mask |
597
|
|
|
|
598
|
|
|
|
599
|
|
|
@staticmethod |
600
|
|
|
def _init_population(count_clusters, count_data, chromosome_count): |
601
|
|
|
"""! |
602
|
|
|
@brief Returns first population as a uniform random choice. |
603
|
|
|
|
604
|
|
|
@param[in] count_clusters (uint): Amount of clusters that should be allocated. |
605
|
|
|
@param[in] count_data (uint): Data size that is used for clustering process. |
606
|
|
|
@param[in] chromosome_count (uint):Amount of chromosome that is used for clustering. |
607
|
|
|
|
608
|
|
|
""" |
609
|
|
|
|
610
|
|
|
population = np.random.randint(count_clusters, size=(chromosome_count, count_data)) |
611
|
|
|
|
612
|
|
|
return population |
613
|
|
|
|
614
|
|
|
|
615
|
|
|
@staticmethod |
616
|
|
|
def _get_best_chromosome(chromosomes, data, count_clusters): |
617
|
|
|
"""! |
618
|
|
|
@brief Returns the current best chromosome. |
619
|
|
|
|
620
|
|
|
@param[in] chromosomes (list): Chromosomes that are used for searching. |
621
|
|
|
@param[in] data (list): Input data that is used for clustering process. |
622
|
|
|
@param[in] count_clusters (uint): Amount of clusters that should be allocated. |
623
|
|
|
|
624
|
|
|
@return (list, float, list) The best chromosome, its fitness function value and fitness function values for |
625
|
|
|
all chromosomes. |
626
|
|
|
|
627
|
|
|
""" |
628
|
|
|
|
629
|
|
|
# Calc centers |
630
|
|
|
centres = ga_math.get_centres(chromosomes, data, count_clusters) |
631
|
|
|
|
632
|
|
|
# Calc Fitness functions |
633
|
|
|
fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes) |
634
|
|
|
|
635
|
|
|
# Index of the best chromosome |
636
|
|
|
best_chromosome_idx = fitness_functions.argmin() |
637
|
|
|
|
638
|
|
|
# Get chromosome with the best fitness function |
639
|
|
|
return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions |
640
|
|
|
|
641
|
|
|
|
642
|
|
|
@staticmethod |
643
|
|
|
def _calc_fitness_function(centres, data, chromosomes): |
644
|
|
|
"""! |
645
|
|
|
@brief Calculate fitness function values for chromosomes. |
646
|
|
|
|
647
|
|
|
@param[in] centres (list): Cluster centers. |
648
|
|
|
@param[in] data (list): Input data that is used for clustering process. |
649
|
|
|
@param[in] chromosomes (list): Chromosomes whose fitness function's values are calculated. |
650
|
|
|
|
651
|
|
|
@return (list) Fitness function value for each chromosome correspondingly. |
652
|
|
|
|
653
|
|
|
""" |
654
|
|
|
|
655
|
|
|
# Get count of chromosomes and clusters |
656
|
|
|
count_chromosome = len(chromosomes) |
657
|
|
|
|
658
|
|
|
# Initialize fitness function values |
659
|
|
|
fitness_function = np.zeros(count_chromosome) |
660
|
|
|
|
661
|
|
|
# Calc fitness function for each chromosome |
662
|
|
|
for _idx_chromosome in range(count_chromosome): |
663
|
|
|
|
664
|
|
|
# Get centers for a selected chromosome |
665
|
|
|
centres_data = np.zeros(data.shape) |
666
|
|
|
|
667
|
|
|
# Fill data centres |
668
|
|
|
for _idx in range(len(data)): |
669
|
|
|
centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]] |
670
|
|
|
|
671
|
|
|
# Get City Block distance for a chromosome |
672
|
|
|
fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data)) |
673
|
|
|
|
674
|
|
|
return fitness_function |
675
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.