Completed
Push — 0.7.dev ( 941fee...d0e462 )
by
unknown
01:02
created

GeneticAlgorithm.get_crossover_mask()   A

Complexity

Conditions 1

Size

Total Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
dl 0
loc 14
rs 9.4285
c 1
b 0
f 0
1
"""!
2
3
@brief Clustering by Genetic Algorithm
4
5
@date 2014-2017
6
@copyright GNU Public License
7
8
@cond GNU_PUBLIC_LICENSE
9
    PyClustering is free software: you can redistribute it and/or modify
10
    it under the terms of the GNU General Public License as published by
11
    the Free Software Foundation, either version 3 of the License, or
12
    (at your option) any later version.
13
14
    PyClustering is distributed in the hope that it will be useful,
15
    but WITHOUT ANY WARRANTY; without even the implied warranty of
16
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
    GNU General Public License for more details.
18
19
    You should have received a copy of the GNU General Public License
20
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
@endcond
22
23
"""
24
25
import numpy as np
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
26
27
28
class GeneticAlgorithm:
29
    """!
30
    @brief Class represents Genetic clustering algorithm
31
32
    """
33
34
    def __init__(self, data, count_clusters,  chromosome_count, population_count, count_mutation_gens=2):
35
36
        # Initialize random
37
        np.random.seed()
38
39
        # Clustering data
40
        self.data = data
41
42
        # Count clusters
43
        self.count_clusters = count_clusters
44
45
        # Home many chromosome in population
46
        self.chromosome_count = chromosome_count
47
48
        # How many populations
49
        self.population_count = population_count
50
51
        # Count mutation genes
52
        self.count_mutation_gens = count_mutation_gens
53
54
    def clustering(self):
55
        """
56
57
        :return:
58
        """
59
60
        # Initialize population
61
        chromosomes = self.init_population(self.count_clusters, len(self.data), self.chromosome_count)
62
63
        # Initialize the Best solution
64
        best_chromosome, best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)
65
66
        # Next population
67
        for _ in range(self.population_count):
68
            pass
69
70
            # Select
71
            chromosomes = self.select(chromosomes, self.data, self.count_clusters)
72
73
            # Crossover
74
            self.crossover(chromosomes)
75
76
            # Mutation
77
            self.mutation(chromosomes, self.count_clusters, self.count_mutation_gens)
78
79
            # Update the Best Solution
80
            new_best_chromosome, new_best_ff = self.get_best_chromosome(chromosomes, self.data, self.count_clusters)
81
82
            print('new best_chromosome : ', new_best_chromosome)
83
            print('new best_ff : ', new_best_ff)
84
85
            if new_best_ff < best_ff:
86
                best_ff = new_best_ff
87
                best_chromosome = new_best_chromosome
88
89
90
        print('best_chromosome : ', best_chromosome)
91
        print('best_ff : ', best_ff)
92
93
    @staticmethod
94
    def mutation(chromosomes, count_clusters, count_gen_for_mutation):
95
        """  """
96
97
        # Count gens in Chromosome
98
        count_gens = len(chromosomes[0])
99
100
        #
101
        for _idx_chromosome in range(len(chromosomes)):
102
103
            #
104
            for _ in range(count_gen_for_mutation):
105
106
                # Get random gen
107
                gen_num = np.random.randint(count_gens)
108
109
                # Set random cluster
110
                chromosomes[_idx_chromosome][gen_num] = np.random.randint(count_clusters)
111
112
    @staticmethod
113
    def crossover(chromosomes):
114
        """  """
115
116
        # Get pairs to Crossover
117
        pairs_to_crossover = np.array(range(len(chromosomes)))
118
119
        # Set random pairs
120
        np.random.shuffle(pairs_to_crossover)
121
122
        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
123
        offset_in_pair = int(len(pairs_to_crossover) / 2)
124
125
        # For each pair
126
        for _idx in range(offset_in_pair):
127
128
            # Generate random mask for crossover
129
            crossover_mask = GeneticAlgorithm.get_crossover_mask(len(chromosomes[_idx]))
130
131
            # Crossover a pair
132
            GeneticAlgorithm.crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
133
                                              chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
134
                                              crossover_mask)
135
136
    @staticmethod
137
    def crossover_a_pair(chromosome_1, chromosome_2, mask):
138
        """  """
139
140
        for _idx in range(len(chromosome_1)):
141
142
            if mask[_idx] == 1:
143
                # Swap values
144
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
145
146
    @staticmethod
147
    def get_crossover_mask(mask_length):
148
        """  """
149
150
        # Initialize mask
151
        mask = np.zeros(mask_length)
152
153
        # Set a half of array to 1
154
        mask[:int(int(mask_length) / 2)] = 1
155
156
        # Random shuffle
157
        np.random.shuffle(mask)
158
159
        return mask
160
161
    @staticmethod
162
    def select(chromosomes, data, count_clusters):
163
        """  """
164
165
        # Calc centers
166
        centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)
167
168
        # Calc fitness functions
169
        fitness = GeneticAlgorithm.calc_fitness_function(centres, data)
170
171
        # Calc probability vector
172
        probabilities = GeneticAlgorithm.calc_probability_vector(fitness)
173
174
        # Select P chromosomes with probabilities
175
        new_chromosomes = np.zeros(chromosomes.shape)
176
177
        # Selecting
178
        for _idx in range(len(chromosomes)):
179
            new_chromosomes[_idx] = chromosomes[GeneticAlgorithm.get_uniform(probabilities)]
180
181
        return new_chromosomes
182
183
    @staticmethod
184
    def set_last_value_to_one(probabilities):
185
        """!
186
        @brief Update the last same probabilities to one.
187
        @details All values of probability list equals to the last element are set to 1.
188
        """
189
190
        # Start from the last elem
191
        back_idx = - 1
192
193
        # All values equal to the last elem should be set to 1
194
        last_val = probabilities[back_idx]
195
196
        # for all elements or if a elem not equal to the last elem
197
        for _idx in range(-1, -len(probabilities) - 1):
0 ignored issues
show
Unused Code introduced by
The variable _idx seems to be unused.
Loading history...
198
            if probabilities[back_idx] == last_val:
199
                probabilities[back_idx] = 1
200
            else:
201
                break
202
203
    @staticmethod
204
    def get_uniform(probabilities):
205
        """!
206
        @brief Returns index in probabilities.
207
208
        @param[in] probabilities (list): List with segments in increasing sequence with val in [0, 1],
209
                   for example, [0 0.1 0.2 0.3 1.0].
210
        """
211
212
        # Initialize return value
213
        res_idx = None
214
215
        # Get random num in range [0, 1)
216
        random_num = np.random.rand()
217
218
        # Find segment with  val1 < random_num < val2
219
        for _idx in range(len(probabilities)):
220
            if random_num < probabilities[_idx]:
221
                res_idx = _idx
222
                break
223
224
        if res_idx is None:
225
            raise AttributeError("'probabilities' should contain 1 as the end of last segment(s)")
226
227
        return res_idx
228
229
    @staticmethod
230
    def get_chromosome_by_probability(probabilities):
231
        """ """
232
233
        # Initialize return value
234
        res_idx = None
235
236
        # Get uniform random in [0, 1)
237
        random_num = np.random.rand()
238
239
        # Find element with  val1 < random_num < val2
240
        for _idx in range(len(probabilities)):
241
            if random_num < probabilities[_idx]:
242
                res_idx = _idx
243
                break
244
245
        if res_idx is None:
246
            raise AttributeError("List 'probabilities' should contain 1 as the end of last segment(s)")
247
248
        return res_idx
249
250 View Code Duplication
    @staticmethod
1 ignored issue
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
251
    def calc_probability_vector(fitness):
252
        """  """
253
254
        if len(fitness) == 0:
255
            raise AttributeError("Has no any fitness functions.")
256
257
        # Initialize vector
258
        prob = np.zeros(len(fitness))
259
260
        # Get min element
261
        min_elem = np.min(fitness)
262
263
        # Initialize first element
264
        prob[0] = fitness[0] - min_elem
265
266
        # Accumulate values in probability vector
267
        for _idx in range(1, len(fitness)):
268
            prob[_idx] = prob[_idx - 1] + fitness[_idx] - min_elem
269
270
        # Normalize
271
        prob /= np.sum(fitness - min_elem)
272
273
        return prob
274
275
    @staticmethod
276
    def init_population(count_clusters, count_data, chromosome_count):
277
        """ Returns first population as a uniform random choice """
278
279
        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
280
281
        return population
282
283
    @staticmethod
284
    def get_best_chromosome(chromosomes, data, count_clusters):
285
        """  """
286
287
        # Calc centers
288
        centres = GeneticAlgorithm.get_centres(chromosomes, data, count_clusters)
289
290
        # Calc Fitness functions
291
        fitness_function = GeneticAlgorithm.calc_fitness_function(centres, data)
292
293
        # Index of the best chromosome
294
        best_chromosome_idx = fitness_function.argmin()
295
296
        # Get chromosome with the best fitness function
297
        return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]
298
299 View Code Duplication
    @staticmethod
1 ignored issue
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
300
    def calc_fitness_function(centres, data):
301
        """  """
302
303
        # Get count of chromosomes and clusters
304
        count_chromosome = len(centres)
305
        count_clusters = len(centres[0])
306
307
        # Initialize fitness function values
308
        fitness_function = np.zeros(count_chromosome)
309
310
        # Calc fitness function for each chromosome
311
        for _idx_chromosome in range(count_chromosome):
312
313
            # Calc for each cluster in a chromosome
314
            for _idx_center in range(count_clusters):
315
                fitness_function[_idx_chromosome] += np.linalg.norm(data - centres[_idx_chromosome][_idx_center])
316
317
            # Normalize fitness function
318
            fitness_function[_idx_chromosome] /= count_clusters
319
320
        return fitness_function
321
322
    @staticmethod
323
    def get_centres(chromosomes, data, count_clusters):
324
        """ """
325
326
        # Initialize centres
327
        centres = np.zeros((len(chromosomes), count_clusters, len(data[0])))
328
329
        # Calc centers for next chromosome
330
        for _idx in range(len(chromosomes)):
331
            centres[_idx] = GeneticAlgorithm.calc_centers_for_chromosome(chromosomes[_idx], data, count_clusters)
332
333
        return centres
334
335
    @staticmethod
336
    def calc_centers_for_chromosome(chromosome, data, count_clusters):
337
        """ """
338
339
        # Initialize centers
340
        centers = np.zeros((count_clusters, len(data[0])))
341
342
        # Next cluster
343
        for _idx_cluster in range(count_clusters):
344
            centers[_idx_cluster] = GeneticAlgorithm.calc_the_center(chromosome, data, _idx_cluster)
345
346
        return centers
347
348
    @staticmethod
349
    def calc_the_center(chromosome, data, cluster_num):
350
        """ """
351
352
        # Initialize center
353
        center = np.zeros(len(data[0]))
354
355
        # Get count data in clusters
356
        count_data_in_cluster = np.sum(chromosome)
357
358
        # If has no data in cluster
359
        if count_data_in_cluster == 0:
360
            return center
361
362
        # Next data point
363
        for _idx in range(len(chromosome)):
364
365
            # If data associated with current cluster
366
            if chromosome[_idx] == cluster_num:
367
                center += data[_idx]
368
369
        # Normalize center
370
        center /= count_data_in_cluster
371
372
        return center
373
374
375
# --------------------------  Unit tests  -----------------------------------
376
377
# # Count Clusters and Data points
378
# COUNT_CHROMOSOMES = 4
379
# COUNT_CLUSTERS = 4
380
# COUNT_DATA_POINTS = 10
381
# DATA_DIMENSION = 2
382
#
383
# # Chromosome for test
384
# test_chromosomes = np.random.randint(COUNT_CLUSTERS, size=(COUNT_CHROMOSOMES, COUNT_DATA_POINTS))
385
#
386
# # Data points
387
# test_data = np.random.rand(COUNT_DATA_POINTS, DATA_DIMENSION)
388
#
389
# # Current cluster
390
# test_cluster_num = 2
391
#
392
# test_center = GeneticAlgorithm.get_centres(test_chromosomes, test_data, test_cluster_num)
393
#
394
# test_fitness = GeneticAlgorithm.calc_fitness_function(test_center, test_data)
395
#
396
# print('chromosome : ', test_chromosomes)
397
# print('data : ', test_data)
398
# print('center : ', test_center)
399
#
400
# print('center shape : ', test_center.shape)
401
#
402
# print('subtract : ', test_data - test_center[0][0])
403
#
404
# print('test_fitness : ', test_fitness)
405
#
406
# a = np.zeros(10)
407
# a[0] = -1
408
# a[1] = -1
409
#
410
# print('test_fitness min: ', GeneticAlgorithm.get_best_chromosome(test_chromosomes, test_data, COUNT_CLUSTERS))
411
#
412
# GeneticAlgorithm.crossover(test_chromosomes)
413
414
415
COUNT_CHROMOSOMES = 20
416
COUNT_CLUSTERS = 4
417
COUNT_POPULATIONS = 20
418
COUNT_MUTATIONS_GEN = 2
419
420
data_set_1 = []
421
data_set_1.extend([[0, 0], [1, 0], [0, 1], [1, 1]])
422
data_set_1.extend([[5, 0], [6, 0], [5, 1], [6, 1]])
423
data_set_1.extend([[0, 5], [1, 5], [0, 6], [1, 6]])
424
data_set_1.extend([[4, 4], [7, 4], [4, 7], [7, 7]])
425
426
test_data_2 = np.array(data_set_1)
427
428
GeneticAlgorithm(data=test_data_2,
429
                 count_clusters=COUNT_CLUSTERS,
430
                 chromosome_count=COUNT_CHROMOSOMES,
431
                 population_count=COUNT_POPULATIONS,
432
                 count_mutation_gens=COUNT_MUTATIONS_GEN).clustering()
433