Completed
Push — 0.7.dev ( bfeadc...20409b )
by Andrei
01:28
created

genetic_algorithm._mutation()   B

Complexity

Conditions 3

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 25
rs 8.8571
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).
4
5
@authors Aleksey Kukushkin ([email protected])
6
@date 2014-2017
7
@copyright GNU Public License
8
9
@cond GNU_PUBLIC_LICENSE
10
    PyClustering is free software: you can redistribute it and/or modify
11
    it under the terms of the GNU General Public License as published by
12
    the Free Software Foundation, either version 3 of the License, or
13
    (at your option) any later version.
14
15
    PyClustering is distributed in the hope that it will be useful,
16
    but WITHOUT ANY WARRANTY; without even the implied warranty of
17
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
    GNU General Public License for more details.
19
20
    You should have received a copy of the GNU General Public License
21
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
@endcond
23
24
"""
25
26
import numpy as np;
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
27
import math;
28
29
from pyclustering.cluster.ga_maths import ga_math;
30
31
32
class genetic_algorithm:
33
    """!
34
    @brief Class represents Genetic clustering algorithm.
35
    @details The searching capability of genetic algorithms is exploited in order to search for appropriate cluster centres.
36
37
    """
38
39
    def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
40
                 coeff_mutation_count=0.25, select_coeff=1.0):
41
        """!
42
        @brief Initialize genetic clustering algorithm for cluster analysis.
43
        
44
        @param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array where each row is a point,
45
                    for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
46
        @param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
47
        @param[in] chromosome_count (uint): Amount of chromosomes in each population.
48
        @param[in] population_count (uint): Amount of populations.
49
        @param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
50
        @param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
51
                    thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
52
        @param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows: math.exp(1 + fitness(chromosome) * select_coeff).
53
        
54
        """
55
        
56
        # Initialize random
57
        np.random.seed()
58
59
        # Clustering data
60
        if type(data) is list:
61
            self.data = np.array(data)
62
        else:
63
            self.data = data
64
65
        # Count clusters
66
        self.count_clusters = count_clusters
67
68
        # Home many chromosome in population
69
        self.chromosome_count = chromosome_count
70
71
        # How many populations
72
        self.population_count = population_count
73
74
        # Count mutation genes
75
        self.count_mutation_gens = count_mutation_gens
76
77
        # Crossover rate
78
        self.crossover_rate = 1.0
79
80
        # Count of chromosome for mutation (range [0, 1])
81
        self.coeff_mutation_count = coeff_mutation_count
82
83
        # Exponential coeff for selection
84
        self.select_coeff = select_coeff
85
86
87
    def process(self):
88
        """!
89
        @brief Perform clustering procedure in line with rule of genetic clustering algorithm.
90
        
91
        @see get_clusters()
92
        
93
        """
94
95
        # Initialize population
96
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
97
98
        # Initialize the Best solution
99
        best_chromosome, best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
100
101
        # Next population
102
        for _ in range(self.population_count):
103
104
            # Select
105
            chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)
106
107
            # Crossover
108
            self._crossover(chromosomes)
109
110
            # Mutation
111
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
112
113
            # Update the Best Solution
114
            new_best_chromosome, new_best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
115
116
            # Get best chromosome
117
            if new_best_ff < best_ff:
118
                best_ff = new_best_ff
119
                best_chromosome = new_best_chromosome
120
121
        return best_chromosome, best_ff
122
123 View Code Duplication
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
124
    def get_clusters(self):
125
        """!
126
        @brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
127
        
128
        @return (list) List of allocated clusters.
129
        
130
        @see process()
131
        
132
        """
133
        
134
        raise NameError("Implementation is require.");
135
136
137
    @staticmethod
138
    def _select(chromosomes, data, count_clusters, select_coeff):
139
        """!
140
        @brief Performs selection procedure where new chromosomes are calculated.
141
        
142
        @param[in] chromosomes (numpy.array): Chromosomes 
143
        
144
        """
145
146
        # Calc centers
147 View Code Duplication
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
148
149
        # Calc fitness functions
150
        fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
151
152
        for _idx in range(len(fitness)):
153
            fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)
154
155
        # Calc probability vector
156
        probabilities = ga_math.calc_probability_vector(fitness)
157
158
        # Select P chromosomes with probabilities
159
        new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)
160
161
        # Selecting
162
        for _idx in range(len(chromosomes)):
163
            new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]
164
165
        return new_chromosomes
166
167
168
    @staticmethod
169
    def _crossover(chromosomes):
170
        """!
171
        @brief Crossover procedure.
172
        
173
        """
174
175
        # Get pairs to Crossover
176
        pairs_to_crossover = np.array(range(len(chromosomes)))
177
178
        # Set random pairs
179
        np.random.shuffle(pairs_to_crossover)
180
181
        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
182
        offset_in_pair = int(len(pairs_to_crossover) / 2)
183
184
        # For each pair
185
        for _idx in range(offset_in_pair):
186
187
            # Generate random mask for crossover
188
            crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))
189
190
            # Crossover a pair
191
            genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
192
                                               chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
193
                                               crossover_mask)
194
195
    @staticmethod
196
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
197
        """!
198
        @brief Mutation procedure.
199
        
200
        """
201
202
        # Count gens in Chromosome
203
        count_gens = len(chromosomes[0])
204
205
        # Get random chromosomes for mutation
206
        random_idx_chromosomes = np.array(range(len(chromosomes)))
207
        np.random.shuffle(random_idx_chromosomes)
208
209
        #
210
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
211
212
            #
213
            for _ in range(count_gen_for_mutation):
214
215
                # Get random gen
216
                gen_num = np.random.randint(count_gens)
217
218
                # Set random cluster
219
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
220
221
222
    @staticmethod
223
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
224
        """!
225
        @brief Crossovers a pair of chromosomes.
226
        
227
        @param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
228
        @param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
229
        @param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
230
        
231
        """
232
233
        for _idx in range(len(chromosome_1)):
234
235
            if mask[_idx] == 1:
236
                # Swap values
237
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
238
239
240
    @staticmethod
241
    def _get_crossover_mask(mask_length):
242
        """!
243
        @brief Crossover mask to crossover a pair of chromosomes.
244
        
245
        @param[in] mask_length (uint): Length of the mask.
246
        
247
        """
248
249
        # Initialize mask
250
        mask = np.zeros(mask_length)
251
252
        # Set a half of array to 1
253
        mask[:int(int(mask_length) / 6)] = 1
254
255
        # Random shuffle
256
        np.random.shuffle(mask)
257
258
        return mask
259
260
261
    @staticmethod
262
    def _init_population(count_clusters, count_data, chromosome_count):
263
        """!
264
        @brief Returns first population as a uniform random choice.
265
        
266
        @param[in] count_clusters (uint):
267
        @param[in] count_data (uint):
268
        @param[in] chromosome_count (uint): 
269
        
270
        """
271
272
        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
273
274
        return population
275
276
277
    @staticmethod
278
    def _get_best_chromosome(chromosomes, data, count_clusters):
279
        """!
280
        @brief 
281
        
282
        """
283
284
        # Calc centers
285
        centres = ga_math.get_centres(chromosomes, data, count_clusters)
286
287
        # Calc Fitness functions
288
        fitness_function = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
289
290
        # Index of the best chromosome
291
        best_chromosome_idx = fitness_function.argmin()
292
293
        # Get chromosome with the best fitness function
294
        return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]
295
296
297
    @staticmethod
298
    def _calc_fitness_function(centres, data, chromosomes):
299
        """!
300
        @brief 
301
        
302
        """
303
304
        # Get count of chromosomes and clusters
305
        count_chromosome = len(chromosomes)
306
307
        # Initialize fitness function values
308
        fitness_function = np.zeros(count_chromosome)
309
310
        # Calc fitness function for each chromosome
311
        for _idx_chromosome in range(count_chromosome):
312
313
            # Get centers for a selected chromosome
314
            centres_data = np.zeros(data.shape)
315
316
            # Fill data centres
317
            for _idx in range(len(data)):
318
                centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]
319
320
            # Get City Block distance for a chromosome
321
            fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))
322
323
        return fitness_function
324