Completed
Push — 0.7.dev ( f0d749...68d2d8 )
by
unknown
01:03
created

GeneticAlgorithm._calc_fitness_function()   B

Complexity

Conditions 5

Size

Total Lines 34

Duplication

Lines 34
Ratio 100 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
c 1
b 0
f 0
dl 34
loc 34
rs 8.0894
1
"""!
2
3
@brief Clustering by Genetic Algorithm
4
5
@date 2014-2017
6
@copyright GNU Public License
7
8
@cond GNU_PUBLIC_LICENSE
9
    PyClustering is free software: you can redistribute it and/or modify
10
    it under the terms of the GNU General Public License as published by
11
    the Free Software Foundation, either version 3 of the License, or
12
    (at your option) any later version.
13
14
    PyClustering is distributed in the hope that it will be useful,
15
    but WITHOUT ANY WARRANTY; without even the implied warranty of
16
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
    GNU General Public License for more details.
18
19
    You should have received a copy of the GNU General Public License
20
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
@endcond
22
23
"""
24
25
import numpy as np
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
26
import math
0 ignored issues
show
Unused Code introduced by
The import math seems to be unused.
Loading history...
27
28
from pyclustering.cluster.ga_maths import GAMath
29
30
31
class GeneticAlgorithm:
32
    """!
33
    @brief Class represents Genetic clustering algorithm
34
35
    """
36
37
    def __init__(self, data, count_clusters,  chromosome_count, population_count, count_mutation_gens=2,
38
                 coeff_mutation_count=0.25):
39
40
        # Initialize random
41
        np.random.seed()
42
43
        # Clustering data
44
        if type(data) is list:
45
            self.data = np.array(data)
46
        else:
47
            self.data = data
48
49
        # Count clusters
50
        self.count_clusters = count_clusters
51
52
        # Home many chromosome in population
53
        self.chromosome_count = chromosome_count
54
55
        # How many populations
56
        self.population_count = population_count
57
58
        # Count mutation genes
59
        self.count_mutation_gens = count_mutation_gens
60
61
        # Crossover rate
62
        self.crossover_rate = 1.0
63
64
        # Count of chromosome for mutation (range [0, 1])
65
        self.coeff_mutation_count = coeff_mutation_count
66
67
    def clustering(self):
68
        """
69
70
        :return:
71
        """
72
73
        # Initialize population
74
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
75
76
        # Initialize the Best solution
77
        best_chromosome, best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
78
79
        # Next population
80
        for _ in range(self.population_count):
81
82
            # Select
83
            chromosomes = self._select(chromosomes, self.data, self.count_clusters)
84
85
            # Crossover
86
            self._crossover(chromosomes)
87
88
            # Mutation
89
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
90
91
            # Update the Best Solution
92
            new_best_chromosome, new_best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
93
94
            # Get best chromosome
95
            if new_best_ff < best_ff:
96
                best_ff = new_best_ff
97
                best_chromosome = new_best_chromosome
98
99
        return best_chromosome, best_ff
100
101
    @staticmethod
102
    def _select(chromosomes, data, count_clusters):
103
        """  """
104
105
        # Calc centers
106
        centres = GAMath.get_centres(chromosomes, data, count_clusters)
107
108
        # Calc fitness functions
109
        fitness = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)
110
111
        # Calc probability vector
112
        probabilities = GAMath.calc_probability_vector(fitness)
113
114
        # Select P chromosomes with probabilities
115
        new_chromosomes = np.zeros(chromosomes.shape)
116
117
        # Selecting
118
        for _idx in range(len(chromosomes)):
119
            new_chromosomes[_idx] = chromosomes[GAMath.get_uniform(probabilities)]
120
121
        return new_chromosomes
122
123 View Code Duplication
    @staticmethod
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
124
    def _crossover(chromosomes):
125
        """  """
126
127
        # Get pairs to Crossover
128
        pairs_to_crossover = np.array(range(len(chromosomes)))
129
130
        # Set random pairs
131
        np.random.shuffle(pairs_to_crossover)
132
133
        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
134
        offset_in_pair = int(len(pairs_to_crossover) / 2)
135
136
        # For each pair
137
        for _idx in range(offset_in_pair):
138
139
            # Generate random mask for crossover
140
            crossover_mask = GeneticAlgorithm._get_crossover_mask(len(chromosomes[_idx]))
141
142
            # Crossover a pair
143
            GeneticAlgorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
144
                                               chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
145
                                               crossover_mask)
146
147 View Code Duplication
    @staticmethod
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
148
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
149
        """  """
150
151
        # Count gens in Chromosome
152
        count_gens = len(chromosomes[0])
153
154
        # Get random chromosomes for mutation
155
        random_idx_chromosomes = np.array(range(len(chromosomes)))
156
        np.random.shuffle(random_idx_chromosomes)
157
158
        #
159
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
160
161
            #
162
            for _ in range(count_gen_for_mutation):
163
164
                # Get random gen
165
                gen_num = np.random.randint(count_gens)
166
167
                # Set random cluster
168
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
169
170
    @staticmethod
171
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
172
        """  """
173
174
        for _idx in range(len(chromosome_1)):
175
176
            if mask[_idx] == 1:
177
                # Swap values
178
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
179
180
    @staticmethod
181
    def _get_crossover_mask(mask_length):
182
        """  """
183
184
        # Initialize mask
185
        mask = np.zeros(mask_length)
186
187
        # Set a half of array to 1
188
        mask[:int(int(mask_length) / 6)] = 1
189
190
        # Random shuffle
191
        np.random.shuffle(mask)
192
193
        return mask
194
195
    @staticmethod
196
    def _init_population(count_clusters, count_data, chromosome_count):
197
        """ Returns first population as a uniform random choice """
198
199
        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
200
201
        return population
202
203
    @staticmethod
204
    def _get_best_chromosome(chromosomes, data, count_clusters):
205
        """  """
206
207
        # Calc centers
208
        centres = GAMath.get_centres(chromosomes, data, count_clusters)
209
210
        # Calc Fitness functions
211
        fitness_function = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes)
212
213
        # Index of the best chromosome
214
        best_chromosome_idx = fitness_function.argmin()
215
216
        # Get chromosome with the best fitness function
217
        return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx]
218
219 View Code Duplication
    @staticmethod
1 ignored issue
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
220
    def _calc_fitness_function(centres, data, chromosomes):
221
        """  """
222
223
        # Get count of chromosomes and clusters
224
        count_chromosome = len(chromosomes)
225
        count_clusters = len(centres[0])
226
227
        # Initialize fitness function values
228
        fitness_function = np.zeros(count_chromosome)
229
230
        # Calc fitness function for each chromosome
231
        for _idx_chromosome in range(count_chromosome):
232
233
            # Calc fitness function for each cluster in a chromosome
234
            for _idx_cluster in range(count_clusters):
235
236
                # Initialize ff for a cluster
237
                ff_cluster = 0
238
239
                # For each input points
240
                for _idx_data in range(len(data)):
241
242
                    # If a data belong to current cluster
243
                    if chromosomes[_idx_chromosome][_idx_data] == _idx_cluster:
244
245
                        # Calc Manhattan distance
246
                        data_diff = data[_idx_data] - centres[_idx_chromosome][_idx_cluster]
247
                        ff_cluster += abs(data_diff[0]) + abs(data_diff[1])
248
249
                # Accumulate fitness function values
250
                fitness_function[_idx_chromosome] += ff_cluster
251
252
        return fitness_function
253