| Total Complexity | 23 |
| Total Lines | 222 |
| Duplicated Lines | 35.59 % |
| Changes | 2 | ||
| Bugs | 0 | Features | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
| 1 | """! |
||
| 31 | class GeneticAlgorithm: |
||
| 32 | """! |
||
| 33 | @brief Class represents Genetic clustering algorithm |
||
| 34 | |||
| 35 | """ |
||
| 36 | |||
| 37 | def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2, |
||
| 38 | coeff_mutation_count=0.25): |
||
| 39 | |||
| 40 | # Initialize random |
||
| 41 | np.random.seed() |
||
| 42 | |||
| 43 | # Clustering data |
||
| 44 | if type(data) is list: |
||
| 45 | self.data = np.array(data) |
||
| 46 | else: |
||
| 47 | self.data = data |
||
| 48 | |||
| 49 | # Count clusters |
||
| 50 | self.count_clusters = count_clusters |
||
| 51 | |||
| 52 | # Home many chromosome in population |
||
| 53 | self.chromosome_count = chromosome_count |
||
| 54 | |||
| 55 | # How many populations |
||
| 56 | self.population_count = population_count |
||
| 57 | |||
| 58 | # Count mutation genes |
||
| 59 | self.count_mutation_gens = count_mutation_gens |
||
| 60 | |||
| 61 | # Crossover rate |
||
| 62 | self.crossover_rate = 1.0 |
||
| 63 | |||
| 64 | # Count of chromosome for mutation (range [0, 1]) |
||
| 65 | self.coeff_mutation_count = coeff_mutation_count |
||
| 66 | |||
| 67 | def clustering(self): |
||
| 68 | """ |
||
| 69 | |||
| 70 | :return: |
||
| 71 | """ |
||
| 72 | |||
| 73 | # Initialize population |
||
| 74 | chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count) |
||
| 75 | |||
| 76 | # Initialize the Best solution |
||
| 77 | best_chromosome, best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters) |
||
| 78 | |||
| 79 | # Next population |
||
| 80 | for _ in range(self.population_count): |
||
| 81 | |||
| 82 | # Select |
||
| 83 | chromosomes = self._select(chromosomes, self.data, self.count_clusters) |
||
| 84 | |||
| 85 | # Crossover |
||
| 86 | self._crossover(chromosomes) |
||
| 87 | |||
| 88 | # Mutation |
||
| 89 | self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count) |
||
| 90 | |||
| 91 | # Update the Best Solution |
||
| 92 | new_best_chromosome, new_best_ff = self._get_best_chromosome(chromosomes, self.data, self.count_clusters) |
||
| 93 | |||
| 94 | # Get best chromosome |
||
| 95 | if new_best_ff < best_ff: |
||
| 96 | best_ff = new_best_ff |
||
| 97 | best_chromosome = new_best_chromosome |
||
| 98 | |||
| 99 | return best_chromosome, best_ff |
||
| 100 | |||
| 101 | @staticmethod |
||
| 102 | def _select(chromosomes, data, count_clusters): |
||
| 103 | """ """ |
||
| 104 | |||
| 105 | # Calc centers |
||
| 106 | centres = GAMath.get_centres(chromosomes, data, count_clusters) |
||
| 107 | |||
| 108 | # Calc fitness functions |
||
| 109 | fitness = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes) |
||
| 110 | |||
| 111 | # Calc probability vector |
||
| 112 | probabilities = GAMath.calc_probability_vector(fitness) |
||
| 113 | |||
| 114 | # Select P chromosomes with probabilities |
||
| 115 | new_chromosomes = np.zeros(chromosomes.shape) |
||
| 116 | |||
| 117 | # Selecting |
||
| 118 | for _idx in range(len(chromosomes)): |
||
| 119 | new_chromosomes[_idx] = chromosomes[GAMath.get_uniform(probabilities)] |
||
| 120 | |||
| 121 | return new_chromosomes |
||
| 122 | |||
| 123 | View Code Duplication | @staticmethod |
|
| 124 | def _crossover(chromosomes): |
||
| 125 | """ """ |
||
| 126 | |||
| 127 | # Get pairs to Crossover |
||
| 128 | pairs_to_crossover = np.array(range(len(chromosomes))) |
||
| 129 | |||
| 130 | # Set random pairs |
||
| 131 | np.random.shuffle(pairs_to_crossover) |
||
| 132 | |||
| 133 | # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P) |
||
| 134 | offset_in_pair = int(len(pairs_to_crossover) / 2) |
||
| 135 | |||
| 136 | # For each pair |
||
| 137 | for _idx in range(offset_in_pair): |
||
| 138 | |||
| 139 | # Generate random mask for crossover |
||
| 140 | crossover_mask = GeneticAlgorithm._get_crossover_mask(len(chromosomes[_idx])) |
||
| 141 | |||
| 142 | # Crossover a pair |
||
| 143 | GeneticAlgorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]], |
||
| 144 | chromosomes[pairs_to_crossover[_idx + offset_in_pair]], |
||
| 145 | crossover_mask) |
||
| 146 | |||
| 147 | View Code Duplication | @staticmethod |
|
| 148 | def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count): |
||
| 149 | """ """ |
||
| 150 | |||
| 151 | # Count gens in Chromosome |
||
| 152 | count_gens = len(chromosomes[0]) |
||
| 153 | |||
| 154 | # Get random chromosomes for mutation |
||
| 155 | random_idx_chromosomes = np.array(range(len(chromosomes))) |
||
| 156 | np.random.shuffle(random_idx_chromosomes) |
||
| 157 | |||
| 158 | # |
||
| 159 | for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)): |
||
| 160 | |||
| 161 | # |
||
| 162 | for _ in range(count_gen_for_mutation): |
||
| 163 | |||
| 164 | # Get random gen |
||
| 165 | gen_num = np.random.randint(count_gens) |
||
| 166 | |||
| 167 | # Set random cluster |
||
| 168 | chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters) |
||
| 169 | |||
| 170 | @staticmethod |
||
| 171 | def _crossover_a_pair(chromosome_1, chromosome_2, mask): |
||
| 172 | """ """ |
||
| 173 | |||
| 174 | for _idx in range(len(chromosome_1)): |
||
| 175 | |||
| 176 | if mask[_idx] == 1: |
||
| 177 | # Swap values |
||
| 178 | chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx] |
||
| 179 | |||
| 180 | @staticmethod |
||
| 181 | def _get_crossover_mask(mask_length): |
||
| 182 | """ """ |
||
| 183 | |||
| 184 | # Initialize mask |
||
| 185 | mask = np.zeros(mask_length) |
||
| 186 | |||
| 187 | # Set a half of array to 1 |
||
| 188 | mask[:int(int(mask_length) / 6)] = 1 |
||
| 189 | |||
| 190 | # Random shuffle |
||
| 191 | np.random.shuffle(mask) |
||
| 192 | |||
| 193 | return mask |
||
| 194 | |||
| 195 | @staticmethod |
||
| 196 | def _init_population(count_clusters, count_data, chromosome_count): |
||
| 197 | """ Returns first population as a uniform random choice """ |
||
| 198 | |||
| 199 | population = np.random.randint(count_clusters, size=(chromosome_count, count_data)) |
||
| 200 | |||
| 201 | return population |
||
| 202 | |||
| 203 | @staticmethod |
||
| 204 | def _get_best_chromosome(chromosomes, data, count_clusters): |
||
| 205 | """ """ |
||
| 206 | |||
| 207 | # Calc centers |
||
| 208 | centres = GAMath.get_centres(chromosomes, data, count_clusters) |
||
| 209 | |||
| 210 | # Calc Fitness functions |
||
| 211 | fitness_function = GeneticAlgorithm._calc_fitness_function(centres, data, chromosomes) |
||
| 212 | |||
| 213 | # Index of the best chromosome |
||
| 214 | best_chromosome_idx = fitness_function.argmin() |
||
| 215 | |||
| 216 | # Get chromosome with the best fitness function |
||
| 217 | return chromosomes[best_chromosome_idx], fitness_function[best_chromosome_idx] |
||
| 218 | |||
| 219 | View Code Duplication | @staticmethod |
|
|
1 ignored issue
–
show
|
|||
| 220 | def _calc_fitness_function(centres, data, chromosomes): |
||
| 221 | """ """ |
||
| 222 | |||
| 223 | # Get count of chromosomes and clusters |
||
| 224 | count_chromosome = len(chromosomes) |
||
| 225 | count_clusters = len(centres[0]) |
||
| 226 | |||
| 227 | # Initialize fitness function values |
||
| 228 | fitness_function = np.zeros(count_chromosome) |
||
| 229 | |||
| 230 | # Calc fitness function for each chromosome |
||
| 231 | for _idx_chromosome in range(count_chromosome): |
||
| 232 | |||
| 233 | # Calc fitness function for each cluster in a chromosome |
||
| 234 | for _idx_cluster in range(count_clusters): |
||
| 235 | |||
| 236 | # Initialize ff for a cluster |
||
| 237 | ff_cluster = 0 |
||
| 238 | |||
| 239 | # For each input points |
||
| 240 | for _idx_data in range(len(data)): |
||
| 241 | |||
| 242 | # If a data belong to current cluster |
||
| 243 | if chromosomes[_idx_chromosome][_idx_data] == _idx_cluster: |
||
| 244 | |||
| 245 | # Calc Manhattan distance |
||
| 246 | data_diff = data[_idx_data] - centres[_idx_chromosome][_idx_cluster] |
||
| 247 | ff_cluster += abs(data_diff[0]) + abs(data_diff[1]) |
||
| 248 | |||
| 249 | # Accumulate fitness function values |
||
| 250 | fitness_function[_idx_chromosome] += ff_cluster |
||
| 251 | |||
| 252 | return fitness_function |
||
| 253 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.pyfiles in your module folders. Make sure that you place one file in each sub-folder.