ga_observer.collect_population_best() - Code Metrics - Inspection of "#360: Genetic algorithm visualizer, examples." - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — 0.7.dev ( 5de18e...94d991 )

by Andrei

created 2017-09-14 21:03 UTC

ga_observer.collect_population_best() A

↳ Parent: ga_observer

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
dl	0
loc	8
rs	9.4285
c	0
b	0
f	0

"""!

@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).

@authors Andrey Novikov, Aleksey Kukushkin ([email protected])
@date 2014-2017
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy as np;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import math;

import matplotlib.pyplot as plt;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import matplotlib.animation as animation;
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

from pyclustering.cluster import cluster_visualizer;
from pyclustering.cluster.ga_maths import ga_math;



class ga_observer:
    """!

    """

    def __init__(self, need_global_best=False, need_population_best=False, need_mean_ff=False):
        """ """

        # Global best chromosome and fitness function for each population
        self._global_best_result = {'chromosome': [], 'fitness_function': []};

        # Best chromosome and fitness function on a population
        self._best_population_result = {'chromosome': [], 'fitness_function': []};

        # Mean fitness function on each population
        self._mean_ff_result = [];

        # Flags to collect
        self._need_global_best = need_global_best;
        self._need_population_best = need_population_best;
        self._need_mean_ff = need_mean_ff;


    def __len__(self):
        return len(self._global_best_result['chromosome']);


    def collect_global_best(self, best_chromosome, best_fitness_function):
        """ """

        if not self._need_global_best:
            return;

        self._global_best_result['chromosome'].append(best_chromosome);
        self._global_best_result['fitness_function'].append(best_fitness_function);


    def collect_population_best(self, best_chromosome, best_fitness_function):
        """ """

        if not self._need_population_best:
            return;

        self._best_population_result['chromosome'].append(best_chromosome);
        self._best_population_result['fitness_function'].append(best_fitness_function);


    def collect_mean(self, fitness_functions):
        """ """

        if not self._need_mean_ff:
            return;

        self._mean_ff_result.append(np.mean(fitness_functions));


    def get_global_best(self):
        return self._global_best_result;


    def get_population_best(self):
        return self._best_population_result;


    def get_mean_fitness_function(self):
        return self._mean_ff_result;



class ga_visualizer:
    @staticmethod
    def show_evolution(observer, start_iteration = 0, stop_iteration = None, ax = None, display = True):
        if (ax is None):
            _, ax = plt.subplots(1);
            ax.set_title("Evolution");
        
        if (stop_iteration is None):
            stop_iteration = len(observer);
        
        line_best, = ax.plot(observer.get_global_best()['fitness_function'][start_iteration:stop_iteration], 'r');
        line_current, = ax.plot(observer.get_population_best()['fitness_function'][start_iteration:stop_iteration], 'k');
        line_mean, = ax.plot(observer.get_mean_fitness_function()[start_iteration:stop_iteration], 'c');

        ax.set_xlabel("Iteration");
        ax.set_ylabel("Fitness function");
        ax.legend([line_best, line_current, line_mean], ["The best pop.", "Cur. best pop.", "Average"], prop={'size': 10});

        ax.grid();

        print(start_iteration, stop_iteration);
        if (display is True):
            plt.show();


    @staticmethod
    def show_clusters(data, observer, marker = '.', markersize = None):
        figure = plt.figure();
        ax1 = figure.add_subplot(121);
        
        clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][-1]);
        
        visualizer = cluster_visualizer(1, 2);
        visualizer.append_clusters(clusters, data, 0, marker, markersize);
        visualizer.show(figure, display = False);
        
        ga_visualizer.show_evolution(observer, 0, None, ax1, True);


    @staticmethod
    def animate_cluster_allocation(data, observer, animation_velocity = 75, save_movie = None):
        figure = plt.figure();

        
        def init_frame():
            return frame_generation(0);

        def frame_generation(index_iteration):
            figure.clf();
            
            figure.suptitle("Clustering genetic algorithm (iteration: " + str(index_iteration) +")", fontsize = 20, fontweight = 'bold');
            
            visualizer = cluster_visualizer(4, 2, ["Population #" + str(index_iteration), "The best population"]);
            
            local_minimum_clusters = ga_math.get_clusters_representation(observer.get_population_best()['chromosome'][index_iteration]);
            visualizer.append_clusters(local_minimum_clusters, data, 0);
            
            global_minimum_clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][index_iteration]);
            visualizer.append_clusters(global_minimum_clusters, data, 1);
            
            ax1 = plt.subplot2grid((2, 2), (1, 0), colspan = 2);
            ga_visualizer.show_evolution(observer, 0, index_iteration + 1, ax1, False);
            
            visualizer.show(figure, shift = 0, display = False);
            figure.subplots_adjust(top = 0.85);
            
            return [ figure.gca() ];
        
        iterations = len(observer);
        cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000);

        if (save_movie is not None):
#             plt.rcParams['animation.ffmpeg_path'] = 'D:\\Program Files\\ffmpeg-3.3.1-win64-static\\bin\\ffmpeg.exe';
#             ffmpeg_writer = animation.FFMpegWriter(fps = 15);
#             cluster_animation.save(save_movie, writer = ffmpeg_writer);
            cluster_animation.save(save_movie, writer = 'ffmpeg', fps = 15, bitrate = 1500);
        else:
            plt.show();


class genetic_algorithm:
    """!
    @brief Class represents Genetic clustering algorithm.
    @details The searching capability of genetic algorithms is exploited in order to search for appropriate
             cluster centres.

    """

    def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
                 coeff_mutation_count=0.25, select_coeff=1.0, observer=ga_observer()):
        """!
        @brief Initialize genetic clustering algorithm for cluster analysis.
        
        @param[in] data (numpy.array|list): Input data for clustering that is represented by two dimensional array
                    where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
        @param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
        @param[in] chromosome_count (uint): Amount of chromosomes in each population.
        @param[in] population_count (uint): Amount of populations.
        @param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
        @param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
                    thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
        @param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
                   math.exp(1 + fitness(chromosome) * select_coeff).
        
        """
        
        # Initialize random
        np.random.seed()

        # Clustering data
        if type(data) is list:
            self.data = np.array(data)
        else:
            self.data = data

        # Count clusters
        self.count_clusters = count_clusters

        # Home many chromosome in population
        self.chromosome_count = chromosome_count

        # How many populations
        self.population_count = population_count

        # Count mutation genes
        self.count_mutation_gens = count_mutation_gens

        # Crossover rate
        self.crossover_rate = 1.0

        # Count of chromosome for mutation (range [0, 1])
        self.coeff_mutation_count = coeff_mutation_count

        # Exponential coeff for selection
        self.select_coeff = select_coeff

        # Result of clustering : best chromosome
        self.result_clustering = {'best_chromosome': [],
                                  'best_fitness_function': 0.0}

        # Observer
        self.observer = observer

    def process(self):
        """!
        @brief Perform clustering procedure in line with rule of genetic clustering algorithm.
        
        @see get_clusters()
        
        """

        # Initialize population
        chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)

        # Initialize the Best solution
        best_chromosome, best_ff, first_fitness_functions \
            = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

        # Save best result into observer
        self.observer.collect_global_best(best_chromosome, best_ff)
        self.observer.collect_population_best(best_chromosome, best_ff)
        self.observer.collect_mean(first_fitness_functions)

        # Next population
        for _idx in range(self.population_count):


            # Select
            chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)

            # Crossover
            self._crossover(chromosomes)

            # Mutation
            self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)

            # Update the Best Solution
            new_best_chromosome, new_best_ff, fitness_functions \
                = self._get_best_chromosome(chromosomes, self.data, self.count_clusters)

            # Get best chromosome
            if new_best_ff < best_ff:
                best_ff = new_best_ff
                best_chromosome = new_best_chromosome

            # Save best result into observer
            self.observer.collect_global_best(best_chromosome, best_ff)
            self.observer.collect_population_best(new_best_chromosome, new_best_ff)
            self.observer.collect_mean(fitness_functions)

        # Save result
        self.result_clustering['best_chromosome'] = best_chromosome
        self.result_clustering['best_fitness_function'] = best_ff

        return best_chromosome, best_ff


    def get_observer(self):
        """!
        @brief Returns genetic algorithm observer.
        
        """
        return self.observer


    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
        
        @return (list) List of allocated clusters.
        
        @see process()
        
        """

        return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)


    @staticmethod
    def _select(chromosomes, data, count_clusters, select_coeff):
        """!
        @brief Performs selection procedure where new chromosomes are calculated.
        
        @param[in] chromosomes (numpy.array): Chromosomes 
        
        """

        # Calc centers
        centres = ga_math.get_centres(chromosomes, data, count_clusters)

        # Calc fitness functions
        fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)

        for _idx in range(len(fitness)):
            fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)

        # Calc probability vector
        probabilities = ga_math.calc_probability_vector(fitness)

        # Select P chromosomes with probabilities
        new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)

        # Selecting
        for _idx in range(len(chromosomes)):
            new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]

        return new_chromosomes


    @staticmethod
    def _crossover(chromosomes):
        """!
        @brief Crossover procedure.
        
        """

        # Get pairs to Crossover
        pairs_to_crossover = np.array(range(len(chromosomes)))

        # Set random pairs
        np.random.shuffle(pairs_to_crossover)

        # Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. | P1, P2, ...] crossover between V<->P)
        offset_in_pair = int(len(pairs_to_crossover) / 2)

        # For each pair
        for _idx in range(offset_in_pair):

            # Generate random mask for crossover
            crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))

            # Crossover a pair
            genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
                                                chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
                                                crossover_mask)


    @staticmethod
    def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
        """!
        @brief Mutation procedure.
        
        """

        # Count gens in Chromosome
        count_gens = len(chromosomes[0])

        # Get random chromosomes for mutation
        random_idx_chromosomes = np.array(range(len(chromosomes)))
        np.random.shuffle(random_idx_chromosomes)

        #
        for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):

            #
            for _ in range(count_gen_for_mutation):

                # Get random gen
                gen_num = np.random.randint(count_gens)

                # Set random cluster
                chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)


    @staticmethod
    def _crossover_a_pair(chromosome_1, chromosome_2, mask):
        """!
        @brief Crossovers a pair of chromosomes.
        
        @param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
        @param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
        @param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
        
        """

        for _idx in range(len(chromosome_1)):

            if mask[_idx] == 1:
                # Swap values
                chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]


    @staticmethod
    def _get_crossover_mask(mask_length):
        """!
        @brief Crossover mask to crossover a pair of chromosomes.
        
        @param[in] mask_length (uint): Length of the mask.
        
        """

        # Initialize mask
        mask = np.zeros(mask_length)

        # Set a half of array to 1
        mask[:int(int(mask_length) / 6)] = 1

        # Random shuffle
        np.random.shuffle(mask)

        return mask


    @staticmethod
    def _init_population(count_clusters, count_data, chromosome_count):
        """!
        @brief Returns first population as a uniform random choice.
        
        @param[in] count_clusters (uint):
        @param[in] count_data (uint):
        @param[in] chromosome_count (uint): 
        
        """

        population = np.random.randint(count_clusters, size=(chromosome_count, count_data))

        return population


    @staticmethod
    def _get_best_chromosome(chromosomes, data, count_clusters):
        """!
        @brief 
        
        """

        # Calc centers
        centres = ga_math.get_centres(chromosomes, data, count_clusters)

        # Calc Fitness functions
        fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)

        # Index of the best chromosome
        best_chromosome_idx = fitness_functions.argmin()

        # Get chromosome with the best fitness function
        return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions


    @staticmethod
    def _calc_fitness_function(centres, data, chromosomes):
        """!
        @brief 
        
        """

        # Get count of chromosomes and clusters
        count_chromosome = len(chromosomes)

        # Initialize fitness function values
        fitness_function = np.zeros(count_chromosome)

        # Calc fitness function for each chromosome
        for _idx_chromosome in range(count_chromosome):

            # Get centers for a selected chromosome
            centres_data = np.zeros(data.shape)

            # Fill data centres
            for _idx in range(len(data)):
                centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]

            # Get City Block distance for a chromosome
            fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))

        return fitness_function


Push — 0.7.dev ( 5de18e...94d991 )

ga_observer.collect_population_best() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing init.py files

1. Missing Dependencies

2. Missing init.py files

1. Missing Dependencies

2. Missing init.py files

1		"""!
2
3		@brief Cluster analysis algorithm: Genetic clustering algorithm (GA).
4
5		@authors Andrey Novikov, Aleksey Kukushkin ([email protected])
6		@date 2014-2017
7		@copyright GNU Public License
8
9		@cond GNU_PUBLIC_LICENSE
10		PyClustering is free software: you can redistribute it and/or modify
11		it under the terms of the GNU General Public License as published by
12		the Free Software Foundation, either version 3 of the License, or
13		(at your option) any later version.
14
15		PyClustering is distributed in the hope that it will be useful,
16		but WITHOUT ANY WARRANTY; without even the implied warranty of
17		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18		GNU General Public License for more details.
19
20		You should have received a copy of the GNU General Public License
21		along with this program. If not, see <http://www.gnu.org/licenses/>.
22		@endcond
23
24		"""
25
26
27		import numpy as np;
		0 ignored issues – show Configuration introduced 2017-09-08 16:24 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
28		import math;
29
30		import matplotlib.pyplot as plt;
		0 ignored issues – show Configuration introduced 2016-06-21 15:55 UTC by Report Bug Copy Issue Report The import `matplotlib.pyplot` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
31		import matplotlib.animation as animation;
		0 ignored issues – show Configuration introduced 2016-06-21 15:55 UTC by Report Bug Copy Issue Report The import `matplotlib.animation` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
32
33		from pyclustering.cluster import cluster_visualizer;
34		from pyclustering.cluster.ga_maths import ga_math;
35
36
37
38		class ga_observer:
39		"""!
40
41		"""
42
43		def __init__(self, need_global_best=False, need_population_best=False, need_mean_ff=False):
44		""" """
45
46		# Global best chromosome and fitness function for each population
47		self._global_best_result = {'chromosome': [], 'fitness_function': []};
48
49		# Best chromosome and fitness function on a population
50		self._best_population_result = {'chromosome': [], 'fitness_function': []};
51
52		# Mean fitness function on each population
53		self._mean_ff_result = [];
54
55		# Flags to collect
56		self._need_global_best = need_global_best;
57		self._need_population_best = need_population_best;
58		self._need_mean_ff = need_mean_ff;
59
60
61		def __len__(self):
62		return len(self._global_best_result['chromosome']);
63
64
65		def collect_global_best(self, best_chromosome, best_fitness_function):
66		""" """
67
68		if not self._need_global_best:
69		return;
70
71		self._global_best_result['chromosome'].append(best_chromosome);
72		self._global_best_result['fitness_function'].append(best_fitness_function);
73
74
75		def collect_population_best(self, best_chromosome, best_fitness_function):
76		""" """
77
78		if not self._need_population_best:
79		return;
80
81		self._best_population_result['chromosome'].append(best_chromosome);
82		self._best_population_result['fitness_function'].append(best_fitness_function);
83
84
85		def collect_mean(self, fitness_functions):
86		""" """
87
88		if not self._need_mean_ff:
89		return;
90
91		self._mean_ff_result.append(np.mean(fitness_functions));
92
93
94		def get_global_best(self):
95		return self._global_best_result;
96
97
98		def get_population_best(self):
99		return self._best_population_result;
100
101
102		def get_mean_fitness_function(self):
103		return self._mean_ff_result;
104
105
106
107		class ga_visualizer:
108		@staticmethod
109		def show_evolution(observer, start_iteration = 0, stop_iteration = None, ax = None, display = True):
110		if (ax is None):
111		_, ax = plt.subplots(1);
112		ax.set_title("Evolution");
113
114		if (stop_iteration is None):
115		stop_iteration = len(observer);
116
117		line_best, = ax.plot(observer.get_global_best()['fitness_function'][start_iteration:stop_iteration], 'r');
118		line_current, = ax.plot(observer.get_population_best()['fitness_function'][start_iteration:stop_iteration], 'k');
119		line_mean, = ax.plot(observer.get_mean_fitness_function()[start_iteration:stop_iteration], 'c');
120
121		ax.set_xlabel("Iteration");
122		ax.set_ylabel("Fitness function");
123	View Code Duplication	ax.legend([line_best, line_current, line_mean], ["The best pop.", "Cur. best pop.", "Average"], prop={'size': 10});
		0 ignored issues – show Duplication introduced 2017-09-14 21:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
124		ax.grid();
125
126		print(start_iteration, stop_iteration);
127		if (display is True):
128		plt.show();
129
130
131		@staticmethod
132		def show_clusters(data, observer, marker = '.', markersize = None):
133		figure = plt.figure();
134		ax1 = figure.add_subplot(121);
135
136		clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][-1]);
137
138		visualizer = cluster_visualizer(1, 2);
139		visualizer.append_clusters(clusters, data, 0, marker, markersize);
140		visualizer.show(figure, display = False);
141
142		ga_visualizer.show_evolution(observer, 0, None, ax1, True);
143
144
145		@staticmethod
146		def animate_cluster_allocation(data, observer, animation_velocity = 75, save_movie = None):
147	View Code Duplication	figure = plt.figure();
		0 ignored issues – show Duplication introduced 2017-09-14 21:03 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
148
149		def init_frame():
150		return frame_generation(0);
151
152		def frame_generation(index_iteration):
153		figure.clf();
154
155		figure.suptitle("Clustering genetic algorithm (iteration: " + str(index_iteration) +")", fontsize = 20, fontweight = 'bold');
156
157		visualizer = cluster_visualizer(4, 2, ["Population #" + str(index_iteration), "The best population"]);
158
159		local_minimum_clusters = ga_math.get_clusters_representation(observer.get_population_best()['chromosome'][index_iteration]);
160		visualizer.append_clusters(local_minimum_clusters, data, 0);
161
162		global_minimum_clusters = ga_math.get_clusters_representation(observer.get_global_best()['chromosome'][index_iteration]);
163		visualizer.append_clusters(global_minimum_clusters, data, 1);
164
165		ax1 = plt.subplot2grid((2, 2), (1, 0), colspan = 2);
166		ga_visualizer.show_evolution(observer, 0, index_iteration + 1, ax1, False);
167
168		visualizer.show(figure, shift = 0, display = False);
169		figure.subplots_adjust(top = 0.85);
170
171		return [ figure.gca() ];
172
173		iterations = len(observer);
174		cluster_animation = animation.FuncAnimation(figure, frame_generation, iterations, interval = animation_velocity, init_func = init_frame, repeat_delay = 5000);
175
176		if (save_movie is not None):
177		# plt.rcParams['animation.ffmpeg_path'] = 'D:\\Program Files\\ffmpeg-3.3.1-win64-static\\bin\\ffmpeg.exe';
178		# ffmpeg_writer = animation.FFMpegWriter(fps = 15);
179		# cluster_animation.save(save_movie, writer = ffmpeg_writer);
180		cluster_animation.save(save_movie, writer = 'ffmpeg', fps = 15, bitrate = 1500);
181		else:
182		plt.show();
183
184
185		class genetic_algorithm:
186		"""!
187		@brief Class represents Genetic clustering algorithm.
188		@details The searching capability of genetic algorithms is exploited in order to search for appropriate
189		cluster centres.
190
191		"""
192
193		def __init__(self, data, count_clusters, chromosome_count, population_count, count_mutation_gens=2,
194		coeff_mutation_count=0.25, select_coeff=1.0, observer=ga_observer()):
195		"""!
196		@brief Initialize genetic clustering algorithm for cluster analysis.
197
198		@param[in] data (numpy.array\|list): Input data for clustering that is represented by two dimensional array
199		where each row is a point, for example, [[0.0, 2.1], [0.1, 2.0], [-0.2, 2.4]].
200		@param[in] count_clusters (uint): Amount of clusters that should be allocated in the data.
201		@param[in] chromosome_count (uint): Amount of chromosomes in each population.
202		@param[in] population_count (uint): Amount of populations.
203		@param[in] count_mutation_gens (uint): Amount of genes in chromosome that is mutated on each step.
204		@param[in] coeff_mutation_count (float): Percent of chromosomes for mutation, destributed in range (0, 1] and
205		thus amount of chromosomes is defined as follows: 'chromosome_count' * 'coeff_mutation_count'.
206		@param[in] select_coeff (float): Exponential coefficient for selection procedure that is used as follows:
207		math.exp(1 + fitness(chromosome) * select_coeff).
208
209		"""
210
211		# Initialize random
212		np.random.seed()
213
214		# Clustering data
215		if type(data) is list:
216		self.data = np.array(data)
217		else:
218		self.data = data
219
220		# Count clusters
221		self.count_clusters = count_clusters
222
223		# Home many chromosome in population
224		self.chromosome_count = chromosome_count
225
226		# How many populations
227		self.population_count = population_count
228
229		# Count mutation genes
230		self.count_mutation_gens = count_mutation_gens
231
232		# Crossover rate
233		self.crossover_rate = 1.0
234
235		# Count of chromosome for mutation (range [0, 1])
236		self.coeff_mutation_count = coeff_mutation_count
237
238		# Exponential coeff for selection
239		self.select_coeff = select_coeff
240
241		# Result of clustering : best chromosome
242		self.result_clustering = {'best_chromosome': [],
243		'best_fitness_function': 0.0}
244
245		# Observer
246		self.observer = observer
247
248		def process(self):
249		"""!
250		@brief Perform clustering procedure in line with rule of genetic clustering algorithm.
251
252		@see get_clusters()
253
254		"""
255
256		# Initialize population
257		chromosomes = self._init_population(self.count_clusters, len(self.data), self.chromosome_count)
258
259		# Initialize the Best solution
260		best_chromosome, best_ff, first_fitness_functions \
261		= self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
262
263		# Save best result into observer
264		self.observer.collect_global_best(best_chromosome, best_ff)
265		self.observer.collect_population_best(best_chromosome, best_ff)
266		self.observer.collect_mean(first_fitness_functions)
267
268		# Next population
269		for _idx in range(self.population_count):
		0 ignored issues – show Unused Code introduced 2017-09-14 10:15 UTC by Report Bug Copy Issue Report The variable `_idx` seems to be unused. Loading history...
270
271		# Select
272		chromosomes = self._select(chromosomes, self.data, self.count_clusters, self.select_coeff)
273
274		# Crossover
275		self._crossover(chromosomes)
276
277		# Mutation
278		self._mutation(chromosomes, self.count_clusters, self.count_mutation_gens, self.coeff_mutation_count)
279
280		# Update the Best Solution
281		new_best_chromosome, new_best_ff, fitness_functions \
282		= self._get_best_chromosome(chromosomes, self.data, self.count_clusters)
283
284		# Get best chromosome
285		if new_best_ff < best_ff:
286		best_ff = new_best_ff
287		best_chromosome = new_best_chromosome
288
289		# Save best result into observer
290		self.observer.collect_global_best(best_chromosome, best_ff)
291		self.observer.collect_population_best(new_best_chromosome, new_best_ff)
292		self.observer.collect_mean(fitness_functions)
293
294		# Save result
295		self.result_clustering['best_chromosome'] = best_chromosome
296		self.result_clustering['best_fitness_function'] = best_ff
297
298		return best_chromosome, best_ff
299
300
301		def get_observer(self):
302		"""!
303		@brief Returns genetic algorithm observer.
304
305		"""
306		return self.observer
307
308
309		def get_clusters(self):
310		"""!
311		@brief Returns list of allocated clusters, each cluster contains indexes of objects from the data.
312
313		@return (list) List of allocated clusters.
314
315		@see process()
316
317		"""
318
319		return ga_math.get_clusters_representation(self.result_clustering['best_chromosome'], self.count_clusters)
320
321
322		@staticmethod
323		def _select(chromosomes, data, count_clusters, select_coeff):
324		"""!
325		@brief Performs selection procedure where new chromosomes are calculated.
326
327		@param[in] chromosomes (numpy.array): Chromosomes
328
329		"""
330
331		# Calc centers
332		centres = ga_math.get_centres(chromosomes, data, count_clusters)
333
334		# Calc fitness functions
335		fitness = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
336
337		for _idx in range(len(fitness)):
338		fitness[_idx] = math.exp(1 + fitness[_idx] * select_coeff)
339
340		# Calc probability vector
341		probabilities = ga_math.calc_probability_vector(fitness)
342
343		# Select P chromosomes with probabilities
344		new_chromosomes = np.zeros(chromosomes.shape, dtype=np.int)
345
346		# Selecting
347		for _idx in range(len(chromosomes)):
348		new_chromosomes[_idx] = chromosomes[ga_math.get_uniform(probabilities)]
349
350		return new_chromosomes
351
352
353		@staticmethod
354		def _crossover(chromosomes):
355		"""!
356		@brief Crossover procedure.
357
358		"""
359
360		# Get pairs to Crossover
361		pairs_to_crossover = np.array(range(len(chromosomes)))
362
363		# Set random pairs
364		np.random.shuffle(pairs_to_crossover)
365
366		# Index offset ( pairs_to_crossover split into 2 parts : [V1, V2, .. \| P1, P2, ...] crossover between V<->P)
367		offset_in_pair = int(len(pairs_to_crossover) / 2)
368
369		# For each pair
370		for _idx in range(offset_in_pair):
371
372		# Generate random mask for crossover
373		crossover_mask = genetic_algorithm._get_crossover_mask(len(chromosomes[_idx]))
374
375		# Crossover a pair
376		genetic_algorithm._crossover_a_pair(chromosomes[pairs_to_crossover[_idx]],
377		chromosomes[pairs_to_crossover[_idx + offset_in_pair]],
378		crossover_mask)
379
380
381		@staticmethod
382		def _mutation(chromosomes, count_clusters, count_gen_for_mutation, coeff_mutation_count):
383		"""!
384		@brief Mutation procedure.
385
386		"""
387
388		# Count gens in Chromosome
389		count_gens = len(chromosomes[0])
390
391		# Get random chromosomes for mutation
392		random_idx_chromosomes = np.array(range(len(chromosomes)))
393		np.random.shuffle(random_idx_chromosomes)
394
395		#
396		for _idx_chromosome in range(int(len(random_idx_chromosomes) * coeff_mutation_count)):
397
398		#
399		for _ in range(count_gen_for_mutation):
400
401		# Get random gen
402		gen_num = np.random.randint(count_gens)
403
404		# Set random cluster
405		chromosomes[random_idx_chromosomes[_idx_chromosome]][gen_num] = np.random.randint(count_clusters)
406
407
408		@staticmethod
409		def _crossover_a_pair(chromosome_1, chromosome_2, mask):
410		"""!
411		@brief Crossovers a pair of chromosomes.
412
413		@param[in] chromosome_1 (numpy.array): The first chromosome for crossover.
414		@param[in] chromosome_2 (numpy.array): The second chromosome for crossover.
415		@param[in] mask (numpy.array): Crossover mask that defines which genes should be swapped.
416
417		"""
418
419		for _idx in range(len(chromosome_1)):
420
421		if mask[_idx] == 1:
422		# Swap values
423		chromosome_1[_idx], chromosome_2[_idx] = chromosome_2[_idx], chromosome_1[_idx]
424
425
426		@staticmethod
427		def _get_crossover_mask(mask_length):
428		"""!
429		@brief Crossover mask to crossover a pair of chromosomes.
430
431		@param[in] mask_length (uint): Length of the mask.
432
433		"""
434
435		# Initialize mask
436		mask = np.zeros(mask_length)
437
438		# Set a half of array to 1
439		mask[:int(int(mask_length) / 6)] = 1
440
441		# Random shuffle
442		np.random.shuffle(mask)
443
444		return mask
445
446
447		@staticmethod
448		def _init_population(count_clusters, count_data, chromosome_count):
449		"""!
450		@brief Returns first population as a uniform random choice.
451
452		@param[in] count_clusters (uint):
453		@param[in] count_data (uint):
454		@param[in] chromosome_count (uint):
455
456		"""
457
458		population = np.random.randint(count_clusters, size=(chromosome_count, count_data))
459
460		return population
461
462
463		@staticmethod
464		def _get_best_chromosome(chromosomes, data, count_clusters):
465		"""!
466		@brief
467
468		"""
469
470		# Calc centers
471		centres = ga_math.get_centres(chromosomes, data, count_clusters)
472
473		# Calc Fitness functions
474		fitness_functions = genetic_algorithm._calc_fitness_function(centres, data, chromosomes)
475
476		# Index of the best chromosome
477		best_chromosome_idx = fitness_functions.argmin()
478
479		# Get chromosome with the best fitness function
480		return chromosomes[best_chromosome_idx], fitness_functions[best_chromosome_idx], fitness_functions
481
482
483		@staticmethod
484		def _calc_fitness_function(centres, data, chromosomes):
485		"""!
486		@brief
487
488		"""
489
490		# Get count of chromosomes and clusters
491		count_chromosome = len(chromosomes)
492
493		# Initialize fitness function values
494		fitness_function = np.zeros(count_chromosome)
495
496		# Calc fitness function for each chromosome
497		for _idx_chromosome in range(count_chromosome):
498
499		# Get centers for a selected chromosome
500		centres_data = np.zeros(data.shape)
501
502		# Fill data centres
503		for _idx in range(len(data)):
504		centres_data[_idx] = centres[_idx_chromosome][chromosomes[_idx_chromosome][_idx]]
505
506		# Get City Block distance for a chromosome
507		fitness_function[_idx_chromosome] += np.sum(abs(data - centres_data))
508
509		return fitness_function
510

annoviko / pyclustering

Push — 0.7.dev ( 5de18e...94d991 )

ga_observer.collect_population_best() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing init.py files

2. Missing init.py files

2. Missing init.py files