"""!

@brief Cluster analysis algorithm: CURE
@details Implementation based on paper @cite article::cure::1.

@authors Andrei Novikov ([email protected])
@date 2014-2018
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond

"""

import numpy
Analysis note: Pylint reports that the import numpy could not be resolved (the dependency has to be available in the analysis environment) and that the import appears to be unused in this module.

from pyclustering.cluster.encoder import type_encoding;

from pyclustering.utils import euclidean_distance;

from pyclustering.container.kdtree import kdtree;

from pyclustering.core.wrapper import ccore_library

import pyclustering.core.cure_wrapper as wrapper;


class cure_cluster:
    """!
    @brief Represents a data cluster in CURE terms.
    @details A CURE cluster is described by the points it contains, by its representative points and by the cluster mean (center).
    
    """
    
    def __init__(self, point, index):
        """!
        @brief Constructor of CURE cluster.
        
        @param[in] point (list): Point represented by a list of coordinates.
        @param[in] index (uint): Index of the point in the input dataset.
        
        """
        
        ## List of points that make up the cluster.
        self.points = [ ];
        
        ## Indexes of the points in the input dataset.
        self.indexes = -1;
        
        ## Mean of the points that make up the cluster.
        self.mean = None;
        
        ## List of representative points of the cluster.
        self.rep = [ ];
        
        if (point is not None):
            self.points = [ point ];
            self.indexes = [ index ];
            self.mean = point;
            self.rep = [ point ];
        
        ## Pointer to the closest cluster.
        self.closest = None;
        
        ## Distance to the closest cluster.
        self.distance = float('inf');      # distance calculation is an expensive operation (even squared distance), so the distance to the closest cluster is cached.

    def __repr__(self):
        """!
        @brief Displays the distance to the closest cluster and the points contained by the current cluster.
        
        """
        return "%s, %s" % (self.distance, self.points);


class cure:
    """!
    @brief Class represents the clustering algorithm CURE with KD-tree optimization.
    @details The CCORE option can be used to run the pyclustering core - a C/C++ shared library - which significantly increases processing performance.
    
    Example:
    @code
        from pyclustering.utils import read_sample;
        
        # read data for clustering from some file
        sample = read_sample(path_to_data);
        
        # create instance of the cure algorithm for cluster analysis,
        # requesting allocation of two clusters.
        cure_instance = cure(sample, 2, 5, 0.5, True);
        
        # run cluster analysis
        cure_instance.process();
        
        # get results of clustering
        clusters = cure_instance.get_clusters();
    @endcode
    
    """
    
    def __init__(self, data, number_cluster, number_represent_points = 5, compression = 0.5, ccore = True):
        """!
        @brief Constructor of clustering algorithm CURE.
        
        @param[in] data (array_like): Input data that should be processed.
        @param[in] number_cluster (uint): Number of clusters that should be allocated.
        @param[in] number_represent_points (uint): Number of representative points for each cluster.
        @param[in] compression (double): Coefficient that defines the level of shrinking of the representative points toward the mean of the newly created cluster after each merge step. Usually it lies between 0 and 1.
        @param[in] ccore (bool): If True then the CCORE DLL (C++ implementation) is used for processing.
        
        """
        
        self.__pointer_data = data
        
        self.__clusters = None
        self.__representors = None
        self.__means = None
        
        self.__number_cluster = number_cluster
        self.__number_represent_points = number_represent_points
        self.__compression = compression
        
        self.__ccore = ccore
        if self.__ccore:
            self.__ccore = ccore_library.workable()

        self.__validate_arguments()


    def process(self):
        """!
        @brief Performs cluster analysis in line with the rules of the CURE algorithm.
        
        @remark Results of clustering can be obtained using the corresponding get methods.
        
        @see get_clusters()
        
        """
        
        if (self.__ccore is True):
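            # The whole clustering is delegated to the C/C++ core library: results
            # are copied back into Python structures and the native data is released.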
            cure_data_pointer = wrapper.cure_algorithm(self.__pointer_data, self.__number_cluster, self.__number_represent_points, self.__compression);
            
            self.__clusters = wrapper.cure_get_clusters(cure_data_pointer);
            self.__representors = wrapper.cure_get_representors(cure_data_pointer);
            self.__means = wrapper.cure_get_means(cure_data_pointer);
            
            wrapper.cure_data_destroy(cure_data_pointer);
            
        else:
            self.__create_queue()      # create queue of single-point clusters sorted by distance to the closest cluster
            self.__create_kdtree()     # create k-d tree
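
            # Agglomerative phase: while there are more clusters than requested,
            # take the head of the queue (the cluster with the nearest neighbor),
            # merge it with its closest cluster, rebuild the representative points
            # and update the closest-cluster links of the remaining clusters.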
            while (len(self.__queue) > self.__number_cluster):
                cluster1 = self.__queue[0];     # cluster that has the nearest neighbor.
                cluster2 = cluster1.closest;    # closest cluster.
                
                self.__queue.remove(cluster1);
                self.__queue.remove(cluster2);
                
                self.__delete_represented_points(cluster1);
                self.__delete_represented_points(cluster2);
        
                merged_cluster = self.__merge_clusters(cluster1, cluster2);
        
                self.__insert_represented_points(merged_cluster);
                
                # Pointers to clusters that should be relocated are stored here.
                cluster_relocation_requests = [];
                
                # Check whether any clusters are left in the queue (the merged cluster may be the last one).
                if (len(self.__queue) > 0):
                    merged_cluster.closest = self.__queue[0];  # arbitrary cluster from the queue
                    merged_cluster.distance = self.__cluster_distance(merged_cluster, merged_cluster.closest);
                    
                    for item in self.__queue:
                        distance = self.__cluster_distance(merged_cluster, item);
                        # Check if the distance between the new cluster and the current one is the best so far.
                        if (distance < merged_cluster.distance):
                            merged_cluster.closest = item;
                            merged_cluster.distance = distance;
                        
                        # Check if the current cluster has lost its closest neighbor because of the merge.
                        if ( (item.closest is cluster1) or (item.closest is cluster2) ):
                            # If the previous distance was less than the distance to the new cluster, then the nearest cluster should be found in the tree.
                            if (item.distance < distance):
                                (item.closest, item.distance) = self.__closest_cluster(item, distance);
                                
                                # TODO: investigation of the root cause is required.
                                # The cluster itself and the merged cluster should always be in the list of neighbors within the specified radius.
                                # But the merged cluster may be missing from the list due to calculation error, therefore it is added manually.
                                if (item.closest is None):
                                    item.closest = merged_cluster;
                                    item.distance = distance;
                                
                            # Otherwise the new cluster is the nearest one.
                            else:
                                item.closest = merged_cluster;
                                item.distance = distance;
                            
                            cluster_relocation_requests.append(item);
                        elif (item.distance > distance):
                            item.closest = merged_cluster;
                            item.distance = distance;
                            
                            cluster_relocation_requests.append(item);
                
                # The new cluster and the updated clusters should be relocated in the queue.
                self.__insert_cluster(merged_cluster);
                for item in cluster_relocation_requests:
                    self.__relocate_cluster(item);
        
            # Change cluster representation.
            self.__clusters = [ cure_cluster_unit.indexes for cure_cluster_unit in self.__queue ];
            self.__representors = [ cure_cluster_unit.rep for cure_cluster_unit in self.__queue ];
            self.__means = [ cure_cluster_unit.mean for cure_cluster_unit in self.__queue ];


    def get_clusters(self):
        """!
        @brief Returns the list of allocated clusters; each cluster contains indexes of objects from the input data.
        
        @return (list) List of allocated clusters.
        
        @see process()
        @see get_representors()
        @see get_means()
        
        """
        
        return self.__clusters;


    def get_representors(self):
        """!
        @brief Returns the list of representative points of each cluster.
        @details The cluster index should be used for navigation between the lists of representative points.
        
        @return (list) List of representative points of each cluster.
        
        @see get_clusters()
        @see get_means()
        
        """
        
        return self.__representors;


    def get_means(self):
        """!
        @brief Returns the list of mean values of each cluster.
        @details The cluster index should be used for navigation between the mean values.
        
        @return (list) List of mean values of each cluster.
        
        @see get_clusters()
        @see get_representors()
        
        """
        
        return self.__means;


    def get_cluster_encoding(self):
        """!
        @brief Returns the clustering result representation type that indicates how clusters are encoded.
        
        @return (type_encoding) Clustering result representation.
        
        @see get_clusters()
        
        """
        
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;


    def __validate_arguments(self):
        """!
        @brief Checks input arguments of the CURE algorithm and throws an appropriate exception if any of them is incorrect.

        """

        if len(self.__pointer_data) == 0:
            raise ValueError("Empty input data. Data should contain at least one point.")

        if self.__number_cluster <= 0:
            raise ValueError("Incorrect amount of clusters '%d'. Amount of clusters should be greater than 0." % self.__number_cluster)

        if self.__compression < 0:
            raise ValueError("Incorrect compression level '%f'. Compression should not be negative." % self.__compression)

        if self.__number_represent_points <= 0:
            raise ValueError("Incorrect amount of representatives '%d'. Amount of representatives should be greater than 0." % self.__number_represent_points)


    def __insert_cluster(self, cluster):
        """!
        @brief Inserts a cluster into the sorted queue in line with its distance to the closest cluster.
        
        @param[in] cluster (cure_cluster): Cluster that should be inserted.
        
        """
        
        for index in range(len(self.__queue)):
            if (cluster.distance < self.__queue[index].distance):
                self.__queue.insert(index, cluster);
                return;
    
        self.__queue.append(cluster);


    def __relocate_cluster(self, cluster):
        """!
        @brief Relocates a cluster in the queue in line with the distance order.
        
        @param[in] cluster (cure_cluster): Cluster that should be relocated in line with the order.
        
        """
        
        self.__queue.remove(cluster);
        self.__insert_cluster(cluster);


    def __closest_cluster(self, cluster, distance):
        """!
        @brief Finds the closest cluster to the specified cluster within the given distance.
        
        @param[in] cluster (cure_cluster): Cluster for which the nearest cluster should be found.
        @param[in] distance (double): Search radius - the current distance to the previous closest cluster.
        
        @return (tuple) Pair (nearest CURE cluster, nearest distance); if no nearest cluster is found, (None, float('inf')) is returned.
        
        """
        
        nearest_cluster = None;
        nearest_distance = float('inf');
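        # Each representative point of the given cluster queries the k-d tree for
        # nodes within the current closest distance; the best candidate that does
        # not belong to the cluster itself becomes the new closest cluster.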
        
        for point in cluster.rep:
            # Nearest nodes should be returned (at least the query point's own node will be returned).
            nearest_nodes = self.__tree.find_nearest_dist_nodes(point, distance);
            for (candidate_distance, kdtree_node) in nearest_nodes:
                if ( (candidate_distance < nearest_distance) and (kdtree_node is not None) and (kdtree_node.payload is not cluster) ):
                    nearest_distance = candidate_distance;
                    nearest_cluster = kdtree_node.payload;
                    
        return (nearest_cluster, nearest_distance);


    def __insert_represented_points(self, cluster):
        """!
        @brief Inserts the representative points of the cluster into the k-d tree.
        
        @param[in] cluster (cure_cluster): Cluster whose representative points should be inserted.
        
        """
        
        for point in cluster.rep:
            self.__tree.insert(point, cluster);


    def __delete_represented_points(self, cluster): 
        """!
        @brief Removes the representative points of the cluster from the k-d tree.
        
        @param[in] cluster (cure_cluster): Cluster whose representative points should be removed.
        
        """
        
        for point in cluster.rep:
            self.__tree.remove(point, payload=cluster);


    def __merge_clusters(self, cluster1, cluster2):
        """!
        @brief Merges two clusters and returns the new merged cluster. Representative points and the mean are calculated for the new cluster.
        
        @param[in] cluster1 (cure_cluster): Cluster that should be merged.
        @param[in] cluster2 (cure_cluster): Cluster that should be merged.
        
        @return (cure_cluster) New merged CURE cluster.
        
        """
        
        merged_cluster = cure_cluster(None, None);
        
        merged_cluster.points = cluster1.points + cluster2.points;
        merged_cluster.indexes = cluster1.indexes + cluster2.indexes;
        
        # The new mean is the size-weighted average of the two cluster means.
        dimension = len(cluster1.mean);
        merged_cluster.mean = [0] * dimension;
        if merged_cluster.points[1:] == merged_cluster.points[:-1]:
            # All merged points are identical, so the mean is simply that point.
            merged_cluster.mean = merged_cluster.points[0]
        else:
            for index in range(dimension):
                merged_cluster.mean[index] = ( len(cluster1.points) * cluster1.mean[index] + len(cluster2.points) * cluster2.mean[index] ) / ( len(cluster1.points) + len(cluster2.points) );
        
        temporary = list();
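        # Select up to __number_represent_points scattered points: the first one is the
        # farthest from the mean, each next one is the farthest from the already selected
        # points (a farthest-point heuristic over the merged cluster's points).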
        
        for index in range(self.__number_represent_points):
            maximal_distance = 0;
            maximal_point = None;
            
            for point in merged_cluster.points:
                minimal_distance = 0;
                if (index == 0):
                    minimal_distance = euclidean_distance(point, merged_cluster.mean);
                    #minimal_distance = euclidean_distance_sqrt(point, merged_cluster.mean);
                else:
                    minimal_distance = min([euclidean_distance(point, p) for p in temporary]);
                    #minimal_distance = cluster_distance(cure_cluster(point), cure_cluster(temporary[0]));
                    
                if (minimal_distance >= maximal_distance):
                    maximal_distance = minimal_distance;
                    maximal_point = point;
        
            if (maximal_point not in temporary):
                temporary.append(maximal_point);
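
        # Shrink the selected points toward the cluster mean by the compression factor;
        # the shrunk points become the representative points of the merged cluster.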
        for point in temporary:
            representative_point = [0] * dimension;
            for index in range(dimension):
                representative_point[index] = point[index] + self.__compression * (merged_cluster.mean[index] - point[index]);
                
            merged_cluster.rep.append(representative_point);
        
        return merged_cluster;


    def __create_queue(self):
        """!
        @brief Creates the queue of clusters sorted by the distance to the closest cluster, where the first cluster has the nearest neighbor. At the first iteration each cluster contains only one point from the input data.
        @details The result is stored in self.__queue.
        
        """
        
        self.__queue = [cure_cluster(self.__pointer_data[index_point], index_point) for index_point in range(len(self.__pointer_data))];
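
        # The initial closest-cluster search below is a brute-force O(n^2) pass over all
        # single-point clusters; afterwards the k-d tree keeps neighbor updates cheaper.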
        # set closest clusters
        for i in range(0, len(self.__queue)):
            minimal_distance = float('inf');
            closest_index_cluster = -1;
            
            for k in range(0, len(self.__queue)):
                if (i != k):
                    dist = self.__cluster_distance(self.__queue[i], self.__queue[k]);
                    if (dist < minimal_distance):
                        minimal_distance = dist;
                        closest_index_cluster = k;
            
            self.__queue[i].closest = self.__queue[closest_index_cluster];
            self.__queue[i].distance = minimal_distance;
        
        # sort clusters
        self.__queue.sort(key = lambda x: x.distance, reverse = False);


    def __create_kdtree(self):
        """!
        @brief Creates a k-d tree in line with the created clusters. At the first iteration the tree contains all points from the input dataset.
        @details The result is stored in self.__tree - a k-d tree that consists of the representative points of the CURE clusters.
        
        """
        
        self.__tree = kdtree();
        for current_cluster in self.__queue:
            for representative_point in current_cluster.rep:
                self.__tree.insert(representative_point, current_cluster);


    def __cluster_distance(self, cluster1, cluster2):
        """!
        @brief Calculates the minimal distance between clusters using their representative points.
        
        @param[in] cluster1 (cure_cluster): The first cluster.
        @param[in] cluster2 (cure_cluster): The second cluster.
        
        @return (double) Euclidean distance between the two clusters, defined as the minimum distance between their representative points.
        
        """
        
        distance = float('inf');
        for i in range(0, len(cluster1.rep)):
            for k in range(0, len(cluster2.rep)):
                #dist = euclidean_distance_sqrt(cluster1.rep[i], cluster2.rep[k]);   # Fast mode
                dist = euclidean_distance(cluster1.rep[i], cluster2.rep[k]);        # Slow mode
                if (dist < distance):
                    distance = dist;
                    
        return distance;
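

# Minimal usage sketch: cluster a small 2-D sample into two clusters. It assumes the
# sample file SIMPLE_SAMPLES.SAMPLE_SIMPLE1 that ships with pyclustering; any path to a
# whitespace-separated point file can be used instead.
if __name__ == '__main__':
    from pyclustering.samples.definitions import SIMPLE_SAMPLES;
    from pyclustering.utils import read_sample;

    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1);

    # Allocate two clusters using five representative points and 0.5 compression.
    cure_instance = cure(sample, 2, 5, 0.5, True);
    cure_instance.process();

    print("clusters:", cure_instance.get_clusters());
    print("means:", cure_instance.get_means());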