Code Duplication - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer


        return self.__medians;


    def __update_clusters(self):
        """!
        @brief Assign every data point to the cluster of its nearest median.
        @details For each point the distance to every median is obtained from euclidean_distance_sqrt()
                 and the point index is appended to the cluster of the closest median (the first median wins ties).
                 NOTE(review): an earlier revision of this comment said "Manhattan distance", but the code calls
                 euclidean_distance_sqrt() - confirm which metric is intended.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
                Clusters that did not capture any point are omitted, so the result may be shorter than the list of medians.
        
        """
        
        clusters = [[] for _ in range(len(self.__medians))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;
             
            for index in range(len(self.__medians)):
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index]);
                 
                # The first median is always accepted as the initial candidate.
                # Compare by value (==): identity comparison with an int literal ('is 0')
                # depends on interpreter caching and raises SyntaxWarning on Python 3.8+.
                if ( (index == 0) or (dist < dist_optim) ):
                    index_optim = index;
                    dist_optim = dist;
             
            clusters[index_optim].append(index_point);
            
        # If cluster is not able to capture object it should be removed
        clusters = [cluster for cluster in clusters if len(cluster) > 0];
        
        return clusters;
    
    
    def __update_medians(self):


            sigma /= (N - K);
        
            # splitting criterion    
            for index_cluster in range(0, len(clusters), 1):
                n = len(clusters[index_cluster]);
                
                if (sigma > 0.0):
                    scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
                  
        return sum(scores);
 
 
    def __update_clusters(self, centers, available_indexes = None):
        """!
        @brief Calculates Euclidean distance to each point from the each cluster.
               Nearest points are captured by according clusters and as a result clusters are updated.
               
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
        @param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
        
        @return (list) Updated clusters.
        
        """
            
        bypass = None;
        if (available_indexes is None):
            bypass = range(len(self.__pointer_data));
        else:
            bypass = available_indexes;
          
        clusters = [[] for i in range(len(centers))];
        for index_point in bypass:
            index_optim = -1;
            dist_optim = 0.0;
              
            for index in range(len(centers)):
                # dist = euclidean_distance(data[index_point], centers[index]);         # Slow solution
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index]);      # Fast solution
                  
                if ( (dist < dist_optim) or (index is 0)):


        return self.__centers;


    def __update_clusters(self):
        """!
        @brief Assign every data point to the cluster of its nearest center.
        @details For each point the distance to every center is obtained from euclidean_distance_sqrt()
                 and the point index is appended to the cluster of the closest center (the first center wins ties).
        
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
                Clusters that did not capture any point are omitted, so the result may be shorter than the list of centers.
        
        """
        
        clusters = [[] for _ in range(len(self.__centers))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;
             
            for index in range(len(self.__centers)):
                # euclidean_distance_sqrt() is used as the fast variant of the distance calculation.
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index]);
                 
                # The first center is always accepted as the initial candidate.
                # Compare by value (==): identity comparison with an int literal ('is 0')
                # depends on interpreter caching and raises SyntaxWarning on Python 3.8+.
                if ( (index == 0) or (dist < dist_optim) ):
                    index_optim = index;
                    dist_optim = dist;
             
            clusters[index_optim].append(index_point);
        
        # If cluster is not able to capture object it should be removed
        clusters = [cluster for cluster in clusters if len(cluster) > 0];
        
        return clusters;
    
    
    def __update_centers(self):


        """!
        @brief Returns list of medoids of allocated clusters.
        
        @see process()
        @see get_clusters()
        
        """

        return self.__medoids;


    def __update_clusters(self):
        """!
        @brief Calculate distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__medoids))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;
             
            for index in range(len(self.__medoids)):
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index]);
                 
                if ( (dist < dist_optim) or (index is 0)):
                    index_optim = index;
                    dist_optim = dist;
             
            clusters[index_optim].append(index_point);

		@@ 129-155 (lines=27) @@
126		return self.__medians;
127
128
129		def __update_clusters(self):
130		"""!
131		@brief Calculate Manhattan distance to each point from the each cluster.
132		@details Nearest points are captured by according clusters and as a result clusters are updated.
133
134		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
135
136		"""
137
138		clusters = [[] for i in range(len(self.__medians))];
139		for index_point in range(len(self.__pointer_data)):
140		index_optim = -1;
141		dist_optim = 0.0;
142
143		for index in range(len(self.__medians)):
144		dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index]);
145
146		if ( (dist < dist_optim) or (index is 0)):
147		index_optim = index;
148		dist_optim = dist;
149
150		clusters[index_optim].append(index_point);
151
152		# If cluster is not able to capture object it should be removed
153		clusters = [cluster for cluster in clusters if len(cluster) > 0];
154
155		return clusters;
156
157
158		def __update_medians(self):

		@@ 361-394 (lines=34) @@
358		sigma /= (N - K);
359
360		# splitting criterion
361		for index_cluster in range(0, len(clusters), 1):
362		n = len(clusters[index_cluster]);
363
364		if (sigma > 0.0):
365		scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
366
367		return sum(scores);
368
369
370		def __update_clusters(self, centers, available_indexes = None):
371		"""!
372		@brief Calculates Euclidean distance to each point from the each cluster.
373		Nearest points are captured by according clusters and as a result clusters are updated.
374
375		@param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
376		@param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
377
378		@return (list) Updated clusters.
379
380		"""
381
382		bypass = None;
383		if (available_indexes is None):
384		bypass = range(len(self.__pointer_data));
385		else:
386		bypass = available_indexes;
387
388		clusters = [[] for i in range(len(centers))];
389		for index_point in bypass:
390		index_optim = -1;
391		dist_optim = 0.0;
392
393		for index in range(len(centers)):
394		# dist = euclidean_distance(data[index_point], centers[index]); # Slow solution
395		dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index]); # Fast solution
396
397		if ( (dist < dist_optim) or (index is 0)):

		@@ 126-152 (lines=27) @@
123		return self.__centers;
124
125
126		def __update_clusters(self):
127		"""!
128		@brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
129
130		@return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
131
132		"""
133
134		clusters = [[] for i in range(len(self.__centers))];
135		for index_point in range(len(self.__pointer_data)):
136		index_optim = -1;
137		dist_optim = 0.0;
138
139		for index in range(len(self.__centers)):
140		# dist = euclidean_distance(data[index_point], centers[index]); # Slow solution
141		dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index]); # Fast solution
142
143		if ( (dist < dist_optim) or (index is 0)):
144		index_optim = index;
145		dist_optim = dist;
146
147		clusters[index_optim].append(index_point);
148
149		# If cluster is not able to capture object it should be removed
150		clusters = [cluster for cluster in clusters if len(cluster) > 0];
151
152		return clusters;
153
154
155		def __update_centers(self):

		@@ 123-149 (lines=27) @@
120		"""!
121		@brief Returns list of medoids of allocated clusters.
122
123		@see process()
124		@see get_clusters()
125
126		"""
127
128		return self.__medoids;
129
130
131		def __update_clusters(self):
132		"""!
133		@brief Calculate distance to each point from the each cluster.
134		@details Nearest points are captured by according clusters and as a result clusters are updated.
135
136		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
137
138		"""
139
140		clusters = [[] for i in range(len(self.__medoids))];
141		for index_point in range(len(self.__pointer_data)):
142		index_optim = -1;
143		dist_optim = 0.0;
144
145		for index in range(len(self.__medoids)):
146		dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index]);
147
148		if ( (dist < dist_optim) or (index is 0)):
149		index_optim = index;
150		dist_optim = dist;
151
152		clusters[index_optim].append(index_point);

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location

annoviko / pyclustering

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location