Code Duplication - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 17-22 lines in 3 locations

pyclustering/cluster/xmeans.py 1 location


      
        if (N - K > 0):
            sigma_sqrt /= (N - K);
            p = (K - 1) + dimension * K + 1;
            
            # splitting criterion    
            for index_cluster in range(0, len(clusters), 1):
                n = len(clusters[index_cluster]);
                
                L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * dimension * 0.5 * log(sigma_sqrt) - (n - K) * 0.5;
                
                # BIC calculation
                scores[index_cluster] = L - p * 0.5 * log(N);
                
        return sum(scores);
 
 
    def __update_clusters(self, centers, available_indexes = None):
        """!
        @brief Calculates Euclidean distance to each point from the each cluster.
               Nearest points are captured by according clusters and as a result clusters are updated.
               
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
        @param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
        
        @return (list) Updated clusters.
        
        """

pyclustering/cluster/kmeans.py 1 location



    def get_cluster_encoding(self):
        """!
        @brief Reports the encoding type that describes how the resulting clusters are represented.

        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

        @see get_clusters()

        """

        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__centers))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;

pyclustering/cluster/kmedoids.py 1 location


        return clusters;
    
    
    def __update_medoids(self):
        """!
        @brief Recomputes the medoid of every current cluster.
        @details For each cluster the index returned by median() is recorded and the entry of the
                 input data stored at that index is taken as the cluster's medoid.

        @return (tuple) Pair (medoids, medoid_indexes): 'medoids' holds the selected data entries and
                 'medoid_indexes' holds their indexes in the input data; both lists follow cluster order.

        """

        medoids = []
        medoid_indexes = []

        for cluster in self.__clusters:
            # median() supplies the index (within the input data) chosen for this cluster.
            position = median(self.__pointer_data, cluster)
            medoids.append(self.__pointer_data[position])
            medoid_indexes.append(position)

        return medoids, medoid_indexes

		@@ 397-418 (lines=22) @@
394
395		if (N - K > 0):
396		sigma_sqrt /= (N - K);
397		p = (K - 1) + dimension * K + 1;
398
399		# splitting criterion
400		for index_cluster in range(0, len(clusters), 1):
401		n = len(clusters[index_cluster]);
402
403		L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * dimension * 0.5 * log(sigma_sqrt) - (n - K) * 0.5;
404
405		# BIC calculation
406		scores[index_cluster] = L - p * 0.5 * log(N);
407
408		return sum(scores);
409
410
411		def __update_clusters(self, centers, available_indexes = None):
412		"""!
413		@brief Calculates Euclidean distance to each point from the each cluster.
414		Nearest points are captured by according clusters and as a result clusters are updated.
415
416		@param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
417		@param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
418
419		@return (list) Updated clusters.
420
421		"""

		@@ 155-173 (lines=19) @@
152
153		def get_cluster_encoding(self):
154		"""!
155		@brief Returns clustering result representation type that indicate how clusters are encoded.
156
157		@return (type_encoding) Clustering result representation.
158
159		@see get_clusters()
160
161		"""
162
163		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
164
165
166		def __update_clusters(self):
167		"""!
168		@brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
169
170		@return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
171
172		"""
173
174		clusters = [[] for i in range(len(self.__centers))];
175		for index_point in range(len(self.__pointer_data)):
176		index_optim = -1;

		@@ 178-194 (lines=17) @@
175		return clusters;
176
177
178		def __update_medoids(self):
179		"""!
180		@brief Find medoids of clusters in line with contained objects.
181
182		@return (list) list of medoids for current number of clusters.
183
184		"""
185
186		medoids = [[] for _ in range(len(self.__clusters))];
187		medoid_indexes = [-1] * len(self.__clusters);
188
189		for index in range(len(self.__clusters)):
190		medoid_index = median(self.__pointer_data, self.__clusters[index]);
191		medoids[index] = self.__pointer_data[medoid_index];
192		medoid_indexes[index] = medoid_index;
193
194		return medoids, medoid_indexes;

annoviko / pyclustering

Code Duplication Length = 17-22 lines in 3 locations

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location