Code Duplication    Length = 17-22 lines in 3 locations

pyclustering/cluster/xmeans.py 1 location

@@ 397-418 (lines=22) @@
394
      
395
        if (N - K > 0):
396
            sigma_sqrt /= (N - K);
397
            p = (K - 1) + dimension * K + 1;
398
            
399
            # splitting criterion    
400
            for index_cluster in range(0, len(clusters), 1):
401
                n = len(clusters[index_cluster]);
402
                
403
                L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * dimension * 0.5 * log(sigma_sqrt) - (n - K) * 0.5;
404
                
405
                # BIC calculation
406
                scores[index_cluster] = L - p * 0.5 * log(N);
407
                
408
        return sum(scores);
409
 
410
 
411
    def __update_clusters(self, centers, available_indexes = None):
412
        """!
413
        @brief Calculates the Euclidean distance from each cluster center to each point.
414
               Nearest points are captured by according clusters and as a result clusters are updated.
415
               
416
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
417
        @param[in] available_indexes (list): Indexes that define which points of the input data can be used; if None, all points are used.
418
        
419
        @return (list) Updated clusters.
420
        
421
        """

pyclustering/cluster/kmeans.py 1 location

@@ 155-173 (lines=19) @@
152
153
    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type that is used to encode the clustering result.

        @return (type_encoding) Encoding of the clusters; this method always yields type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

        @see get_clusters()

        """

        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding
164
165
166
    def __update_clusters(self):
167
        """!
168
        @brief Calculates the Euclidean distance from each cluster to each point. The nearest points are captured by the corresponding clusters, and as a result the clusters are updated.
169
        
170
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
171
        
172
        """
173
        
174
        clusters = [[] for i in range(len(self.__centers))];
175
        for index_point in range(len(self.__pointer_data)):
176
            index_optim = -1;

pyclustering/cluster/kmedoids.py 1 location

@@ 178-194 (lines=17) @@
175
        return clusters;
176
    
177
    
178
    def __update_medoids(self):
        """!
        @brief Determines the medoid of every cluster from the objects that the cluster currently contains.
        @details For each cluster the index returned by median() is recorded together with the data point stored at that index.

        @return (tuple) Two lists ordered like the clusters: the medoid points and the indexes of those points in the input data.

        """

        medoids = []
        medoid_indexes = []

        for cluster in self.__clusters:
            # median() yields an index into the data; the point itself is then looked up by that index.
            medoid_index = median(self.__pointer_data, cluster)
            medoids.append(self.__pointer_data[medoid_index])
            medoid_indexes.append(medoid_index)

        return medoids, medoid_indexes