Code Duplication    Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

@@ 129-155 (lines=27) @@
126
        @see get_clusters()
127
        
128
        """
129
130
        return self.__medians;
131
132
133
    def get_cluster_encoding(self):
134
        """!
135
        @brief Returns clustering result representation type that indicates how clusters are encoded.
136
        
137
        @return (type_encoding) Clustering result representation.
138
        
139
        @see get_clusters()
140
        
141
        """
142
        
143
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146
    def __update_clusters(self):
147
        """!
148
        @brief Calculate Manhattan distance to each point from each cluster.
149
        @details Nearest points are captured by the corresponding clusters and as a result clusters are updated.
150
        
151
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152
        
153
        """
154
        
155
        clusters = [[] for i in range(len(self.__medians))];
156
        for index_point in range(len(self.__pointer_data)):
157
            index_optim = -1;
158
            dist_optim = 0.0;

pyclustering/cluster/xmeans.py 1 location

@@ 361-394 (lines=34) @@
358
            
359
            scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
360
        
361
        return scores;
362
363
364
    def __bayesian_information_criterion(self, clusters, centers):
365
        """!
366
        @brief Calculates splitting criterion for input clusters using bayesian information criterion.
367
        
368
        @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
369
        @param[in] centers (list): Centers of the clusters.
370
        
371
        @return (double) Splitting criterion in line with bayesian information criterion.
372
                High value of splitting criterion means that current structure is much better.
373
                
374
        @see __minimum_noiseless_description_length(clusters, centers)
375
        
376
        """
377
378
        scores = [float('inf')] * len(clusters)     # splitting criterion
379
        dimension = len(self.__pointer_data[0]);
380
          
381
        # estimation of the noise variance in the data set
382
        sigma_sqrt = 0.0;
383
        K = len(clusters);
384
        N = 0.0;
385
          
386
        for index_cluster in range(0, len(clusters), 1):
387
            for index_object in clusters[index_cluster]:
388
                sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]);
389
390
            N += len(clusters[index_cluster]);
391
      
392
        if (N - K > 0):
393
            sigma_sqrt /= (N - K);
394
            p = (K - 1) + dimension * K + 1;
395
            
396
            # splitting criterion    
397
            for index_cluster in range(0, len(clusters), 1):

pyclustering/cluster/kmeans.py 1 location

@@ 126-152 (lines=27) @@
123
        @see get_clusters()
124
        
125
        """
126
127
        return self.__centers;
128
129
130
    def get_cluster_encoding(self):
131
        """!
132
        @brief Returns clustering result representation type that indicates how clusters are encoded.
133
        
134
        @return (type_encoding) Clustering result representation.
135
        
136
        @see get_clusters()
137
        
138
        """
139
        
140
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
141
142
143
    def __update_clusters(self):
144
        """!
145
        @brief Calculate Euclidean distance to each point from each cluster. Nearest points are captured by the corresponding clusters and as a result clusters are updated.
146
        
147
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
148
        
149
        """
150
        
151
        clusters = [[] for i in range(len(self.__centers))];
152
        for index_point in range(len(self.__pointer_data)):
153
            index_optim = -1;
154
            dist_optim = 0.0;
155
             

pyclustering/cluster/kmedoids.py 1 location

@@ 123-149 (lines=27) @@
120
    
121
    def get_medoids(self):
122
        """!
123
        @brief Returns list of medoids of allocated clusters.
124
        
125
        @see process()
126
        @see get_clusters()
127
        
128
        """
129
130
        return self.__medoids;
131
132
133
    def get_cluster_encoding(self):
134
        """!
135
        @brief Returns clustering result representation type that indicates how clusters are encoded.
136
        
137
        @return (type_encoding) Clustering result representation.
138
        
139
        @see get_clusters()
140
        
141
        """
142
        
143
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146
    def __update_clusters(self):
147
        """!
148
        @brief Calculate distance to each point from each cluster.
149
        @details Nearest points are captured by the corresponding clusters and as a result clusters are updated.
150
        
151
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152