Code Duplication    Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

@@ 129-155 (lines=27) @@
126
        @see get_clusters()
127
        
128
        """
129
130
        return self.__medians;
131
132
133
    def get_cluster_encoding(self):
134
        """!
135
        @brief Returns clustering result representation type that indicates how clusters are encoded.
136
        
137
        @return (type_encoding) Clustering result representation.
138
        
139
        @see get_clusters()
140
        
141
        """
142
        
143
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146
    def __update_clusters(self):
147
        """!
148
        @brief Calculates the Manhattan distance between each point and each cluster.
149
        @details Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated.
150
        
151
        @return (list) Updated clusters as a list of clusters, where each cluster contains indexes of objects from the data.
152
        
153
        """
154
        
155
        clusters = [[] for i in range(len(self.__medians))];
156
        for index_point in range(len(self.__pointer_data)):
157
            index_optim = -1;
158
            dist_optim = 0.0;

pyclustering/cluster/xmeans.py 1 location

@@ 361-394 (lines=34) @@
358
359
        scores = [0.0] * len(clusters)     # splitting criterion
360
        dimension = len(self.__pointer_data[0]);
361
          
362
        # estimation of the noise variance in the data set
363
        sigma = 0.0;
364
        K = len(clusters);
365
        N = 0.0;
366
          
367
        for index_cluster in range(0, len(clusters), 1):
368
            for index_object in clusters[index_cluster]:
369
                sigma += (euclidean_distance(self.__pointer_data[index_object], centers[index_cluster]));  # It works
370
371
            N += len(clusters[index_cluster]);
372
      
373
        if (N - K != 0):
374
            sigma /= (N - K);
375
        
376
            # splitting criterion    
377
            for index_cluster in range(0, len(clusters), 1):
378
                n = len(clusters[index_cluster]);
379
                
380
                if (sigma > 0.0):
381
                    scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
382
                  
383
        return sum(scores);
384
 
385
 
386
    def __update_clusters(self, centers, available_indexes = None):
387
        """!
388
        @brief Calculates the Euclidean distance between each point and each cluster.
389
               Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated.
390
               
391
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
392
        @param[in] available_indexes (list): Indexes that define which points can be used from the input data; if None, all points are used.
393
        
394
        @return (list) Updated clusters.
395
        
396
        """
397
            

pyclustering/cluster/kmeans.py 1 location

@@ 126-152 (lines=27) @@
123
        @see get_clusters()
124
        
125
        """
126
127
        return self.__centers;
128
129
130
    def get_cluster_encoding(self):
131
        """!
132
        @brief Returns clustering result representation type that indicates how clusters are encoded.
133
        
134
        @return (type_encoding) Clustering result representation.
135
        
136
        @see get_clusters()
137
        
138
        """
139
        
140
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
141
142
143
    def __update_clusters(self):
144
        """!
145
        @brief Calculates the Euclidean distance between each point and each cluster. Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated.
146
        
147
        @return (list) Updated clusters as a list of clusters. Each cluster contains indexes of objects from the data.
148
        
149
        """
150
        
151
        clusters = [[] for i in range(len(self.__centers))];
152
        for index_point in range(len(self.__pointer_data)):
153
            index_optim = -1;
154
            dist_optim = 0.0;
155
             

pyclustering/cluster/kmedoids.py 1 location

@@ 123-149 (lines=27) @@
120
    
121
    def get_medoids(self):
122
        """!
123
        @brief Returns list of medoids of allocated clusters.
124
        
125
        @see process()
126
        @see get_clusters()
127
        
128
        """
129
130
        return self.__medoids;
131
132
133
    def get_cluster_encoding(self):
134
        """!
135
        @brief Returns clustering result representation type that indicates how clusters are encoded.
136
        
137
        @return (type_encoding) Clustering result representation.
138
        
139
        @see get_clusters()
140
        
141
        """
142
        
143
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146
    def __update_clusters(self):
147
        """!
148
        @brief Calculates the distance between each point and each cluster.
149
        @details Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated.
150
        
151
        @return (list) Updated clusters as a list of clusters, where each cluster contains indexes of objects from the data.
152