Code Duplication    Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

@@ 129-155 (lines=27) @@
126
        return self.__medians;
127
128
129
    def __update_clusters(self):
        """!
        @brief Assign every point to the cluster whose median is nearest to it.
        @details For each point the distance to every median is computed with euclidean_distance_sqrt() and the
                  index of the point is appended to the cluster of the closest median. When several medians are
                  equally close, the one with the lowest index wins. Clusters that capture no points are removed
                  from the result, so fewer clusters than medians may be returned.
        
        @note NOTE(review): k-medians is usually formulated with Manhattan distance, but this method calls
               euclidean_distance_sqrt() - confirm which metric is intended.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for _ in range(len(self.__medians))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0
            
            for index in range(len(self.__medians)):
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index])
                
                # The first median always seeds the search. The index is compared by value ('=='):
                # an identity test ('is') on an integer only works thanks to interpreter-specific caching.
                if (dist < dist_optim) or (index == 0):
                    index_optim = index
                    dist_optim = dist
            
            clusters[index_optim].append(index_point)
        
        # If cluster is not able to capture object it should be removed
        return [cluster for cluster in clusters if len(cluster) > 0]
156
    
157
    
158
    def __update_medians(self):

pyclustering/cluster/xmeans.py 1 location

@@ 361-394 (lines=34) @@
358
            sigma /= (N - K);
359
        
360
            # splitting criterion    
361
            for index_cluster in range(0, len(clusters), 1):
362
                n = len(clusters[index_cluster]);
363
                
364
                if (sigma > 0.0):
365
                    scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
366
                  
367
        return sum(scores);
368
 
369
 
370
    def __update_clusters(self, centers, available_indexes = None):
371
        """!
372
        @brief Calculates Euclidean distance from each cluster center to each point.
373
               Nearest points are captured by according clusters and as a result clusters are updated.
374
               
375
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
376
        @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used.
377
        
378
        @return (list) Updated clusters.
379
        
380
        """
381
            
382
        bypass = None;
383
        if (available_indexes is None):
384
            bypass = range(len(self.__pointer_data));
385
        else:
386
            bypass = available_indexes;
387
          
388
        clusters = [[] for i in range(len(centers))];
389
        for index_point in bypass:
390
            index_optim = -1;
391
            dist_optim = 0.0;
392
              
393
            for index in range(len(centers)):
394
                # dist = euclidean_distance(data[index_point], centers[index]);         # Slow solution
395
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index]);      # Fast solution
396
                  
397
                if ( (dist < dist_optim) or (index is 0)):

pyclustering/cluster/kmeans.py 1 location

@@ 126-152 (lines=27) @@
123
        return self.__centers;
124
125
126
    def __update_clusters(self):
        """!
        @brief Assign every point to the cluster whose center is nearest to it.
        @details For each point the distance to every center is computed with euclidean_distance_sqrt() and the
                  index of the point is appended to the cluster of the closest center. When several centers are
                  equally close, the one with the lowest index wins. Clusters that capture no points are removed
                  from the result, so fewer clusters than centers may be returned.
        
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for _ in range(len(self.__centers))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0
            
            for index in range(len(self.__centers)):
                # euclidean_distance_sqrt() is used instead of euclidean_distance() because it was marked
                # by the original author as the faster of the two.
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index])
                
                # The first center always seeds the search. The index is compared by value ('=='):
                # an identity test ('is') on an integer only works thanks to interpreter-specific caching.
                if (dist < dist_optim) or (index == 0):
                    index_optim = index
                    dist_optim = dist
            
            clusters[index_optim].append(index_point)
        
        # If cluster is not able to capture object it should be removed
        return [cluster for cluster in clusters if len(cluster) > 0]
153
    
154
    
155
    def __update_centers(self):

pyclustering/cluster/kmedoids.py 1 location

@@ 123-149 (lines=27) @@
120
        """!
121
        @brief Returns list of medoids of allocated clusters.
122
        
123
        @see process()
124
        @see get_clusters()
125
        
126
        """
127
128
        return self.__medoids;
129
130
131
    def __update_clusters(self):
132
        """!
133
        @brief Calculate distance from each cluster medoid to each point.
134
        @details Nearest points are captured by according clusters and as a result clusters are updated.
135
        
136
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
137
        
138
        """
139
        
140
        clusters = [[] for i in range(len(self.__medoids))];
141
        for index_point in range(len(self.__pointer_data)):
142
            index_optim = -1;
143
            dist_optim = 0.0;
144
             
145
            for index in range(len(self.__medoids)):
146
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index]);
147
                 
148
                if ( (dist < dist_optim) or (index is 0)):
149
                    index_optim = index;
150
                    dist_optim = dist;
151
             
152
            clusters[index_optim].append(index_point);