Code Duplication - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer


        @see get_clusters()
        
        """

        return self.__medians;


    def get_cluster_encoding(self):
        """!
        @brief Reports which representation is used to encode the clustering result.
        @details The value is a constant: this method always answers with
                 type_encoding.CLUSTER_INDEX_LIST_SEPARATION regardless of instance state.

        @return (type_encoding) Encoding of the clusters returned by get_clusters().

        @see get_clusters()

        """

        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate Manhattan distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__medians))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;


            
            Kw = (1.0 - K / N) * sigma_sqrt;
            Ks = ( 2.0 * alpha * sigma / (N ** 0.5) ) * ( (alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0 ) ** 0.5;
            
            scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
        
        return scores;


    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using bayesian information criterion.
        
        @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Splitting criterion in line with bayesian information criterion.
                High value of splitting criterion means that current structure is much better.
                
        @see __minimum_noiseless_description_length(clusters, centers)
        
        """

        scores = [float('inf')] * len(clusters)     # splitting criterion
        dimension = len(self.__pointer_data[0]);
          
        # estimation of the noise variance in the data set
        sigma_sqrt = 0.0;
        K = len(clusters);
        N = 0.0;
          
        for index_cluster in range(0, len(clusters), 1):
            for index_object in clusters[index_cluster]:
                sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]);

            N += len(clusters[index_cluster]);
      
        if (N - K > 0):
            sigma_sqrt /= (N - K);
            p = (K - 1) + dimension * K + 1;


                #changes = max([euclidean_distance(self.__centers[index], updated_centers[index]) for index in range(len(self.__centers))]);        # Slow solution
                changes = max([euclidean_distance_sqrt(self.__centers[index], updated_centers[index]) for index in range(len(updated_centers))]);    # Fast solution
                 
                self.__centers = updated_centers;


    def get_clusters(self):
        """!
        @brief Provides the clusters currently stored by the algorithm instance.
        @details Each cluster is expected to hold indexes of objects from the input data list.
                 The stored container itself is returned (no copy is made).

        @return Stored list of allocated clusters.

        @see process()
        @see get_centers()

        """

        allocated_clusters = self.__clusters
        return allocated_clusters
    
    
    def get_centers(self):
        """!
        @brief Provides the centers of the allocated clusters stored by the algorithm instance.
        @details The stored container itself is returned (no copy is made).

        @return Stored list of cluster centers.

        @see process()
        @see get_clusters()

        """

        current_centers = self.__centers
        return current_centers


    def get_cluster_encoding(self):
        """!
        @brief Reports which representation is used to encode the clustering result.
        @details The value is a constant: this method always answers with
                 type_encoding.CLUSTER_INDEX_LIST_SEPARATION regardless of instance state.

        @return (type_encoding) Encoding of the clusters returned by get_clusters().

        @see get_clusters()

        """

        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))];
        for index_point in range(len(self.__pointer_data)):
            if (index_point in self.__medoid_indexes):
                continue;

            index_optim = -1;
            dist_optim = float('Inf');
            
            for index in range(len(self.__medoids)):
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index]);
                
                if ( (dist < dist_optim) or (index is 0)):
                    index_optim = index;
                    dist_optim = dist;
            

		@@ 129-155 (lines=27) @@
126		@see get_clusters()
127
128		"""
129
130		return self.__medians;
131
132
133		def get_cluster_encoding(self):
134		"""!
135		@brief Returns clustering result representation type that indicate how clusters are encoded.
136
137		@return (type_encoding) Clustering result representation.
138
139		@see get_clusters()
140
141		"""
142
143		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146		def __update_clusters(self):
147		"""!
148		@brief Calculate Manhattan distance to each point from the each cluster.
149		@details Nearest points are captured by according clusters and as a result clusters are updated.
150
151		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152
153		"""
154
155		clusters = [[] for i in range(len(self.__medians))];
156		for index_point in range(len(self.__pointer_data)):
157		index_optim = -1;
158		dist_optim = 0.0;

		@@ 361-394 (lines=34) @@
358
359		Kw = (1.0 - K / N) * sigma_sqrt;
360		Ks = ( 2.0 * alpha * sigma / (N ** 0.5) ) * ( (alpha ** 2.0) * sigma_sqrt / N + W - Kw / 2.0 ) ** 0.5;
361
362		scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
363
364		return scores;
365
366
367		def __bayesian_information_criterion(self, clusters, centers):
368		"""!
369		@brief Calculates splitting criterion for input clusters using bayesian information criterion.
370
371		@param[in] clusters (list): Clusters for which splitting criterion should be calculated.
372		@param[in] centers (list): Centers of the clusters.
373
374		@return (double) Splitting criterion in line with bayesian information criterion.
375		High value of splitting criterion means that current structure is much better.
376
377		@see __minimum_noiseless_description_length(clusters, centers)
378
379		"""
380
381		scores = [float('inf')] * len(clusters) # splitting criterion
382		dimension = len(self.__pointer_data[0]);
383
384		# estimation of the noise variance in the data set
385		sigma_sqrt = 0.0;
386		K = len(clusters);
387		N = 0.0;
388
389		for index_cluster in range(0, len(clusters), 1):
390		for index_object in clusters[index_cluster]:
391		sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]);
392
393		N += len(clusters[index_cluster]);
394
395		if (N - K > 0):
396		sigma_sqrt /= (N - K);
397		p = (K - 1) + dimension * K + 1;

		@@ 126-152 (lines=27) @@
123		#changes = max([euclidean_distance(self.__centers[index], updated_centers[index]) for index in range(len(self.__centers))]); # Slow solution
124		changes = max([euclidean_distance_sqrt(self.__centers[index], updated_centers[index]) for index in range(len(updated_centers))]); # Fast solution
125
126		self.__centers = updated_centers;
127
128
129		def get_clusters(self):
130		"""!
131		@brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
132
133		@see process()
134		@see get_centers()
135
136		"""
137
138		return self.__clusters;
139
140
141		def get_centers(self):
142		"""!
143		@brief Returns list of centers of allocated clusters.
144
145		@see process()
146		@see get_clusters()
147
148		"""
149
150		return self.__centers;
151
152
153		def get_cluster_encoding(self):
154		"""!
155		@brief Returns clustering result representation type that indicate how clusters are encoded.

		@@ 146-172 (lines=27) @@
143		@return (type_encoding) Clustering result representation.
144
145		@see get_clusters()
146
147		"""
148
149		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
150
151
152		def __update_clusters(self):
153		"""!
154		@brief Calculate distance to each point from the each cluster.
155		@details Nearest points are captured by according clusters and as a result clusters are updated.
156
157		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
158
159		"""
160
161		clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))];
162		for index_point in range(len(self.__pointer_data)):
163		if (index_point in self.__medoid_indexes):
164		continue;
165
166		index_optim = -1;
167		dist_optim = float('Inf');
168
169		for index in range(len(self.__medoids)):
170		dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index]);
171
172		if ( (dist < dist_optim) or (index is 0)):
173		index_optim = index;
174		dist_optim = dist;
175

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location

annoviko / pyclustering

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location