Code Duplication - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer


        @see get_clusters()
        
        """

        return self.__medians;


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type used to encode the clustering result.
        
        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.
        
        @see get_clusters()
        
        """

        # The encoding is fixed for this algorithm; it does not depend on instance state.
        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate Manhattan distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__medians))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;


            
            scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
        
        return scores;


    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using bayesian information criterion.
        
        @param[in] clusters (list): Clusters for which splitting criterion should be calculated.
        @param[in] centers (list): Centers of the clusters.
        
        @return (double) Splitting criterion in line with bayesian information criterion.
                High value of splitting criterion means that current structure is much better.
                
        @see __minimum_noiseless_description_length(clusters, centers)
        
        """

        scores = [float('inf')] * len(clusters)     # splitting criterion
        dimension = len(self.__pointer_data[0]);
          
        # estimation of the noise variance in the data set
        sigma_sqrt = 0.0;
        K = len(clusters);
        N = 0.0;
          
        for index_cluster in range(0, len(clusters), 1):
            for index_object in clusters[index_cluster]:
                sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]);

            N += len(clusters[index_cluster]);
      
        if (N - K > 0):
            sigma_sqrt /= (N - K);
            p = (K - 1) + dimension * K + 1;
            
            # splitting criterion    
            for index_cluster in range(0, len(clusters), 1):


        @see get_clusters()
        
        """

        return self.__centers;


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type used to encode the clustering result.
        
        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.
        
        @see get_clusters()
        
        """

        # The encoding is fixed for this algorithm; it does not depend on instance state.
        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__centers))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;
             


    
    def get_medoids(self):
        """!
        @brief Gives access to the medoids stored by this instance for the allocated clusters.
        
        @return The object held in the private medoids attribute (returned as is, without copying).
        
        @see process()
        @see get_clusters()
        
        """

        # Returned by reference: no copy of the stored medoids is made here.
        medoids = self.__medoids
        return medoids


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type used to encode the clustering result.
        
        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.
        
        @see get_clusters()
        
        """

        # The encoding is fixed for this algorithm; it does not depend on instance state.
        encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
        return encoding


    def __update_clusters(self):
        """!
        @brief Calculate distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        

		@@ 129-155 (lines=27) @@
126		@see get_clusters()
127
128		"""
129
130		return self.__medians;
131
132
133		def get_cluster_encoding(self):
134		"""!
135		@brief Returns clustering result representation type that indicate how clusters are encoded.
136
137		@return (type_encoding) Clustering result representation.
138
139		@see get_clusters()
140
141		"""
142
143		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146		def __update_clusters(self):
147		"""!
148		@brief Calculate Manhattan distance to each point from the each cluster.
149		@details Nearest points are captured by according clusters and as a result clusters are updated.
150
151		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152
153		"""
154
155		clusters = [[] for i in range(len(self.__medians))];
156		for index_point in range(len(self.__pointer_data)):
157		index_optim = -1;
158		dist_optim = 0.0;

		@@ 361-394 (lines=34) @@
358
359		scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
360
361		return scores;
362
363
364		def __bayesian_information_criterion(self, clusters, centers):
365		"""!
366		@brief Calculates splitting criterion for input clusters using bayesian information criterion.
367
368		@param[in] clusters (list): Clusters for which splitting criterion should be calculated.
369		@param[in] centers (list): Centers of the clusters.
370
371		@return (double) Splitting criterion in line with bayesian information criterion.
372		High value of splitting criterion means that current structure is much better.
373
374		@see __minimum_noiseless_description_length(clusters, centers)
375
376		"""
377
378		scores = [float('inf')] * len(clusters) # splitting criterion
379		dimension = len(self.__pointer_data[0]);
380
381		# estimation of the noise variance in the data set
382		sigma_sqrt = 0.0;
383		K = len(clusters);
384		N = 0.0;
385
386		for index_cluster in range(0, len(clusters), 1):
387		for index_object in clusters[index_cluster]:
388		sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]);
389
390		N += len(clusters[index_cluster]);
391
392		if (N - K > 0):
393		sigma_sqrt /= (N - K);
394		p = (K - 1) + dimension * K + 1;
395
396		# splitting criterion
397		for index_cluster in range(0, len(clusters), 1):

		@@ 126-152 (lines=27) @@
123		@see get_clusters()
124
125		"""
126
127		return self.__centers;
128
129
130		def get_cluster_encoding(self):
131		"""!
132		@brief Returns clustering result representation type that indicate how clusters are encoded.
133
134		@return (type_encoding) Clustering result representation.
135
136		@see get_clusters()
137
138		"""
139
140		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
141
142
143		def __update_clusters(self):
144		"""!
145		@brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
146
147		@return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
148
149		"""
150
151		clusters = [[] for i in range(len(self.__centers))];
152		for index_point in range(len(self.__pointer_data)):
153		index_optim = -1;
154		dist_optim = 0.0;
155

		@@ 123-149 (lines=27) @@
120
121		def get_medoids(self):
122		"""!
123		@brief Returns list of medoids of allocated clusters.
124
125		@see process()
126		@see get_clusters()
127
128		"""
129
130		return self.__medoids;
131
132
133		def get_cluster_encoding(self):
134		"""!
135		@brief Returns clustering result representation type that indicate how clusters are encoded.
136
137		@return (type_encoding) Clustering result representation.
138
139		@see get_clusters()
140
141		"""
142
143		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146		def __update_clusters(self):
147		"""!
148		@brief Calculate distance to each point from the each cluster.
149		@details Nearest points are captured by according clusters and as a result clusters are updated.
150
151		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location

annoviko / pyclustering

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location