Code Duplication - annoviko/pyclustering - Measure and Improve Code Quality continuously with Scrutinizer


        @see get_clusters()
        
        """

        return self.__medians;


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type that indicates how the clusters of the result are encoded.

        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

        @see get_clusters()

        """

        # The encoding is a fixed constant; no instance state is consulted.
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __update_clusters(self):
        """!
        @brief Calculate Manhattan distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__medians))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;



        scores = [0.0] * len(clusters)     # splitting criterion
        dimension = len(self.__pointer_data[0]);
          
        # estimation of the noise variance in the data set
        sigma = 0.0;
        K = len(clusters);
        N = 0.0;
          
        for index_cluster in range(0, len(clusters), 1):
            for index_object in clusters[index_cluster]:
                sigma += (euclidean_distance(self.__pointer_data[index_object], centers[index_cluster]));  # It works

            N += len(clusters[index_cluster]);
      
        if (N - K != 0):
            sigma /= (N - K);
        
            # splitting criterion    
            for index_cluster in range(0, len(clusters), 1):
                n = len(clusters[index_cluster]);
                
                if (sigma > 0.0):
                    scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
                  
        return sum(scores);
 
 
    def __update_clusters(self, centers, available_indexes = None):
        """!
        @brief Calculates Euclidean distance to each point from the each cluster.
               Nearest points are captured by according clusters and as a result clusters are updated.
               
        @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
        @param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
        
        @return (list) Updated clusters.
        
        """
            


        @see get_clusters()
        
        """

        return self.__centers;


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type that indicates how the clusters of the result are encoded.

        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

        @see get_clusters()

        """

        # The encoding is a fixed constant; no instance state is consulted.
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __update_clusters(self):
        """!
        @brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
        
        """
        
        clusters = [[] for i in range(len(self.__centers))];
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1;
            dist_optim = 0.0;
             


    
    def get_medoids(self):
        """!
        @brief Provides the medoids of the allocated clusters.

        @return Medoids currently held in the private '__medoids' attribute of the instance.

        @see process()
        @see get_clusters()

        """

        return self.__medoids


    def get_cluster_encoding(self):
        """!
        @brief Reports the representation type that indicates how the clusters of the result are encoded.

        @return (type_encoding) Always type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

        @see get_clusters()

        """

        # The encoding is a fixed constant; no instance state is consulted.
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION


    def __update_clusters(self):
        """!
        @brief Calculate distance to each point from the each cluster. 
        @details Nearest points are captured by according clusters and as a result clusters are updated.
        
        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
        

		@@ 129-155 (lines=27) @@
126		@see get_clusters()
127
128		"""
129
130		return self.__medians;
131
132
133		def get_cluster_encoding(self):
134		"""!
135		@brief Returns clustering result representation type that indicate how clusters are encoded.
136
137		@return (type_encoding) Clustering result representation.
138
139		@see get_clusters()
140
141		"""
142
143		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146		def __update_clusters(self):
147		"""!
148		@brief Calculate Manhattan distance to each point from the each cluster.
149		@details Nearest points are captured by according clusters and as a result clusters are updated.
150
151		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152
153		"""
154
155		clusters = [[] for i in range(len(self.__medians))];
156		for index_point in range(len(self.__pointer_data)):
157		index_optim = -1;
158		dist_optim = 0.0;

		@@ 361-394 (lines=34) @@
358
359		scores = [0.0] * len(clusters) # splitting criterion
360		dimension = len(self.__pointer_data[0]);
361
362		# estimation of the noise variance in the data set
363		sigma = 0.0;
364		K = len(clusters);
365		N = 0.0;
366
367		for index_cluster in range(0, len(clusters), 1):
368		for index_object in clusters[index_cluster]:
369		sigma += (euclidean_distance(self.__pointer_data[index_object], centers[index_cluster])); # It works
370
371		N += len(clusters[index_cluster]);
372
373		if (N - K != 0):
374		sigma /= (N - K);
375
376		# splitting criterion
377		for index_cluster in range(0, len(clusters), 1):
378		n = len(clusters[index_cluster]);
379
380		if (sigma > 0.0):
381		scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0;
382
383		return sum(scores);
384
385
386		def __update_clusters(self, centers, available_indexes = None):
387		"""!
388		@brief Calculates Euclidean distance to each point from the each cluster.
389		Nearest points are captured by according clusters and as a result clusters are updated.
390
391		@param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
392		@param[in] available_indexes (list): Indexes that defines which points can be used from imput data, if None - then all points are used.
393
394		@return (list) Updated clusters.
395
396		"""
397

		@@ 126-152 (lines=27) @@
123		@see get_clusters()
124
125		"""
126
127		return self.__centers;
128
129
130		def get_cluster_encoding(self):
131		"""!
132		@brief Returns clustering result representation type that indicate how clusters are encoded.
133
134		@return (type_encoding) Clustering result representation.
135
136		@see get_clusters()
137
138		"""
139
140		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
141
142
143		def __update_clusters(self):
144		"""!
145		@brief Calculate Euclidean distance to each point from the each cluster. Nearest points are captured by according clusters and as a result clusters are updated.
146
147		@return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data.
148
149		"""
150
151		clusters = [[] for i in range(len(self.__centers))];
152		for index_point in range(len(self.__pointer_data)):
153		index_optim = -1;
154		dist_optim = 0.0;
155

		@@ 123-149 (lines=27) @@
120
121		def get_medoids(self):
122		"""!
123		@brief Returns list of medoids of allocated clusters.
124
125		@see process()
126		@see get_clusters()
127
128		"""
129
130		return self.__medoids;
131
132
133		def get_cluster_encoding(self):
134		"""!
135		@brief Returns clustering result representation type that indicate how clusters are encoded.
136
137		@return (type_encoding) Clustering result representation.
138
139		@see get_clusters()
140
141		"""
142
143		return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
144
145
146		def __update_clusters(self):
147		"""!
148		@brief Calculate distance to each point from the each cluster.
149		@details Nearest points are captured by according clusters and as a result clusters are updated.
150
151		@return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.
152

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location

annoviko / pyclustering

Code Duplication Length = 27-34 lines in 4 locations

pyclustering/cluster/kmedians.py 1 location

pyclustering/cluster/xmeans.py 1 location

pyclustering/cluster/kmeans.py 1 location

pyclustering/cluster/kmedoids.py 1 location