@@ 129-155 (lines=27) @@ | ||
126 | @see get_clusters() |
|
127 | ||
128 | """ |
|
129 | ||
130 | return self.__medians; |
|
131 | ||
132 | ||
133 | def get_cluster_encoding(self): |
|
134 | """! |
|
135 | @brief Returns the clustering result representation type that indicates how clusters are encoded. |
|
136 | ||
137 | @return (type_encoding) Clustering result representation; always type_encoding.CLUSTER_INDEX_LIST_SEPARATION. |
|
138 | ||
139 | @see get_clusters() |
|
140 | ||
141 | """ |
|
142 | ||
143 | return type_encoding.CLUSTER_INDEX_LIST_SEPARATION; |
|
144 | ||
145 | ||
146 | def __update_clusters(self): |
|
147 | """! |
|
148 | @brief Calculates Manhattan distance to each point from each cluster. |
|
149 | @details Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated. |
|
150 | ||
151 | @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data. |
|
152 | ||
153 | """ |
|
154 | ||
155 | clusters = [[] for i in range(len(self.__medians))]; |
|
156 | for index_point in range(len(self.__pointer_data)): |
|
157 | index_optim = -1; |
|
158 | dist_optim = 0.0; |
@@ 361-394 (lines=34) @@ | ||
358 | ||
359 | scores = sigma_sqrt * (2 * K)**0.5 * ((2 * K)**0.5 + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N |
|
360 | ||
361 | return scores; |
|
362 | ||
363 | ||
364 | def __bayesian_information_criterion(self, clusters, centers): |
|
365 | """! |
|
366 | @brief Calculates splitting criterion for input clusters using bayesian information criterion. |
|
367 | ||
368 | @param[in] clusters (list): Clusters for which splitting criterion should be calculated. |
|
369 | @param[in] centers (list): Centers of the clusters. |
|
370 | ||
371 | @return (double) Splitting criterion in line with bayesian information criterion. |
|
372 | High value of splitting criterion means that current structure is much better. |
|
373 | ||
374 | @see __minimum_noiseless_description_length(clusters, centers) |
|
375 | ||
376 | """ |
|
377 | ||
378 | scores = [float('inf')] * len(clusters) # splitting criterion |
|
379 | dimension = len(self.__pointer_data[0]); |
|
380 | ||
381 | # estimation of the noise variance in the data set |
|
382 | sigma_sqrt = 0.0; |
|
383 | K = len(clusters); |
|
384 | N = 0.0; |
|
385 | ||
386 | for index_cluster in range(0, len(clusters), 1): |
|
387 | for index_object in clusters[index_cluster]: |
|
388 | sigma_sqrt += euclidean_distance_sqrt(self.__pointer_data[index_object], centers[index_cluster]); |
|
389 | ||
390 | N += len(clusters[index_cluster]); |
|
391 | ||
392 | if (N - K > 0): |
|
393 | sigma_sqrt /= (N - K); |
|
394 | p = (K - 1) + dimension * K + 1; |
|
395 | ||
396 | # splitting criterion |
|
397 | for index_cluster in range(0, len(clusters), 1): |
@@ 126-152 (lines=27) @@ | ||
123 | @see get_clusters() |
|
124 | ||
125 | """ |
|
126 | ||
127 | return self.__centers; |
|
128 | ||
129 | ||
130 | def get_cluster_encoding(self): |
|
131 | """! |
|
132 | @brief Returns the clustering result representation type that indicates how clusters are encoded. |
|
133 | ||
134 | @return (type_encoding) Clustering result representation; always type_encoding.CLUSTER_INDEX_LIST_SEPARATION. |
|
135 | ||
136 | @see get_clusters() |
|
137 | ||
138 | """ |
|
139 | ||
140 | return type_encoding.CLUSTER_INDEX_LIST_SEPARATION; |
|
141 | ||
142 | ||
143 | def __update_clusters(self): |
|
144 | """! |
|
145 | @brief Calculates Euclidean distance to each point from each cluster. Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated. |
|
146 | ||
147 | @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data. |
|
148 | ||
149 | """ |
|
150 | ||
151 | clusters = [[] for i in range(len(self.__centers))]; |
|
152 | for index_point in range(len(self.__pointer_data)): |
|
153 | index_optim = -1; |
|
154 | dist_optim = 0.0; |
|
155 |
@@ 123-149 (lines=27) @@ | ||
120 | ||
121 | def get_medoids(self): |
|
122 | """! |
|
123 | @brief Returns the list of medoids of the allocated clusters, as currently stored by this instance. |
|
124 | ||
125 | @see process() |
|
126 | @see get_clusters() |
|
127 | ||
128 | """ |
|
129 | ||
130 | return self.__medoids; |
|
131 | ||
132 | ||
133 | def get_cluster_encoding(self): |
|
134 | """! |
|
135 | @brief Returns the clustering result representation type that indicates how clusters are encoded. |
|
136 | ||
137 | @return (type_encoding) Clustering result representation; always type_encoding.CLUSTER_INDEX_LIST_SEPARATION. |
|
138 | ||
139 | @see get_clusters() |
|
140 | ||
141 | """ |
|
142 | ||
143 | return type_encoding.CLUSTER_INDEX_LIST_SEPARATION; |
|
144 | ||
145 | ||
146 | def __update_clusters(self): |
|
147 | """! |
|
148 | @brief Calculates distance to each point from each cluster. |
|
149 | @details Nearest points are captured by the corresponding clusters and, as a result, the clusters are updated. |
|
150 | ||
151 | @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data. |
|
152 |