| @@ 397-418 (lines=22) @@ | ||
| 394 | ||
| 395 | if (N - K > 0): |
|
| 396 | sigma_sqrt /= (N - K); |
|
| 397 | p = (K - 1) + dimension * K + 1; |
|
| 398 | ||
| 399 | # splitting criterion |
|
| 400 | for index_cluster in range(0, len(clusters), 1): |
|
| 401 | n = len(clusters[index_cluster]); |
|
| 402 | ||
| 403 | L = n * log(n) - n * log(N) - n * 0.5 * log(2.0 * numpy.pi) - n * dimension * 0.5 * log(sigma_sqrt) - (n - K) * 0.5; |
|
| 404 | ||
| 405 | # BIC calculation |
|
| 406 | scores[index_cluster] = L - p * 0.5 * log(N); |
|
| 407 | ||
| 408 | return sum(scores); |
|
| 409 | ||
| 410 | ||
| 411 | def __update_clusters(self, centers, available_indexes = None): |
|
| 412 | """! |
|
| 413 | @brief Calculates Euclidean distance to each point from each cluster. |
|
| 414 | Nearest points are captured by according clusters and as a result clusters are updated. |
|
| 415 | ||
| 416 | @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...]. |
|
| 417 | @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used. |
|
| 418 | ||
| 419 | @return (list) Updated clusters. |
|
| 420 | ||
| 421 | """ |
|
| @@ 155-173 (lines=19) @@ | ||
| 152 | ||
def get_cluster_encoding(self):
    """!
    @brief Reports how the clustering result produced by this algorithm is encoded.

    @return (type_encoding) Encoding of the clustering result; this method always
             returns type_encoding.CLUSTER_INDEX_LIST_SEPARATION.

    @see get_clusters()

    """

    encoding = type_encoding.CLUSTER_INDEX_LIST_SEPARATION
    return encoding
|
| 164 | ||
| 165 | ||
| 166 | def __update_clusters(self): |
|
| 167 | """! |
|
| 168 | @brief Calculate Euclidean distance to each point from each cluster. Nearest points are captured by the corresponding clusters and as a result clusters are updated. |
|
| 169 | ||
| 170 | @return (list) updated clusters as list of clusters. Each cluster contains indexes of objects from data. |
|
| 171 | ||
| 172 | """ |
|
| 173 | ||
| 174 | clusters = [[] for i in range(len(self.__centers))]; |
|
| 175 | for index_point in range(len(self.__pointer_data)): |
|
| 176 | index_optim = -1; |
|
| @@ 178-194 (lines=17) @@ | ||
| 175 | ||
| 176 | clusters[index_optim].append(index_point); |
|
| 177 | ||
| 178 | return clusters; |
|
| 179 | ||
| 180 | ||
def __update_medoids(self):
    """!
    @brief Determines the medoid of every current cluster.

    @details For each cluster the medoid index is obtained by calling median() with the
              input data and the cluster's object indexes; the medoid itself is the
              input-data object stored at that index.

    @return (tuple) Two lists with one entry per cluster, in cluster order: the medoid
             objects taken from the input data, and the indexes of those objects.

    """

    medoids = []
    medoid_indexes = []

    # Walk the clusters in order so both result lists stay aligned with them.
    for cluster in self.__clusters:
        medoid_index = median(self.__pointer_data, cluster)
        medoids.append(self.__pointer_data[medoid_index])
        medoid_indexes.append(medoid_index)

    return medoids, medoid_indexes
|