"""!

@brief Cluster analysis algorithm: CURE
@details Implementation based on paper @cite article::cure::1.

@authors Andrei Novikov ([email protected])
@date 2014-2018
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
@endcond

"""


import numpy

from pyclustering.cluster.encoder import type_encoding

from pyclustering.utils import euclidean_distance

from pyclustering.container.kdtree import kdtree

from pyclustering.core.wrapper import ccore_library

import pyclustering.core.cure_wrapper as wrapper


class cure_cluster:
    """!
    @brief Represents a data cluster in CURE terms.
    @details A CURE cluster is described by its points, its representative points and its center (mean).

    """

    def __init__(self, point, index):
        """!
        @brief Constructor of CURE cluster.

        @param[in] point (list): Point represented by a list of coordinates.
        @param[in] index (uint): Index of the point in the dataset.

        """

        ## List of points that make up the cluster.
        self.points = [ ]

        ## Indexes of the points in the dataset.
        self.indexes = -1

        ## Mean of the points that make up the cluster.
        self.mean = None

        ## List of points that represent the cluster.
        self.rep = [ ]

        if point is not None:
            self.points = [ point ]
            self.indexes = [ index ]
            self.mean = point
            self.rep = [ point ]

        ## Pointer to the closest cluster.
        self.closest = None

        ## Distance to the closest cluster.
        self.distance = float('inf')    # calculating the distance (even the squared distance) is an expensive operation, so the distance to the closest cluster is cached.

    def __repr__(self):
        """!
        @brief Returns string representation: distance to the closest cluster and points contained by the cluster.

        """
        return "%s, %s" % (self.distance, self.points)


class cure:
    """!
    @brief Class represents the clustering algorithm CURE with k-d tree optimization.
    @details The CCORE option can be used to employ the pyclustering core - a C/C++ shared library - for processing, which significantly increases performance.

    Example:
    @code
        from pyclustering.cluster.cure import cure
        from pyclustering.utils import read_sample

        # read data for clustering from some file
        sample = read_sample(path_to_data)

        # create instance of the CURE algorithm for cluster analysis,
        # requesting allocation of two clusters
        cure_instance = cure(sample, 2, 5, 0.5, True)

        # run cluster analysis
        cure_instance.process()

        # get results of clustering
        clusters = cure_instance.get_clusters()
    @endcode
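
    A minimal, self-contained sketch that also retrieves the representative points and cluster means
    (the small 2-D dataset here is made up purely for illustration; ccore=False forces the pure Python
    implementation):
    @code
        from pyclustering.cluster.cure import cure

        # two well-separated groups of points
        sample = [[0.0, 0.0], [0.1, 0.2], [0.2, 0.1], [5.0, 5.0], [5.1, 5.2], [5.2, 5.1]]

        cure_instance = cure(sample, 2, number_represent_points=3, compression=0.5, ccore=False)
        cure_instance.process()

        clusters = cure_instance.get_clusters()          # indexes of the points in each cluster
        representors = cure_instance.get_representors()  # shrunken representative points of each cluster
        means = cure_instance.get_means()                # mean point of each cluster
    @endcode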

    """

    def __init__(self, data, number_cluster, number_represent_points = 5, compression = 0.5, ccore = True):
        """!
        @brief Constructor of the clustering algorithm CURE.

        @param[in] data (array_like): Input data that should be processed.
        @param[in] number_cluster (uint): Number of clusters that should be allocated.
        @param[in] number_represent_points (uint): Number of representative points for each cluster.
        @param[in] compression (double): Coefficient that defines the level of shrinking of the representative points toward the mean of the newly created cluster after each merge. It is usually chosen from the range 0 to 1.
        @param[in] ccore (bool): If True, the CCORE shared library (C++ implementation) is used for processing.
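
        A minimal sketch of constructing the algorithm with explicit keyword arguments (the dataset is made
        up for illustration; ccore=False selects the pure Python implementation):
        @code
            from pyclustering.cluster.cure import cure

            sample = [[0.0, 0.0], [0.1, 0.1], [3.0, 3.0], [3.1, 3.1]]
            cure_instance = cure(sample, number_cluster=2, number_represent_points=4, compression=0.3, ccore=False)
        @endcode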

        """

        self.__pointer_data = self.__prepare_data_points(data)

        self.__clusters = None
        self.__representors = None
        self.__means = None

        self.__number_cluster = number_cluster
        self.__number_represent_points = number_represent_points
        self.__compression = compression

        self.__ccore = ccore
        if self.__ccore:
            self.__ccore = ccore_library.workable()

        self.__validate_arguments()

    def process(self):
        """!
        @brief Performs cluster analysis in line with the rules of the CURE algorithm.

        @remark Results of clustering can be obtained using the corresponding get methods.

        @see get_clusters()

        """

        if self.__ccore is True:
            self.__process_by_ccore()
        else:
            self.__process_by_python()

    def __process_by_ccore(self):
        """!
        @brief Performs cluster analysis using CCORE (the C/C++ part of the pyclustering library).

        """
        cure_data_pointer = wrapper.cure_algorithm(self.__pointer_data, self.__number_cluster,
                                                   self.__number_represent_points, self.__compression)

        self.__clusters = wrapper.cure_get_clusters(cure_data_pointer)
        self.__representors = wrapper.cure_get_representors(cure_data_pointer)
        self.__means = wrapper.cure_get_means(cure_data_pointer)

        wrapper.cure_data_destroy(cure_data_pointer)

    def __process_by_python(self):
        """!
        @brief Performs cluster analysis using the pure Python implementation.

        """
        self.__create_queue()   # queue of clusters sorted by distance to the closest cluster
        self.__create_kdtree()  # k-d tree over the representative points

        while len(self.__queue) > self.__number_cluster:
            cluster1 = self.__queue[0]      # cluster that has the nearest neighbor
            cluster2 = cluster1.closest     # the closest cluster

            self.__queue.remove(cluster1)
            self.__queue.remove(cluster2)

            self.__delete_represented_points(cluster1)
            self.__delete_represented_points(cluster2)

            merged_cluster = self.__merge_clusters(cluster1, cluster2)

            self.__insert_represented_points(merged_cluster)

            # Pointers to clusters that should be relocated are stored here.
            cluster_relocation_requests = []

            # Check for the last cluster
            if len(self.__queue) > 0:
                merged_cluster.closest = self.__queue[0]  # arbitrary cluster from the queue
                merged_cluster.distance = self.__cluster_distance(merged_cluster, merged_cluster.closest)

                for item in self.__queue:
                    distance = self.__cluster_distance(merged_cluster, item)
                    # Check whether the distance between the new cluster and the current one is better than the current best.
                    if distance < merged_cluster.distance:
                        merged_cluster.closest = item
                        merged_cluster.distance = distance

                    # Check whether the current cluster has lost its neighbor because of the merge.
                    if (item.closest is cluster1) or (item.closest is cluster2):
                        # If the previous distance was less than the distance to the new cluster, then the nearest cluster should be found in the tree.
                        # print("Update: ", item);
                        if item.distance < distance:
                            (item.closest, item.distance) = self.__closest_cluster(item, distance)

                            # TODO: investigation of the root cause is required.
                            # The cluster itself and the merged cluster should always be in the list of neighbors within the specified radius.
                            # But the merged cluster may be missing from the list due to calculation error, therefore it is added manually.
                            if item.closest is None:
                                item.closest = merged_cluster
                                item.distance = distance

                        # Otherwise the new cluster is the nearest one.
                        else:
                            item.closest = merged_cluster
                            item.distance = distance

                        cluster_relocation_requests.append(item)
                    elif item.distance > distance:
                        item.closest = merged_cluster
                        item.distance = distance

                        cluster_relocation_requests.append(item)

            # The new cluster and the updated clusters should be relocated in the queue.
            self.__insert_cluster(merged_cluster)
            for item in cluster_relocation_requests:
                self.__relocate_cluster(item)

        # Change cluster representation.
        self.__clusters = [cure_cluster_unit.indexes for cure_cluster_unit in self.__queue]
        self.__representors = [cure_cluster_unit.rep for cure_cluster_unit in self.__queue]
        self.__means = [cure_cluster_unit.mean for cure_cluster_unit in self.__queue]

    def get_clusters(self):
        """!
        @brief Returns list of allocated clusters; each cluster contains indexes of objects from the input data.

        @return (list) List of allocated clusters.

        @see process()
        @see get_representors()
        @see get_means()
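
        A short sketch (assuming 'sample' is the dataset that was passed to the constructor) that maps the
        returned indexes back to the original points:
        @code
            clusters = cure_instance.get_clusters()
            for cluster_index, cluster in enumerate(clusters):
                points = [sample[index_point] for index_point in cluster]
        @endcode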

        """

        return self.__clusters

    def get_representors(self):
        """!
        @brief Returns list of point-representors of each cluster.
        @details Cluster index should be used for navigation between lists of point-representors.

        @return (list) List of point-representors of each cluster.

        @see get_clusters()
        @see get_means()
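
        A short sketch of navigating by cluster index (assuming clustering has already been performed):
        @code
            representors = cure_instance.get_representors()
            for cluster_index, cluster_representors in enumerate(representors):
                print("cluster", cluster_index, "has", len(cluster_representors), "representative points")
        @endcode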

        """

        return self.__representors

    def get_means(self):
        """!
        @brief Returns list of mean values of each cluster.
        @details Cluster index should be used for navigation between the mean values.

        @return (list) List of mean values of each cluster.

        @see get_clusters()
        @see get_representors()

        """

        return self.__means

    def get_cluster_encoding(self):
        """!
        @brief Returns clustering result representation type that indicates how clusters are encoded.

        @return (type_encoding) Clustering result representation.

        @see get_clusters()
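
        A sketch of converting the result to a per-point labeling, assuming the cluster_encoder utility from
        pyclustering.cluster.encoder and that 'sample' is the input data:
        @code
            from pyclustering.cluster.encoder import cluster_encoder, type_encoding

            encoding = cure_instance.get_cluster_encoding()
            encoder = cluster_encoder(encoding, cure_instance.get_clusters(), sample)
            encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)
            labels = encoder.get_clusters()
        @endcode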

        """

        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION

    def __prepare_data_points(self, sample):
        """!
        @brief Prepare data points for clustering.
        @details numpy.ndarray overloads many basic operators, such as __contains__ and __eq__, therefore the input is converted to a plain list.

        @param[in] sample (array_like): Input data that should be processed.

        @return (list) Returns the sample in list format.

        """
        if isinstance(sample, numpy.ndarray):
            return sample.tolist()

        return sample

    def __validate_arguments(self):
        """!
        @brief Check input arguments of the CURE algorithm and throw an appropriate exception if one of them is not correct.

        """

        if len(self.__pointer_data) == 0:
            raise ValueError("Empty input data. Data should contain at least one point.")

        if self.__number_cluster <= 0:
            raise ValueError("Incorrect amount of clusters '%d'. Amount of clusters should be greater than 0." % self.__number_cluster)

        if self.__compression < 0:
            raise ValueError("Incorrect compression level '%f'. Compression should not be negative." % self.__compression)

        if self.__number_represent_points <= 0:
            raise ValueError("Incorrect amount of representatives '%d'. Amount of representatives should be greater than 0." % self.__number_represent_points)

    def __insert_cluster(self, cluster):
        """!
        @brief Insert cluster into the sorted queue in line with its distance to the closest cluster.

        @param[in] cluster (cure_cluster): Cluster that should be inserted.

        """

        for index in range(len(self.__queue)):
            if cluster.distance < self.__queue[index].distance:
                self.__queue.insert(index, cluster)
                return

        self.__queue.append(cluster)

    def __relocate_cluster(self, cluster):
        """!
        @brief Relocate cluster in the queue in line with the distance order.

        @param[in] cluster (cure_cluster): Cluster that should be relocated in line with the order.

        """

        self.__queue.remove(cluster)
        self.__insert_cluster(cluster)

    def __closest_cluster(self, cluster, distance):
        """!
        @brief Find the closest cluster to the specified cluster in line with distance.

        @param[in] cluster (cure_cluster): Cluster for which the nearest cluster should be found.
        @param[in] distance (double): Closest distance to the previous cluster.

        @return (tuple) Pair (nearest CURE cluster, nearest distance); the cluster is None if no nearest cluster is found within the specified distance.

        """

        nearest_cluster = None
        nearest_distance = float('inf')

        for point in cluster.rep:
            # Nearest nodes should be returned (at least the node of the cluster itself).
            nearest_nodes = self.__tree.find_nearest_dist_nodes(point, distance)
            for (candidate_distance, kdtree_node) in nearest_nodes:
                if (candidate_distance < nearest_distance) and (kdtree_node is not None) and (kdtree_node.payload is not cluster):
                    nearest_distance = candidate_distance
                    nearest_cluster = kdtree_node.payload

        return (nearest_cluster, nearest_distance)

    def __insert_represented_points(self, cluster):
        """!
        @brief Insert representation points into the k-d tree.

        @param[in] cluster (cure_cluster): Cluster whose representation points should be inserted.

        """

        for point in cluster.rep:
            self.__tree.insert(point, cluster)

    def __delete_represented_points(self, cluster):
        """!
        @brief Remove representation points of the cluster from the k-d tree.

        @param[in] cluster (cure_cluster): Cluster whose representation points should be removed.

        """

        for point in cluster.rep:
            self.__tree.remove(point, payload=cluster)

    def __merge_clusters(self, cluster1, cluster2):
        """!
        @brief Merges two clusters and returns the new merged cluster. Representation points and the mean are calculated for the new cluster.

        @param[in] cluster1 (cure_cluster): Cluster that should be merged.
        @param[in] cluster2 (cure_cluster): Cluster that should be merged.

        @return (cure_cluster) New merged CURE cluster.

        """

        merged_cluster = cure_cluster(None, None)

        merged_cluster.points = cluster1.points + cluster2.points
        merged_cluster.indexes = cluster1.indexes + cluster2.indexes

        # merged_cluster.mean = ( len(cluster1.points) * cluster1.mean + len(cluster2.points) * cluster2.mean ) / ( len(cluster1.points) + len(cluster2.points) );
        dimension = len(cluster1.mean)
        merged_cluster.mean = [0] * dimension
        if merged_cluster.points[1:] == merged_cluster.points[:-1]:
            # All merged points are identical, so the mean is that point itself.
            merged_cluster.mean = merged_cluster.points[0]
        else:
            for index in range(dimension):
                merged_cluster.mean[index] = (len(cluster1.points) * cluster1.mean[index] + len(cluster2.points) * cluster2.mean[index]) / (len(cluster1.points) + len(cluster2.points))

        temporary = list()

        for index in range(self.__number_represent_points):
            maximal_distance = 0
            maximal_point = None

            for point in merged_cluster.points:
                minimal_distance = 0
                if index == 0:
                    minimal_distance = euclidean_distance(point, merged_cluster.mean)
                    # minimal_distance = euclidean_distance_sqrt(point, merged_cluster.mean);
                else:
                    minimal_distance = min([euclidean_distance(point, p) for p in temporary])
                    # minimal_distance = cluster_distance(cure_cluster(point), cure_cluster(temporary[0]));

                if minimal_distance >= maximal_distance:
                    maximal_distance = minimal_distance
                    maximal_point = point

            if maximal_point not in temporary:
                temporary.append(maximal_point)

        for point in temporary:
            representative_point = [0] * dimension
            for index in range(dimension):
                representative_point[index] = point[index] + self.__compression * (merged_cluster.mean[index] - point[index])

            merged_cluster.rep.append(representative_point)

        return merged_cluster

    def __create_queue(self):
        """!
        @brief Create the queue of clusters sorted by distance to the closest cluster, where the first cluster has the nearest neighbor. At the first iteration each cluster contains only one point.
        @details The sorted queue is stored in self.__queue.

        """

        self.__queue = [cure_cluster(self.__pointer_data[index_point], index_point) for index_point in range(len(self.__pointer_data))]

        # set closest clusters
        for i in range(0, len(self.__queue)):
            minimal_distance = float('inf')
            closest_index_cluster = -1

            for k in range(0, len(self.__queue)):
                if i != k:
                    dist = self.__cluster_distance(self.__queue[i], self.__queue[k])
                    if dist < minimal_distance:
                        minimal_distance = dist
                        closest_index_cluster = k

            self.__queue[i].closest = self.__queue[closest_index_cluster]
            self.__queue[i].distance = minimal_distance

        # sort clusters
        self.__queue.sort(key=lambda x: x.distance, reverse=False)

    def __create_kdtree(self):
        """!
        @brief Create the k-d tree in line with the created clusters. At the first iteration it contains all points from the input data set.
        @details The k-d tree, which consists of the representative points of the CURE clusters, is stored in self.__tree.

        """

        self.__tree = kdtree()
        for current_cluster in self.__queue:
            for representative_point in current_cluster.rep:
                self.__tree.insert(representative_point, current_cluster)

    def __cluster_distance(self, cluster1, cluster2):
        """!
        @brief Calculate the minimal distance between clusters using representative points.

        @param[in] cluster1 (cure_cluster): The first cluster.
        @param[in] cluster2 (cure_cluster): The second cluster.

        @return (double) Euclidean distance between the two clusters, defined as the minimum distance between their representation points.

        """

        distance = float('inf')
        for i in range(0, len(cluster1.rep)):
            for k in range(0, len(cluster2.rep)):
                # dist = euclidean_distance_sqrt(cluster1.rep[i], cluster2.rep[k]);  # Fast mode
                dist = euclidean_distance(cluster1.rep[i], cluster2.rep[k])  # Slow mode
                if dist < distance:
                    distance = dist

        return distance