1
|
|
|
"""!
|
2
|
|
|
|
3
|
|
|
@brief Cluster analysis algorithm: BANG.
|
4
|
|
|
@details Implementation based on paper @cite inproceedings::bang::1.
|
5
|
|
|
|
6
|
|
|
@authors Andrei Novikov ([email protected])
|
7
|
|
|
@date 2014-2018
|
8
|
|
|
@copyright GNU Public License
|
9
|
|
|
|
10
|
|
|
@cond GNU_PUBLIC_LICENSE
|
11
|
|
|
PyClustering is free software: you can redistribute it and/or modify
|
12
|
|
|
it under the terms of the GNU General Public License as published by
|
13
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
14
|
|
|
(at your option) any later version.
|
15
|
|
|
|
16
|
|
|
PyClustering is distributed in the hope that it will be useful,
|
17
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
|
|
GNU General Public License for more details.
|
20
|
|
|
|
21
|
|
|
You should have received a copy of the GNU General Public License
|
22
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
23
|
|
|
@endcond
|
24
|
|
|
|
25
|
|
|
"""
|
26
|
|
|
|
27
|
|
|
|
28
|
|
|
import matplotlib
|
|
|
|
|
29
|
|
|
import matplotlib.gridspec as gridspec
|
|
|
|
|
30
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
31
|
|
|
import matplotlib.patches as patches
|
|
|
|
|
32
|
|
|
import matplotlib.animation as animation
|
|
|
|
|
33
|
|
|
|
34
|
|
|
import itertools
|
35
|
|
|
|
36
|
|
|
from pyclustering.cluster import cluster_visualizer
|
37
|
|
|
from pyclustering.cluster.encoder import type_encoding
|
38
|
|
|
|
39
|
|
|
from pyclustering.utils import data_corners
|
40
|
|
|
from pyclustering.utils.color import color as color_list
|
41
|
|
|
|
42
|
|
|
|
43
|
|
|
|
44
|
|
|
class bang_visualizer:
|
45
|
|
|
"""!
|
46
|
|
|
@brief Visualizer of BANG algorithm's results.
|
47
|
|
|
@details BANG visualizer provides visualization services that are specific for BANG algorithm.
|
48
|
|
|
|
49
|
|
|
"""
|
50
|
|
|
|
51
|
|
|
|
52
|
|
|
__maximum_density_alpha = 0.6
|
53
|
|
|
|
54
|
|
|
|
55
|
|
|
@staticmethod
|
56
|
|
|
def show_blocks(directory):
|
57
|
|
|
"""!
|
58
|
|
|
@brief Show BANG-blocks (leafs only) in data space.
|
59
|
|
|
@details BANG-blocks represents grid that was used for clustering process.
|
60
|
|
|
|
61
|
|
|
@param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process.
|
62
|
|
|
|
63
|
|
|
"""
|
64
|
|
|
|
65
|
|
|
dimension = len(directory.get_data()[0])
|
66
|
|
|
|
67
|
|
|
amount_canvases = 1
|
68
|
|
|
if dimension > 1:
|
69
|
|
|
amount_canvases = int(dimension * (dimension - 1) / 2)
|
70
|
|
|
|
71
|
|
|
figure = plt.figure()
|
72
|
|
|
grid_spec = gridspec.GridSpec(1, amount_canvases)
|
73
|
|
|
|
74
|
|
|
pairs = list(itertools.combinations(range(dimension), 2))
|
75
|
|
|
if len(pairs) == 0: pairs = [(0, 0)]
|
76
|
|
|
|
77
|
|
|
for index in range(amount_canvases):
|
78
|
|
|
ax = figure.add_subplot(grid_spec[index])
|
79
|
|
|
bang_visualizer.__draw_blocks(ax, directory.get_leafs(), pairs[index])
|
80
|
|
|
bang_visualizer.__draw_two_dimension_data(ax, directory.get_data(), pairs[index])
|
81
|
|
|
|
82
|
|
|
plt.show()
|
83
|
|
|
|
84
|
|
|
|
85
|
|
|
@staticmethod
|
86
|
|
|
def show_dendrogram(dendrogram):
|
87
|
|
|
"""!
|
88
|
|
|
@brief Display dendrogram of BANG-blocks.
|
89
|
|
|
|
90
|
|
|
@param[in] dendrogram (list): List representation of dendrogram of BANG-blocks.
|
91
|
|
|
|
92
|
|
|
@see bang.get_dendrogram()
|
93
|
|
|
|
94
|
|
|
"""
|
95
|
|
|
plt.figure()
|
96
|
|
|
axis = plt.subplot(1, 1, 1)
|
97
|
|
|
|
98
|
|
|
current_position = 0
|
99
|
|
|
for index_cluster in range(len(dendrogram)):
|
100
|
|
|
densities = [ block.get_density() for block in dendrogram[index_cluster] ]
|
101
|
|
|
xrange = range(current_position, current_position + len(densities))
|
102
|
|
|
|
103
|
|
|
axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.get_color(index_cluster))
|
104
|
|
|
|
105
|
|
|
current_position += len(densities)
|
106
|
|
|
|
107
|
|
|
axis.set_ylabel("density")
|
108
|
|
|
axis.set_xlabel("block")
|
109
|
|
|
axis.xaxis.set_ticklabels([])
|
110
|
|
|
|
111
|
|
|
plt.xlim([-0.5, current_position - 0.5])
|
112
|
|
|
plt.show()
|
113
|
|
|
|
114
|
|
|
|
115
|
|
|
@staticmethod
|
116
|
|
|
def show_clusters(data, clusters, noise=None):
|
117
|
|
|
"""!
|
118
|
|
|
@brief Display K-Means clustering results.
|
119
|
|
|
|
120
|
|
|
@param[in] data (list): Dataset that was used for clustering.
|
121
|
|
|
@param[in] clusters (array_like): Clusters that were allocated by the algorithm.
|
122
|
|
|
@param[in] noise (array_like): Noise that were allocated by the algorithm.
|
123
|
|
|
|
124
|
|
|
"""
|
125
|
|
|
visualizer = cluster_visualizer()
|
126
|
|
|
visualizer.append_clusters(clusters, data)
|
127
|
|
|
visualizer.append_cluster(noise or [], data, marker='x')
|
128
|
|
|
visualizer.show()
|
129
|
|
|
|
130
|
|
|
|
131
|
|
|
@staticmethod
|
132
|
|
|
def __draw_two_dimension_data(ax, data, pair):
|
133
|
|
|
"""!
|
134
|
|
|
@brief Display data in two-dimensional canvas.
|
135
|
|
|
|
136
|
|
|
@param[in] ax (Axis): Canvas where data should be displayed.
|
137
|
|
|
@param[in] data (list): Data points that should be displayed.
|
138
|
|
|
@param[in] pair (tuple): Pair of dimension indexes.
|
139
|
|
|
|
140
|
|
|
"""
|
141
|
|
|
ax.set_xlabel("x%d" % pair[0])
|
142
|
|
|
ax.set_ylabel("x%d" % pair[1])
|
143
|
|
|
|
144
|
|
|
for point in data:
|
145
|
|
|
if len(data[0]) > 1:
|
146
|
|
|
ax.plot(point[pair[0]], point[pair[1]], color='red', marker='.')
|
147
|
|
|
else:
|
148
|
|
|
ax.plot(point[pair[0]], 0, color='red', marker='.')
|
149
|
|
|
ax.yaxis.set_ticklabels([])
|
150
|
|
|
|
151
|
|
|
|
152
|
|
|
@staticmethod
|
153
|
|
|
def __draw_blocks(ax, blocks, pair):
|
154
|
|
|
"""!
|
155
|
|
|
@brief Display BANG-blocks on specified figure.
|
156
|
|
|
|
157
|
|
|
@param[in] ax (Axis): Axis where bang-blocks should be displayed.
|
158
|
|
|
@param[in] blocks (list): List of blocks that should be displyed.
|
159
|
|
|
@param[in] pair (tuple): Pair of coordinate index that should be displyed.
|
160
|
|
|
|
161
|
|
|
"""
|
162
|
|
|
ax.grid(False)
|
163
|
|
|
|
164
|
|
|
density_scale = blocks[-1].get_density()
|
165
|
|
|
for block in blocks:
|
166
|
|
|
bang_visualizer.__draw_block(ax, pair, block, density_scale)
|
167
|
|
|
|
168
|
|
|
|
169
|
|
View Code Duplication |
@staticmethod
|
|
|
|
|
170
|
|
|
def __draw_block(ax, pair, block, density_scale):
|
171
|
|
|
"""!
|
172
|
|
|
@brief Display BANG-block on the specified ax.
|
173
|
|
|
|
174
|
|
|
@param[in] ax (Axis): Axis where block should be displayed.
|
175
|
|
|
@param[in] pair (tuple): Pair of coordinate index that should be displayed.
|
176
|
|
|
@param[in] block (bang_block): BANG-block that should be displayed.
|
177
|
|
|
@param[in] density_scale (double): Max density to display density of the block by appropriate tone.
|
178
|
|
|
|
179
|
|
|
"""
|
180
|
|
|
max_corner, min_corner = bang_visualizer.__get_rectangle_description(block, pair)
|
181
|
|
|
|
182
|
|
|
belong_cluster = block.get_cluster() is not None
|
183
|
|
|
|
184
|
|
|
density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
|
185
|
|
|
|
186
|
|
|
face_color = matplotlib.colors.to_rgba('blue', alpha=density_scale)
|
187
|
|
|
edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
|
188
|
|
|
|
189
|
|
|
rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
|
190
|
|
|
fill=belong_cluster,
|
191
|
|
|
facecolor=face_color,
|
192
|
|
|
edgecolor=edge_color,
|
193
|
|
|
linewidth=0.5)
|
194
|
|
|
ax.add_patch(rect)
|
195
|
|
|
|
196
|
|
|
|
197
|
|
|
@staticmethod
|
198
|
|
|
def __get_rectangle_description(block, pair):
|
199
|
|
|
"""!
|
200
|
|
|
@brief Create rectangle description for block in specific dimension.
|
201
|
|
|
|
202
|
|
|
@param[in] pair (tuple): Pair of coordinate index that should be displayed.
|
203
|
|
|
@param[in] block (bang_block): BANG-block that should be displayed
|
204
|
|
|
|
205
|
|
|
@return (tuple) Pair of corners that describes rectangle.
|
206
|
|
|
|
207
|
|
|
"""
|
208
|
|
|
max_corner, min_corner = block.get_spatial_block().get_corners()
|
209
|
|
|
|
210
|
|
|
max_corner = [max_corner[pair[0]], max_corner[pair[1]]]
|
211
|
|
|
min_corner = [min_corner[pair[0]], min_corner[pair[1]]]
|
212
|
|
|
|
213
|
|
|
if pair == (0, 0):
|
214
|
|
|
max_corner[1], min_corner[1] = 1.0, -1.0
|
215
|
|
|
|
216
|
|
|
return max_corner, min_corner
|
217
|
|
|
|
218
|
|
|
|
219
|
|
|
class bang_animator:
|
220
|
|
|
"""!
|
221
|
|
|
@brief Provides service for creating 2-D animation using BANG clustering results.
|
222
|
|
|
@details The animator does not support visualization of clustering process where non 2-dimensional was used.
|
223
|
|
|
|
224
|
|
|
Code example of animation of BANG clustering process:
|
225
|
|
|
@code
|
226
|
|
|
from pyclustering.cluster.bang import bang, bang_animator
|
227
|
|
|
from pyclustering.utils import read_sample
|
228
|
|
|
from pyclustering.samples.definitions import FCPS_SAMPLES
|
229
|
|
|
|
230
|
|
|
# Read data two dimensional data.
|
231
|
|
|
data = read_sample(FCPS_SAMPLES.SAMPLE_LSUN)
|
232
|
|
|
|
233
|
|
|
# Create instance of BANG algorithm.
|
234
|
|
|
bang_instance = bang(data, 9)
|
235
|
|
|
bang_instance.process()
|
236
|
|
|
|
237
|
|
|
# Obtain clustering results.
|
238
|
|
|
clusters = bang_instance.get_clusters()
|
239
|
|
|
noise = bang_instance.get_noise()
|
240
|
|
|
directory = bang_instance.get_directory()
|
241
|
|
|
|
242
|
|
|
# Create BANG animation using class 'bang_animator':
|
243
|
|
|
animator = bang_animator(directory, clusters, noise)
|
244
|
|
|
animator.animate()
|
245
|
|
|
@endcode
|
246
|
|
|
|
247
|
|
|
|
248
|
|
|
"""
|
249
|
|
|
def __init__(self, directory, clusters, noise):
|
|
|
|
|
250
|
|
|
"""!
|
251
|
|
|
@brief Creates BANG animator instance.
|
252
|
|
|
|
253
|
|
|
@param[in] directory (bang_directory): BANG directory that was formed during BANG clustering process.
|
254
|
|
|
@param[in] clusters (list): Allocated clusters during BANG clustering process.
|
255
|
|
|
@param[in] noise (list): Allocated noise (outliers) during BANG clustering process.
|
256
|
|
|
|
257
|
|
|
"""
|
258
|
|
|
self.__directory = directory
|
259
|
|
|
self.__clusters = clusters
|
260
|
|
|
self.__noise = []
|
261
|
|
|
|
262
|
|
|
self.__current_block = 0
|
263
|
|
|
self.__current_level = 0
|
264
|
|
|
self.__level_blocks = directory.get_level(0)
|
265
|
|
|
|
266
|
|
|
self.__figure = plt.figure()
|
267
|
|
|
self.__ax = self.__figure.add_subplot(1, 1, 1)
|
268
|
|
|
self.__special_frame = 0
|
269
|
|
|
|
270
|
|
|
self.__validate_arguments()
|
271
|
|
|
|
272
|
|
|
|
273
|
|
|
def __validate_arguments(self):
|
274
|
|
|
"""!
|
275
|
|
|
@brief Check correctness of input arguments and throw exception if incorrect is found.
|
276
|
|
|
|
277
|
|
|
"""
|
278
|
|
|
if len(self.__directory.get_data()[0]) != 2:
|
279
|
|
|
raise ValueError("Impossible to animate BANG clustering process for non 2D data.")
|
280
|
|
|
|
281
|
|
|
|
282
|
|
|
def __increment_block(self):
|
283
|
|
|
"""!
|
284
|
|
|
@brief Increment BANG block safely by updating block index, level and level block.
|
285
|
|
|
|
286
|
|
|
"""
|
287
|
|
|
self.__current_block += 1
|
288
|
|
|
if self.__current_block >= len(self.__level_blocks):
|
289
|
|
|
self.__current_block = 0
|
290
|
|
|
self.__current_level += 1
|
291
|
|
|
|
292
|
|
|
if self.__current_level < self.__directory.get_height():
|
293
|
|
|
self.__level_blocks = self.__directory.get_level(self.__current_level)
|
294
|
|
|
|
295
|
|
|
|
296
|
|
View Code Duplication |
def __draw_block(self, block, block_alpha=0.0):
|
|
|
|
|
297
|
|
|
"""!
|
298
|
|
|
@brief Display single BANG block on axis.
|
299
|
|
|
|
300
|
|
|
@param[in] block (bang_block): BANG block that should be displayed.
|
301
|
|
|
@param[in] block_alpha (double): Transparency level - value of alpha.
|
302
|
|
|
|
303
|
|
|
"""
|
304
|
|
|
max_corner, min_corner = block.get_spatial_block().get_corners()
|
305
|
|
|
|
306
|
|
|
face_color = matplotlib.colors.to_rgba('blue', alpha=block_alpha)
|
307
|
|
|
edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
|
308
|
|
|
|
309
|
|
|
rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
|
310
|
|
|
fill=True,
|
311
|
|
|
facecolor=face_color,
|
312
|
|
|
edgecolor=edge_color,
|
313
|
|
|
linewidth=0.5)
|
314
|
|
|
self.__ax.add_patch(rect)
|
315
|
|
|
|
316
|
|
|
|
317
|
|
|
def __draw_leaf_density(self):
|
318
|
|
|
"""!
|
319
|
|
|
@brief Display densities by filling blocks by appropriate colors.
|
320
|
|
|
|
321
|
|
|
"""
|
322
|
|
|
leafs = self.__directory.get_leafs()
|
323
|
|
|
density_scale = leafs[-1].get_density()
|
324
|
|
|
|
325
|
|
|
for block in leafs:
|
326
|
|
|
alpha = 0.8 * block.get_density() / density_scale
|
327
|
|
|
self.__draw_block(block, alpha)
|
328
|
|
|
|
329
|
|
|
|
330
|
|
|
def __draw_clusters(self):
|
331
|
|
|
"""!
|
332
|
|
|
@brief Display clusters and outliers using different colors.
|
333
|
|
|
|
334
|
|
|
"""
|
335
|
|
|
data = self.__directory.get_data()
|
336
|
|
|
for index_cluster in range(len(self.__clusters)):
|
337
|
|
|
color = color_list.get_color(index_cluster)
|
338
|
|
|
self.__draw_cluster(data, self.__clusters[index_cluster], color, '.')
|
339
|
|
|
|
340
|
|
|
self.__draw_cluster(self.__directory.get_data(), self.__noise, 'gray', 'x')
|
341
|
|
|
|
342
|
|
|
|
343
|
|
|
def __draw_cluster(self, data, cluster, color, marker):
|
344
|
|
|
"""!
|
345
|
|
|
@brief Draw 2-D single cluster on axis using specified color and marker.
|
346
|
|
|
|
347
|
|
|
"""
|
348
|
|
|
for item in cluster:
|
349
|
|
|
self.__ax.plot(data[item][0], data[item][1], color=color, marker=marker)
|
350
|
|
|
|
351
|
|
|
|
352
|
|
|
def animate(self, animation_velocity=75, movie_fps=25, movie_filename=None):
|
353
|
|
|
"""!
|
354
|
|
|
@brief Animates clustering process that is performed by BANG algorithm.
|
355
|
|
|
|
356
|
|
|
@param[in] animation_velocity (uint): Interval between frames in milliseconds (for run-time animation only).
|
357
|
|
|
@param[in] movie_fps (uint): Defines frames per second (for rendering movie only).
|
358
|
|
|
@param[in] movie_filename (string): If it is specified then animation will be stored to file that is specified in this parameter.
|
359
|
|
|
|
360
|
|
|
"""
|
361
|
|
|
def init_frame():
|
362
|
|
|
self.__figure.clf()
|
363
|
|
|
self.__ax = self.__figure.add_subplot(1, 1, 1)
|
364
|
|
|
self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
|
365
|
|
|
|
366
|
|
|
for point in self.__directory.get_data():
|
367
|
|
|
self.__ax.plot(point[0], point[1], color='red', marker='.')
|
368
|
|
|
|
369
|
|
|
return frame_generation(0)
|
370
|
|
|
|
371
|
|
|
|
372
|
|
|
def frame_generation(index_iteration):
|
|
|
|
|
373
|
|
|
if self.__current_level < self.__directory.get_height():
|
374
|
|
|
block = self.__level_blocks[self.__current_block]
|
375
|
|
|
self.__draw_block(block)
|
376
|
|
|
self.__increment_block()
|
377
|
|
|
|
378
|
|
|
else:
|
379
|
|
|
if self.__special_frame == 0:
|
380
|
|
|
self.__draw_leaf_density()
|
381
|
|
|
|
382
|
|
|
elif self.__special_frame == 15:
|
383
|
|
|
self.__draw_clusters()
|
384
|
|
|
|
385
|
|
|
elif self.__special_frame == 30:
|
386
|
|
|
self.__figure.clf()
|
387
|
|
|
self.__ax = self.__figure.add_subplot(1, 1, 1)
|
388
|
|
|
self.__figure.suptitle("BANG algorithm", fontsize=18, fontweight='bold')
|
389
|
|
|
|
390
|
|
|
self.__draw_clusters()
|
391
|
|
|
|
392
|
|
|
self.__special_frame += 1
|
393
|
|
|
|
394
|
|
|
|
395
|
|
|
|
396
|
|
|
iterations = len(self.__directory) + 60
|
397
|
|
|
# print("Total number of iterations: %d" % iterations)
|
398
|
|
|
cluster_animation = animation.FuncAnimation(self.__figure, frame_generation, iterations,
|
399
|
|
|
interval=animation_velocity,
|
400
|
|
|
init_func=init_frame,
|
401
|
|
|
repeat_delay=5000)
|
402
|
|
|
|
403
|
|
|
if movie_filename is not None:
|
404
|
|
|
cluster_animation.save(movie_filename, writer = 'ffmpeg', fps = movie_fps, bitrate = 3500)
|
405
|
|
|
else:
|
406
|
|
|
plt.show()
|
407
|
|
|
|
408
|
|
|
|
409
|
|
|
|
410
|
|
|
class bang_directory:
|
411
|
|
|
"""!
|
412
|
|
|
@brief BANG directory stores BANG-blocks that represents grid in data space.
|
413
|
|
|
@details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide
|
414
|
|
|
a direct access to the leafs that should be analysed. Leafs cache data-points.
|
415
|
|
|
|
416
|
|
|
"""
|
417
|
|
|
def __init__(self, data, levels, density_threshold=0.0, **kwargs):
|
418
|
|
|
"""!
|
419
|
|
|
@brief Create BANG directory - basically tree structure with direct access to leafs.
|
420
|
|
|
|
421
|
|
|
@param[in] data (list): Input data that is clustered.
|
422
|
|
|
@param[in] levels (uint): Height of the blocks tree.
|
423
|
|
|
@param[in] density_threshold (double): The lowest level of density when contained data by bang-block is
|
424
|
|
|
considered as a noise and there is no need to split it till the last level.
|
425
|
|
|
@param[in] **kwargs: Arbitrary keyword arguments (available arguments: 'observe').
|
426
|
|
|
|
427
|
|
|
<b>Keyword Args:</b><br>
|
428
|
|
|
- observe (bool): If 'True' then blocks on each level are stored.
|
429
|
|
|
|
430
|
|
|
"""
|
431
|
|
|
self.__data = data
|
432
|
|
|
self.__levels = levels
|
433
|
|
|
self.__density_threshold = density_threshold
|
434
|
|
|
self.__leafs = []
|
435
|
|
|
self.__root = None
|
436
|
|
|
self.__level_blocks = []
|
437
|
|
|
self.__size = 0
|
438
|
|
|
self.__observe = kwargs.get('observe', True)
|
439
|
|
|
|
440
|
|
|
self.__create_directory()
|
441
|
|
|
|
442
|
|
|
|
443
|
|
|
def __len__(self):
|
444
|
|
|
"""!
|
445
|
|
|
@brief Returns amount of blocks that is stored in the directory
|
446
|
|
|
|
447
|
|
|
@return (uint) Amount of blocks in the BANG directory.
|
448
|
|
|
|
449
|
|
|
"""
|
450
|
|
|
return self.__size
|
451
|
|
|
|
452
|
|
|
|
453
|
|
|
def get_data(self):
|
454
|
|
|
"""!
|
455
|
|
|
@brief Return data that is stored in the directory.
|
456
|
|
|
|
457
|
|
|
@return (list) List of points that represents stored data.
|
458
|
|
|
|
459
|
|
|
"""
|
460
|
|
|
return self.__data
|
461
|
|
|
|
462
|
|
|
|
463
|
|
|
def get_leafs(self):
|
464
|
|
|
"""!
|
465
|
|
|
@brief Return leafs - the smallest blocks.
|
466
|
|
|
@details Some leafs can be bigger than others because splitting is not performed for blocks whose density is
|
467
|
|
|
less than threshold.
|
468
|
|
|
|
469
|
|
|
@return (list) List of blocks that are leafs of BANG directory.
|
470
|
|
|
|
471
|
|
|
"""
|
472
|
|
|
return self.__leafs
|
473
|
|
|
|
474
|
|
|
|
475
|
|
|
def get_level(self, level):
|
476
|
|
|
"""!
|
477
|
|
|
@brief Returns BANG blocks on the specific level.
|
478
|
|
|
|
479
|
|
|
@param[in] level (uint): Level of tree where BANG blocks are located.
|
480
|
|
|
|
481
|
|
|
@return (list) List of BANG blocks on the specific level.
|
482
|
|
|
|
483
|
|
|
"""
|
484
|
|
|
return self.__level_blocks[level]
|
485
|
|
|
|
486
|
|
|
|
487
|
|
|
def get_height(self):
|
488
|
|
|
"""!
|
489
|
|
|
@brief Returns height of BANG tree where blocks are stored.
|
490
|
|
|
|
491
|
|
|
@return (uint) Height of BANG tree.
|
492
|
|
|
|
493
|
|
|
"""
|
494
|
|
|
return len(self.__level_blocks)
|
495
|
|
|
|
496
|
|
|
|
497
|
|
|
def __create_directory(self):
|
498
|
|
|
"""!
|
499
|
|
|
@brief Create BANG directory as a tree with separate storage for leafs.
|
500
|
|
|
|
501
|
|
|
"""
|
502
|
|
|
|
503
|
|
|
min_corner, max_corner = data_corners(self.__data)
|
504
|
|
|
data_block = spatial_block(max_corner, min_corner)
|
505
|
|
|
|
506
|
|
|
cache_require = (self.__levels == 1)
|
507
|
|
|
self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
|
508
|
|
|
|
509
|
|
|
if cache_require:
|
510
|
|
|
self.__leafs.append(self.__root)
|
511
|
|
|
self.__store_level_blocks([self.__root])
|
512
|
|
|
else:
|
513
|
|
|
self.__build_directory_levels()
|
514
|
|
|
|
515
|
|
|
|
516
|
|
|
def __store_level_blocks(self, level_blocks):
|
517
|
|
|
"""!
|
518
|
|
|
@brief Store level blocks if observing is enabled.
|
519
|
|
|
|
520
|
|
|
@param[in] level_blocks (list): Created blocks on a new level.
|
521
|
|
|
|
522
|
|
|
"""
|
523
|
|
|
self.__size += len(level_blocks)
|
524
|
|
|
if self.__observe is True:
|
525
|
|
|
self.__level_blocks.append(level_blocks)
|
526
|
|
|
|
527
|
|
|
|
528
|
|
|
|
529
|
|
|
def __build_directory_levels(self):
|
530
|
|
|
"""!
|
531
|
|
|
@brief Build levels of direction if amount of level is greater than one.
|
532
|
|
|
|
533
|
|
|
"""
|
534
|
|
|
|
535
|
|
|
previous_level_blocks = [ self.__root ]
|
536
|
|
|
|
537
|
|
|
for level in range(1, self.__levels):
|
538
|
|
|
previous_level_blocks = self.__build_level(previous_level_blocks, level)
|
539
|
|
|
self.__store_level_blocks(previous_level_blocks)
|
540
|
|
|
|
541
|
|
|
self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
|
542
|
|
|
|
543
|
|
|
|
544
|
|
|
def __build_level(self, previous_level_blocks, level):
|
545
|
|
|
"""!
|
546
|
|
|
@brief Build new level of directory.
|
547
|
|
|
|
548
|
|
|
@param[in] previous_level_blocks (list): BANG-blocks on the previous level.
|
549
|
|
|
@param[in] level (uint): Level number that should be built.
|
550
|
|
|
|
551
|
|
|
@return (list) New block on the specified level.
|
552
|
|
|
|
553
|
|
|
"""
|
554
|
|
|
current_level_blocks = []
|
555
|
|
|
|
556
|
|
|
split_dimension = level % len(self.__data[0])
|
557
|
|
|
cache_require = (level == self.__levels - 1)
|
558
|
|
|
|
559
|
|
|
for block in previous_level_blocks:
|
560
|
|
|
self.__split_block(block, split_dimension, cache_require, current_level_blocks)
|
561
|
|
|
|
562
|
|
|
if cache_require:
|
563
|
|
|
self.__leafs += current_level_blocks
|
564
|
|
|
|
565
|
|
|
return current_level_blocks
|
566
|
|
|
|
567
|
|
|
|
568
|
|
|
def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
|
569
|
|
|
"""!
|
570
|
|
|
@brief Split specific block in specified dimension.
|
571
|
|
|
@details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
|
572
|
|
|
leafs.
|
573
|
|
|
|
574
|
|
|
@param[in] block (bang_block): BANG-block that should be split.
|
575
|
|
|
@param[in] split_dimension (uint): Dimension at which splitting should be performed.
|
576
|
|
|
@param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
|
577
|
|
|
@param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
|
578
|
|
|
|
579
|
|
|
"""
|
580
|
|
|
if block.get_density() <= self.__density_threshold:
|
581
|
|
|
self.__leafs.append(block)
|
582
|
|
|
|
583
|
|
|
else:
|
584
|
|
|
left, right = block.split(split_dimension, cache_require)
|
585
|
|
|
current_level_blocks.append(left)
|
586
|
|
|
current_level_blocks.append(right)
|
587
|
|
|
|
588
|
|
|
|
589
|
|
|
|
590
|
|
|
class spatial_block:
|
591
|
|
|
"""!
|
592
|
|
|
@brief Geometrical description of BANG block in data space.
|
593
|
|
|
@details Provides services related to spatial function and used by bang_block
|
594
|
|
|
|
595
|
|
|
@see bang_block
|
596
|
|
|
|
597
|
|
|
"""
|
598
|
|
|
|
599
|
|
|
def __init__(self, max_corner, min_corner):
|
600
|
|
|
"""!
|
601
|
|
|
@brief Creates spatial block in data space.
|
602
|
|
|
|
603
|
|
|
@param[in] max_corner (array_like): Maximum corner coordinates of the block.
|
604
|
|
|
@param[in] min_corner (array_like): Minimal corner coordinates of the block.
|
605
|
|
|
|
606
|
|
|
"""
|
607
|
|
|
self.__max_corner = max_corner
|
608
|
|
|
self.__min_corner = min_corner
|
609
|
|
|
self.__volume = self.__calculate_volume()
|
610
|
|
|
|
611
|
|
|
|
612
|
|
|
def __str__(self):
|
613
|
|
|
"""!
|
614
|
|
|
@brief Returns string block description.
|
615
|
|
|
|
616
|
|
|
@return String representation of the block.
|
617
|
|
|
|
618
|
|
|
"""
|
619
|
|
|
return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
|
620
|
|
|
|
621
|
|
|
|
622
|
|
|
def __contains__(self, point):
|
623
|
|
|
"""!
|
624
|
|
|
@brief Point is considered as contained if it lies in block (belong to it).
|
625
|
|
|
|
626
|
|
|
@return (bool) True if point is in block, otherwise False.
|
627
|
|
|
|
628
|
|
|
"""
|
629
|
|
|
for i in range(len(point)):
|
630
|
|
|
if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
|
631
|
|
|
return False
|
632
|
|
|
|
633
|
|
|
return True
|
634
|
|
|
|
635
|
|
|
|
636
|
|
|
def get_corners(self):
|
637
|
|
|
"""!
|
638
|
|
|
@brief Return spatial description of current block.
|
639
|
|
|
|
640
|
|
|
@return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
|
641
|
|
|
|
642
|
|
|
"""
|
643
|
|
|
return self.__max_corner, self.__min_corner
|
644
|
|
|
|
645
|
|
|
|
646
|
|
|
def get_volume(self):
|
647
|
|
|
"""!
|
648
|
|
|
@brief Returns volume of current block.
|
649
|
|
|
@details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
|
650
|
|
|
for 3D is volume of 3D figure, and for ND is volume of ND figure.
|
651
|
|
|
|
652
|
|
|
@return (double) Volume of current block.
|
653
|
|
|
|
654
|
|
|
"""
|
655
|
|
|
return self.__volume
|
656
|
|
|
|
657
|
|
|
|
658
|
|
|
def split(self, dimension):
|
659
|
|
|
"""!
|
660
|
|
|
@brief Split current block into two spatial blocks in specified dimension.
|
661
|
|
|
|
662
|
|
|
@param[in] dimension (uint): Dimension where current block should be split.
|
663
|
|
|
|
664
|
|
|
@return (tuple) Pair of new split blocks from current block.
|
665
|
|
|
|
666
|
|
|
"""
|
667
|
|
|
first_max_corner = self.__max_corner[:]
|
668
|
|
|
second_min_corner = self.__min_corner[:]
|
669
|
|
|
|
670
|
|
|
split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
|
671
|
|
|
|
672
|
|
|
first_max_corner[dimension] = split_border
|
673
|
|
|
second_min_corner[dimension] = split_border
|
674
|
|
|
|
675
|
|
|
return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
|
676
|
|
|
|
677
|
|
|
|
678
|
|
|
def is_neighbor(self, block):
|
679
|
|
|
"""!
|
680
|
|
|
@brief Performs calculation to identify whether specified block is neighbor of current block.
|
681
|
|
|
|
682
|
|
|
@param[in] block (spatial_block): Another block that is check whether it is neighbor.
|
683
|
|
|
|
684
|
|
|
@return (bool) True is blocks are neighbors, False otherwise.
|
685
|
|
|
|
686
|
|
|
"""
|
687
|
|
|
if block is not self:
|
688
|
|
|
block_max_corner, _ = block.get_corners()
|
689
|
|
|
dimension = len(block_max_corner)
|
690
|
|
|
neighborhood_score = self.__calculate_neighborhood(block_max_corner)
|
691
|
|
|
|
692
|
|
|
if neighborhood_score == dimension:
|
693
|
|
|
return True
|
694
|
|
|
|
695
|
|
|
return False
|
696
|
|
|
|
697
|
|
|
|
698
|
|
|
def __calculate_neighborhood(self, block_max_corner):
|
699
|
|
|
"""!
|
700
|
|
|
@brief Calculates neighborhood score that defined whether blocks are neighbors.
|
701
|
|
|
|
702
|
|
|
@param[in] block_max_corner (list): Maximum coordinates of other block.
|
703
|
|
|
|
704
|
|
|
@return (uint) Neighborhood score.
|
705
|
|
|
|
706
|
|
|
"""
|
707
|
|
|
dimension = len(block_max_corner)
|
708
|
|
|
|
709
|
|
|
length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
|
710
|
|
|
|
711
|
|
|
neighborhood_score = 0
|
712
|
|
|
for i in range(dimension):
|
713
|
|
|
diff = abs(block_max_corner[i] - self.__max_corner[i])
|
714
|
|
|
|
715
|
|
|
if diff <= length_edges[i] + length_edges[i] * 0.0001:
|
716
|
|
|
neighborhood_score += 1
|
717
|
|
|
|
718
|
|
|
return neighborhood_score
|
719
|
|
|
|
720
|
|
|
|
721
|
|
|
def __calculate_volume(self):
|
722
|
|
|
"""!
|
723
|
|
|
@brief Calculates volume of current spatial block.
|
724
|
|
|
|
725
|
|
|
@return (double) Volume of current spatial block.
|
726
|
|
|
|
727
|
|
|
"""
|
728
|
|
|
volume = self.__max_corner[0] - self.__min_corner[0]
|
729
|
|
|
for i in range(1, len(self.__max_corner)):
|
730
|
|
|
volume *= self.__max_corner[i] - self.__min_corner[i]
|
731
|
|
|
|
732
|
|
|
return volume
|
733
|
|
|
|
734
|
|
|
|
735
|
|
|
|
736
|
|
|
class bang_block:
|
737
|
|
|
"""!
|
738
|
|
|
@brief BANG-block that represent spatial region in data space.
|
739
|
|
|
|
740
|
|
|
"""
|
741
|
|
|
def __init__(self, data, region, level, space_block, cache_points=False):
|
742
|
|
|
"""!
|
743
|
|
|
@brief Create BANG-block.
|
744
|
|
|
|
745
|
|
|
@param[in] data (list): List of points that are processed.
|
746
|
|
|
@param[in] region (uint): Region number - unique value on a level.
|
747
|
|
|
@param[in] level (uint): Level number where block is created.
|
748
|
|
|
@param[in] space_block (spatial_block): Spatial block description in data space.
|
749
|
|
|
@param[in] cache_points (bool): if True then points are stored in memory (used for leaf blocks).
|
750
|
|
|
|
751
|
|
|
"""
|
752
|
|
|
self.__data = data
|
753
|
|
|
self.__region_number = region
|
754
|
|
|
self.__level = level
|
755
|
|
|
self.__spatial_block = space_block
|
756
|
|
|
self.__cache_points = cache_points
|
757
|
|
|
|
758
|
|
|
self.__cluster = None
|
759
|
|
|
self.__points = None
|
760
|
|
|
self.__density = self.__calculate_density()
|
761
|
|
|
|
762
|
|
|
|
763
|
|
|
def __str__(self):
|
764
|
|
|
"""!
|
765
|
|
|
@brief Returns string representation of BANG-block using region number and level where block is located.
|
766
|
|
|
|
767
|
|
|
"""
|
768
|
|
|
return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
|
769
|
|
|
|
770
|
|
|
|
771
|
|
|
def get_region(self):
|
772
|
|
|
"""!
|
773
|
|
|
@brief Returns region number of BANG-block.
|
774
|
|
|
@details Region number is unique on among region numbers on a directory level. Pair of region number and level
|
775
|
|
|
is unique for all directory.
|
776
|
|
|
|
777
|
|
|
@return (uint) Region number.
|
778
|
|
|
|
779
|
|
|
"""
|
780
|
|
|
return self.__region_number
|
781
|
|
|
|
782
|
|
|
|
783
|
|
|
def get_density(self):
|
784
|
|
|
"""!
|
785
|
|
|
@brief Returns density of the BANG-block.
|
786
|
|
|
|
787
|
|
|
@return (double) BANG-block density.
|
788
|
|
|
|
789
|
|
|
"""
|
790
|
|
|
return self.__density
|
791
|
|
|
|
792
|
|
|
|
793
|
|
|
def get_cluster(self):
|
794
|
|
|
"""!
|
795
|
|
|
@brief Return index of cluster to which the BANG-block belongs to.
|
796
|
|
|
@details Index of cluster may have None value if the block was not assigned to any cluster.
|
797
|
|
|
|
798
|
|
|
@return (uint) Index of cluster or None if the block does not belong to any cluster.
|
799
|
|
|
|
800
|
|
|
"""
|
801
|
|
|
return self.__cluster
|
802
|
|
|
|
803
|
|
|
|
804
|
|
|
def get_spatial_block(self):
|
805
|
|
|
"""!
|
806
|
|
|
@brief Return spatial block - BANG-block description in data space.
|
807
|
|
|
|
808
|
|
|
@return (spatial_block) Spatial block of the BANG-block.
|
809
|
|
|
|
810
|
|
|
"""
|
811
|
|
|
return self.__spatial_block
|
812
|
|
|
|
813
|
|
|
|
814
|
|
|
def get_points(self):
|
815
|
|
|
"""!
|
816
|
|
|
@brief Return points that covers by the BANG-block.
|
817
|
|
|
|
818
|
|
|
@return (list) List of point indexes that are covered by the block.
|
819
|
|
|
|
820
|
|
|
"""
|
821
|
|
|
if self.__points is None:
|
822
|
|
|
self.__cache_covered_data()
|
823
|
|
|
|
824
|
|
|
return self.__points
|
825
|
|
|
|
826
|
|
|
|
827
|
|
|
def set_cluster(self, index):
|
828
|
|
|
"""!
|
829
|
|
|
@brief Assign cluster to the BANG-block by index.
|
830
|
|
|
|
831
|
|
|
@param[in] index (uint): Index cluster that is assigned to BANG-block.
|
832
|
|
|
|
833
|
|
|
"""
|
834
|
|
|
self.__cluster = index
|
835
|
|
|
|
836
|
|
|
|
837
|
|
|
def is_neighbor(self, block):
|
838
|
|
|
"""!
|
839
|
|
|
@brief Performs calculation to check whether specified block is neighbor to the current.
|
840
|
|
|
|
841
|
|
|
@param[in] block (bang_block): Other BANG-block that should be checked for neighborhood.
|
842
|
|
|
|
843
|
|
|
@return (bool) True if blocks are neighbors, False if blocks are not neighbors.
|
844
|
|
|
|
845
|
|
|
"""
|
846
|
|
|
return self.get_spatial_block().is_neighbor(block.get_spatial_block())
|
847
|
|
|
|
848
|
|
|
|
849
|
|
|
def split(self, split_dimension, cache_points):
|
850
|
|
|
"""!
|
851
|
|
|
@brief Split BANG-block into two new blocks in specified dimension.
|
852
|
|
|
|
853
|
|
|
@param[in] split_dimension (uint): Dimension where block should be split.
|
854
|
|
|
@param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks.
|
855
|
|
|
|
856
|
|
|
@return (tuple) Pair of BANG-block that were formed from the current.
|
857
|
|
|
|
858
|
|
|
"""
|
859
|
|
|
left_region_number = self.__region_number
|
860
|
|
|
right_region_number = self.__region_number + 2 ** self.__level
|
861
|
|
|
|
862
|
|
|
first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
|
863
|
|
|
|
864
|
|
|
left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
|
865
|
|
|
right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
|
866
|
|
|
|
867
|
|
|
return left, right
|
868
|
|
|
|
869
|
|
|
|
870
|
|
|
def __calculate_density(self):
|
871
|
|
|
"""!
|
872
|
|
|
@brief Calculates BANG-block density.
|
873
|
|
|
|
874
|
|
|
@return (double) BANG-block density.
|
875
|
|
|
|
876
|
|
|
"""
|
877
|
|
|
return self.__get_amount_points() / self.__spatial_block.get_volume()
|
878
|
|
|
|
879
|
|
|
|
880
|
|
|
def __get_amount_points(self):
|
881
|
|
|
"""!
|
882
|
|
|
@brief Count covered points by the BANG-block and if cache is enable then covered points are stored.
|
883
|
|
|
|
884
|
|
|
@return (uint) Amount of covered points.
|
885
|
|
|
|
886
|
|
|
"""
|
887
|
|
|
amount = 0
|
888
|
|
|
for index in range(len(self.__data)):
|
889
|
|
|
if self.__data[index] in self.__spatial_block:
|
890
|
|
|
self.__cache_point(index)
|
891
|
|
|
amount += 1
|
892
|
|
|
|
893
|
|
|
return amount
|
894
|
|
|
|
895
|
|
|
|
896
|
|
|
def __cache_covered_data(self):
|
897
|
|
|
"""!
|
898
|
|
|
@brief Cache covered data.
|
899
|
|
|
|
900
|
|
|
"""
|
901
|
|
|
self.__cache_points = True
|
902
|
|
|
self.__points = []
|
903
|
|
|
|
904
|
|
|
for index_point in range(len(self.__data)):
|
905
|
|
|
if self.__data[index_point] in self.__spatial_block:
|
906
|
|
|
self.__cache_point(index_point)
|
907
|
|
|
|
908
|
|
|
|
909
|
|
|
def __cache_point(self, index):
|
910
|
|
|
"""!
|
911
|
|
|
@brief Store index points.
|
912
|
|
|
|
913
|
|
|
@param[in] index (uint): Index point that should be stored.
|
914
|
|
|
|
915
|
|
|
"""
|
916
|
|
|
if self.__cache_points:
|
917
|
|
|
if self.__points is None:
|
918
|
|
|
self.__points = []
|
919
|
|
|
|
920
|
|
|
self.__points.append(index)
|
921
|
|
|
|
922
|
|
|
|
923
|
|
|
|
924
|
|
|
class bang:
|
925
|
|
|
"""!
|
926
|
|
|
@brief Class implements BANG grid based clustering algorithm.
|
927
|
|
|
@details BANG clustering algorithms uses a multidimensional grid structure to organize the value space surrounding
|
928
|
|
|
the pattern values. The patterns are grouped into blocks and clustered with respect to the blocks by
|
929
|
|
|
a topological neighbor search algorithm @cite inproceedings::bang::1.
|
930
|
|
|
|
931
|
|
|
Code example of BANG usage:
|
932
|
|
|
@code
|
933
|
|
|
from pyclustering.cluster.bang import bang, bang_visualizer
|
934
|
|
|
from pyclustering.utils import read_sample
|
935
|
|
|
from pyclustering.samples.definitions import FCPS_SAMPLES
|
936
|
|
|
|
937
|
|
|
# Read data three dimensional data.
|
938
|
|
|
data = read_sample(FCPS_SAMPLES.SAMPLE_CHAINLINK)
|
939
|
|
|
|
940
|
|
|
# Prepare algorithm's parameters.
|
941
|
|
|
levels = 11
|
942
|
|
|
|
943
|
|
|
# Create instance of BANG algorithm.
|
944
|
|
|
bang_instance = bang(data, levels)
|
945
|
|
|
bang_instance.process()
|
946
|
|
|
|
947
|
|
|
# Obtain clustering results.
|
948
|
|
|
clusters = bang_instance.get_clusters()
|
949
|
|
|
noise = bang_instance.get_noise()
|
950
|
|
|
directory = bang_instance.get_directory()
|
951
|
|
|
dendrogram = bang_instance.get_dendrogram()
|
952
|
|
|
|
953
|
|
|
# Visualize BANG clustering results.
|
954
|
|
|
bang_visualizer.show_blocks(directory)
|
955
|
|
|
bang_visualizer.show_dendrogram(dendrogram)
|
956
|
|
|
bang_visualizer.show_clusters(data, clusters, noise)
|
957
|
|
|
@endcode
|
958
|
|
|
|
959
|
|
|
There is visualization of BANG-clustering of three-dimensional data 'chainlink'. BANG-blocks that were formed during
|
960
|
|
|
processing are shown on following figure. The darkest color means highest density, blocks that does not cover points
|
961
|
|
|
are transparent:
|
962
|
|
|
@image html bang_blocks_chainlink.png "Fig. 1. BANG-blocks that cover input data."
|
963
|
|
|
|
964
|
|
|
Here is obtained dendrogram that can be used for further analysis to improve clustering results:
|
965
|
|
|
@image html bang_dendrogram_chainlink.png "Fig. 2. BANG dendrogram where the X-axis contains BANG-blocks, the Y-axis contains density."
|
966
|
|
|
|
967
|
|
|
BANG clustering result of 'chainlink' data:
|
968
|
|
|
@image html bang_clustering_chainlink.png "Fig. 3. BANG clustering result. Data: 'chainlink'."
|
969
|
|
|
|
970
|
|
|
"""
|
971
|
|
|
|
972
|
|
|
def __init__(self, data, levels, density_threshold=0.0, ccore=False):
|
973
|
|
|
"""!
|
974
|
|
|
@brief Create BANG clustering algorithm.
|
975
|
|
|
|
976
|
|
|
@param[in] data (list): Input data (list of points) that should be clustered.
|
977
|
|
|
@param[in] levels (uint): Amount of levels in tree (how many times block should be split).
|
978
|
|
|
@param[in] density_threshold (double): If block density is smaller than this value then contained data by this
|
979
|
|
|
block is considered as a noise.
|
980
|
|
|
@param[in] ccore (bool): Reserved positional argument - not used yet.
|
981
|
|
|
|
982
|
|
|
"""
|
983
|
|
|
self.__data = data
|
984
|
|
|
self.__levels = levels
|
985
|
|
|
self.__directory = None
|
986
|
|
|
self.__clusters = []
|
987
|
|
|
self.__noise = []
|
988
|
|
|
self.__cluster_blocks = []
|
989
|
|
|
self.__dendrogram = []
|
990
|
|
|
self.__density_threshold = density_threshold
|
991
|
|
|
self.__ccore = ccore
|
992
|
|
|
|
993
|
|
|
self.__validate_arguments()
|
994
|
|
|
|
995
|
|
|
|
996
|
|
|
def process(self):
|
997
|
|
|
"""!
|
998
|
|
|
@brief Performs clustering process in line with rules of BANG clustering algorithm.
|
999
|
|
|
|
1000
|
|
|
@see get_clusters()
|
1001
|
|
|
@see get_noise()
|
1002
|
|
|
@see get_directory()
|
1003
|
|
|
@see get_dendrogram()
|
1004
|
|
|
|
1005
|
|
|
"""
|
1006
|
|
|
self.__directory = bang_directory(self.__data, self.__levels, self.__density_threshold)
|
1007
|
|
|
self.__allocate_clusters()
|
1008
|
|
|
|
1009
|
|
|
|
1010
|
|
|
def get_clusters(self):
|
1011
|
|
|
"""!
|
1012
|
|
|
@brief Returns allocated clusters.
|
1013
|
|
|
|
1014
|
|
|
@remark Allocated clusters are returned only after data processing (method process()). Otherwise empty list is returned.
|
1015
|
|
|
|
1016
|
|
|
@return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
|
1017
|
|
|
|
1018
|
|
|
@see process()
|
1019
|
|
|
@see get_noise()
|
1020
|
|
|
|
1021
|
|
|
"""
|
1022
|
|
|
return self.__clusters
|
1023
|
|
|
|
1024
|
|
|
|
1025
|
|
|
def get_noise(self):
|
1026
|
|
|
"""!
|
1027
|
|
|
@brief Returns allocated noise.
|
1028
|
|
|
|
1029
|
|
|
@remark Allocated noise is returned only after data processing (method process()). Otherwise empty list is returned.
|
1030
|
|
|
|
1031
|
|
|
@return (list) List of indexes that are marked as a noise.
|
1032
|
|
|
|
1033
|
|
|
@see process()
|
1034
|
|
|
@see get_clusters()
|
1035
|
|
|
|
1036
|
|
|
"""
|
1037
|
|
|
return self.__noise
|
1038
|
|
|
|
1039
|
|
|
|
1040
|
|
|
def get_directory(self):
|
1041
|
|
|
"""!
|
1042
|
|
|
@brief Returns grid directory that describes grid of the processed data.
|
1043
|
|
|
|
1044
|
|
|
@remark Grid directory is returned only after data processing (method process()). Otherwise None value is returned.
|
1045
|
|
|
|
1046
|
|
|
@return (bang_directory) BANG directory that describes grid of process data.
|
1047
|
|
|
|
1048
|
|
|
@see process()
|
1049
|
|
|
|
1050
|
|
|
"""
|
1051
|
|
|
return self.__directory
|
1052
|
|
|
|
1053
|
|
|
|
1054
|
|
|
def get_dendrogram(self):
|
1055
|
|
|
"""!
|
1056
|
|
|
@brief Returns dendrogram of clusters.
|
1057
|
|
|
@details Dendrogram is created in following way: the density indices of all regions are calculated and sorted
|
1058
|
|
|
in decreasing order for each cluster during clustering process.
|
1059
|
|
|
|
1060
|
|
|
@remark Dendrogram is returned only after data processing (method process()). Otherwise empty list is returned.
|
1061
|
|
|
|
1062
|
|
|
"""
|
1063
|
|
|
return self.__dendrogram
|
1064
|
|
|
|
1065
|
|
|
|
1066
|
|
|
def get_cluster_encoding(self):
|
1067
|
|
|
"""!
|
1068
|
|
|
@brief Returns clustering result representation type that indicate how clusters are encoded.
|
1069
|
|
|
|
1070
|
|
|
@return (type_encoding) Clustering result representation.
|
1071
|
|
|
|
1072
|
|
|
@see get_clusters()
|
1073
|
|
|
|
1074
|
|
|
"""
|
1075
|
|
|
|
1076
|
|
|
return type_encoding.CLUSTER_INDEX_LIST_SEPARATION
|
1077
|
|
|
|
1078
|
|
|
|
1079
|
|
|
def __validate_arguments(self):
|
1080
|
|
|
"""!
|
1081
|
|
|
@brief Check input arguments of BANG algorithm and if one of them is not correct then appropriate exception
|
1082
|
|
|
is thrown.
|
1083
|
|
|
|
1084
|
|
|
"""
|
1085
|
|
|
if self.__levels <= 0:
|
1086
|
|
|
raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % self.__levels)
|
1087
|
|
|
|
1088
|
|
|
if len(self.__data) == 0:
|
1089
|
|
|
raise ValueError("Empty input data. Data should contain at least one point.")
|
1090
|
|
|
|
1091
|
|
|
if self.__density_threshold < 0:
|
1092
|
|
|
raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)
|
1093
|
|
|
|
1094
|
|
|
|
1095
|
|
|
def __allocate_clusters(self):
|
1096
|
|
|
"""!
|
1097
|
|
|
@brief Performs cluster allocation using leafs of tree in BANG directory (the smallest cells).
|
1098
|
|
|
|
1099
|
|
|
"""
|
1100
|
|
|
leaf_blocks = self.__directory.get_leafs()
|
1101
|
|
|
unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
|
1102
|
|
|
|
1103
|
|
|
current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
|
1104
|
|
|
cluster_index = 0
|
1105
|
|
|
|
1106
|
|
|
while current_block is not None:
|
1107
|
|
|
if current_block.get_density() <= self.__density_threshold:
|
1108
|
|
|
break
|
1109
|
|
|
|
1110
|
|
|
self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
|
1111
|
|
|
|
1112
|
|
|
current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
|
1113
|
|
|
cluster_index += 1
|
1114
|
|
|
|
1115
|
|
|
self.__store_clustering_results(cluster_index, leaf_blocks)
|
1116
|
|
|
|
1117
|
|
|
|
1118
|
|
|
def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
|
1119
|
|
|
"""!
|
1120
|
|
|
@brief Expand cluster from specific block that is considered as a central block.
|
1121
|
|
|
|
1122
|
|
|
@param[in] block (bang_block): Block that is considered as a central block for cluster.
|
1123
|
|
|
@param[in] cluster_index (uint): Index of cluster that is assigned to blocks that forms new cluster.
|
1124
|
|
|
@param[in] leaf_blocks (list): Leaf BANG-blocks that are considered during cluster formation.
|
1125
|
|
|
@param[in] unhandled_block_indexes (set): Set of candidates (BANG block indexes) to become a cluster member. The
|
1126
|
|
|
parameter helps to reduce traversing among BANG-block providing only restricted set of block that
|
1127
|
|
|
should be considered.
|
1128
|
|
|
|
1129
|
|
|
"""
|
1130
|
|
|
|
1131
|
|
|
block.set_cluster(cluster_index)
|
1132
|
|
|
self.__update_cluster_dendrogram(cluster_index, [block])
|
1133
|
|
|
|
1134
|
|
|
neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
|
1135
|
|
|
self.__update_cluster_dendrogram(cluster_index, neighbors)
|
1136
|
|
|
|
1137
|
|
|
for neighbor in neighbors:
|
1138
|
|
|
neighbor.set_cluster(cluster_index)
|
1139
|
|
|
neighbor_neighbors = self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
|
1140
|
|
|
self.__update_cluster_dendrogram(cluster_index, neighbor_neighbors)
|
1141
|
|
|
|
1142
|
|
|
neighbors += neighbor_neighbors
|
1143
|
|
|
|
1144
|
|
|
|
1145
|
|
|
def __store_clustering_results(self, amount_clusters, leaf_blocks):
|
1146
|
|
|
"""!
|
1147
|
|
|
@brief Stores clustering results in a convenient way.
|
1148
|
|
|
|
1149
|
|
|
@param[in] amount_clusters (uint): Amount of cluster that was allocated during processing.
|
1150
|
|
|
@param[in] leaf_blocks (list): Leaf BANG-blocks (the smallest cells).
|
1151
|
|
|
|
1152
|
|
|
"""
|
1153
|
|
|
self.__clusters = [[] for _ in range(amount_clusters)]
|
1154
|
|
|
for block in leaf_blocks:
|
1155
|
|
|
index = block.get_cluster()
|
1156
|
|
|
|
1157
|
|
|
if index is not None:
|
1158
|
|
|
self.__clusters[index] += block.get_points()
|
1159
|
|
|
else:
|
1160
|
|
|
self.__noise += block.get_points()
|
1161
|
|
|
|
1162
|
|
|
self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
|
1163
|
|
|
self.__noise = list(set(self.__noise))
|
1164
|
|
|
|
1165
|
|
|
|
1166
|
|
|
def __find_block_center(self, level_blocks, unhandled_block_indexes):
|
1167
|
|
|
"""!
|
1168
|
|
|
@brief Search block that is cluster center for new cluster.
|
1169
|
|
|
|
1170
|
|
|
@return (bang_block) Central block for new cluster, if cluster is not found then None value is returned.
|
1171
|
|
|
|
1172
|
|
|
"""
|
1173
|
|
|
for i in reversed(range(len(level_blocks))):
|
1174
|
|
|
if level_blocks[i].get_density() <= self.__density_threshold:
|
1175
|
|
|
return None
|
1176
|
|
|
|
1177
|
|
|
if level_blocks[i].get_cluster() is None:
|
1178
|
|
|
unhandled_block_indexes.remove(i)
|
1179
|
|
|
return level_blocks[i]
|
1180
|
|
|
|
1181
|
|
|
return None
|
1182
|
|
|
|
1183
|
|
|
|
1184
|
|
|
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
|
1185
|
|
|
"""!
|
1186
|
|
|
@brief Search block neighbors that are parts of new clusters (density is greater than threshold and that are
|
1187
|
|
|
not cluster members yet), other neighbors are ignored.
|
1188
|
|
|
|
1189
|
|
|
@param[in] block (bang_block): BANG-block for which neighbors should be found (which can be part of cluster).
|
1190
|
|
|
@param[in] level_blocks (list): BANG-blocks on specific level.
|
1191
|
|
|
@param[in] unhandled_block_indexes (set): Blocks that have not been processed yet.
|
1192
|
|
|
|
1193
|
|
|
@return (list) Block neighbors that can become part of cluster.
|
1194
|
|
|
|
1195
|
|
|
"""
|
1196
|
|
|
neighbors = []
|
1197
|
|
|
|
1198
|
|
|
handled_block_indexes = []
|
1199
|
|
|
for unhandled_index in unhandled_block_indexes:
|
1200
|
|
|
if block.is_neighbor(level_blocks[unhandled_index]):
|
1201
|
|
|
handled_block_indexes.append(unhandled_index)
|
1202
|
|
|
neighbors.append(level_blocks[unhandled_index])
|
1203
|
|
|
|
1204
|
|
|
# Maximum number of neighbors is eight
|
1205
|
|
|
if len(neighbors) == 8:
|
1206
|
|
|
break
|
1207
|
|
|
|
1208
|
|
|
for handled_index in handled_block_indexes:
|
1209
|
|
|
unhandled_block_indexes.remove(handled_index)
|
1210
|
|
|
|
1211
|
|
|
return neighbors
|
1212
|
|
|
|
1213
|
|
|
|
1214
|
|
|
def __update_cluster_dendrogram(self, index_cluster, blocks):
|
1215
|
|
|
"""!
|
1216
|
|
|
@brief Append clustered blocks to dendrogram.
|
1217
|
|
|
|
1218
|
|
|
@param[in] index_cluster (uint): Cluster index that was assigned to blocks.
|
1219
|
|
|
@param[in] blocks (list): Blocks that were clustered.
|
1220
|
|
|
|
1221
|
|
|
"""
|
1222
|
|
|
if len(self.__dendrogram) <= index_cluster:
|
1223
|
|
|
self.__dendrogram.append([])
|
1224
|
|
|
|
1225
|
|
|
blocks = sorted(blocks, key=lambda block: block.get_density(), reverse=True)
|
1226
|
|
|
self.__dendrogram[index_cluster] += blocks
|
1227
|
|
|
|
1228
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.