1
|
|
|
"""!
|
2
|
|
|
|
3
|
|
|
@brief Cluster analysis algorithm: BANG.
|
4
|
|
|
@details Implementation based on paper @cite inproceedings::bang::1.
|
5
|
|
|
|
6
|
|
|
@authors Andrei Novikov ([email protected])
|
7
|
|
|
@date 2014-2018
|
8
|
|
|
@copyright GNU Public License
|
9
|
|
|
|
10
|
|
|
@cond GNU_PUBLIC_LICENSE
|
11
|
|
|
PyClustering is free software: you can redistribute it and/or modify
|
12
|
|
|
it under the terms of the GNU General Public License as published by
|
13
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
14
|
|
|
(at your option) any later version.
|
15
|
|
|
|
16
|
|
|
PyClustering is distributed in the hope that it will be useful,
|
17
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
18
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
19
|
|
|
GNU General Public License for more details.
|
20
|
|
|
|
21
|
|
|
You should have received a copy of the GNU General Public License
|
22
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
23
|
|
|
@endcond
|
24
|
|
|
|
25
|
|
|
"""
|
26
|
|
|
|
27
|
|
|
|
28
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
29
|
|
|
import matplotlib.patches as patches
|
|
|
|
|
30
|
|
|
|
31
|
|
|
from pyclustering.utils import data_corners
|
32
|
|
|
|
33
|
|
|
from pyclustering.cluster import cluster_visualizer
|
34
|
|
|
|
35
|
|
|
|
36
|
|
|
# TODO: Store blocks in block directory and control them via directory
|
37
|
|
|
|
38
|
|
|
class bang_visualizer:
|
39
|
|
|
@staticmethod
|
40
|
|
|
def show_level_blocks(data, level_blocks):
|
41
|
|
|
visualizer = cluster_visualizer()
|
42
|
|
|
visualizer.append_cluster(data)
|
43
|
|
|
|
44
|
|
|
figure = visualizer.show(display=False)
|
45
|
|
|
|
46
|
|
|
bang_visualizer.__draw_blocks(figure, 0, level_blocks)
|
47
|
|
|
plt.show();
|
48
|
|
|
|
49
|
|
|
|
50
|
|
|
@staticmethod
|
51
|
|
|
def __draw_blocks(figure, offset, level_blocks):
|
52
|
|
|
ax = figure.get_axes()[offset];
|
53
|
|
|
ax.grid(False)
|
54
|
|
|
|
55
|
|
|
for block in level_blocks:
|
56
|
|
|
bang_visualizer.__draw_block(ax, block)
|
57
|
|
|
|
58
|
|
|
|
59
|
|
|
@staticmethod
|
60
|
|
|
def __draw_block(ax, block):
|
61
|
|
|
max_corner, min_corner = block.get_corners()
|
62
|
|
|
belong_cluster = block.get_cluster() is not None
|
63
|
|
|
|
64
|
|
|
if len(max_corner) == 2:
|
65
|
|
|
rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
|
66
|
|
|
fill=belong_cluster,
|
67
|
|
|
alpha=0.5)
|
68
|
|
|
ax.add_patch(rect)
|
69
|
|
|
|
70
|
|
|
else:
|
71
|
|
|
raise ValueError("Impossible to display blocks on non-2D dimensional data.")
|
72
|
|
|
|
73
|
|
|
|
74
|
|
|
class bang_block:
|
75
|
|
|
def __init__(self, data, dimension, region, level, max_corner, min_corner, cache_points=False):
|
76
|
|
|
self.__region_number = region
|
77
|
|
|
self.__level = level
|
78
|
|
|
self.__data = data
|
79
|
|
|
self.__dimension = dimension
|
80
|
|
|
self.__max_corner = max_corner
|
81
|
|
|
self.__min_corner = min_corner
|
82
|
|
|
self.__cache_points = cache_points
|
83
|
|
|
self.__cluster = None
|
84
|
|
|
self.__points = None
|
85
|
|
|
self.__density = self.__calculate_density()
|
86
|
|
|
|
87
|
|
|
|
88
|
|
|
def __str__(self):
|
89
|
|
|
return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
|
90
|
|
|
|
91
|
|
|
|
92
|
|
|
def get_region(self):
|
93
|
|
|
return self.__region_number
|
94
|
|
|
|
95
|
|
|
|
96
|
|
|
def get_level(self):
|
97
|
|
|
return self.__level
|
98
|
|
|
|
99
|
|
|
|
100
|
|
|
def get_density(self):
|
101
|
|
|
return self.__density
|
102
|
|
|
|
103
|
|
|
|
104
|
|
|
def get_cluster(self):
|
105
|
|
|
return self.__cluster
|
106
|
|
|
|
107
|
|
|
|
108
|
|
|
def get_corners(self):
|
109
|
|
|
return self.__max_corner, self.__min_corner
|
110
|
|
|
|
111
|
|
|
|
112
|
|
|
def get_points(self):
|
113
|
|
|
if self.__points is not None:
|
114
|
|
|
return self.__points
|
115
|
|
|
|
116
|
|
|
return [index for index in range(len(self.__data)) if self.contained(self.__data[index])]
|
117
|
|
|
|
118
|
|
|
|
119
|
|
|
def set_cluster(self, index):
|
120
|
|
|
self.__cluster = index
|
121
|
|
|
|
122
|
|
|
|
123
|
|
|
def is_neighbor(self, block):
|
124
|
|
|
if block is self:
|
125
|
|
|
return False;
|
126
|
|
|
|
127
|
|
|
block_max_corner, block_min_corner = block.get_corners()
|
128
|
|
|
for i in range(len(self.__max_corner)):
|
129
|
|
|
if self.__max_corner[i] == block_min_corner[i]:
|
130
|
|
|
return True
|
131
|
|
|
|
132
|
|
|
if self.__min_corner[i] == block_max_corner[i]:
|
133
|
|
|
return True
|
134
|
|
|
|
135
|
|
|
return False
|
136
|
|
|
|
137
|
|
|
|
138
|
|
|
def split(self, cache_points):
|
139
|
|
|
left_region_number = self.__region_number
|
140
|
|
|
right_region_number = self.__region_number + 2 ** self.__level
|
141
|
|
|
|
142
|
|
|
dimension = self.__dimension + 1
|
143
|
|
|
if dimension >= len(self.__data[0]):
|
144
|
|
|
dimension = 0
|
145
|
|
|
|
146
|
|
|
first_max_corner = self.__max_corner[:]
|
147
|
|
|
first_min_corner = self.__min_corner[:]
|
148
|
|
|
second_max_corner = self.__max_corner[:]
|
149
|
|
|
second_min_corner = self.__min_corner[:]
|
150
|
|
|
|
151
|
|
|
split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
|
152
|
|
|
first_max_corner[dimension] = split_border
|
153
|
|
|
second_min_corner[dimension] = split_border
|
154
|
|
|
|
155
|
|
|
left = bang_block(self.__data, dimension, left_region_number, self.__level + 1, first_max_corner, first_min_corner, cache_points)
|
156
|
|
|
right = bang_block(self.__data, dimension, right_region_number, self.__level + 1, second_max_corner, second_min_corner, cache_points)
|
157
|
|
|
|
158
|
|
|
return left, right
|
159
|
|
|
|
160
|
|
|
|
161
|
|
|
def contained(self, point):
|
162
|
|
|
for i in range(len(point)):
|
163
|
|
|
if point[i] <= self.__min_corner[i] or point[i] > self.__max_corner[i]:
|
164
|
|
|
return False
|
165
|
|
|
|
166
|
|
|
return True
|
167
|
|
|
|
168
|
|
|
|
169
|
|
|
def __calculate_density(self):
|
170
|
|
|
volume = self.__max_corner[0] - self.__min_corner[0]
|
171
|
|
|
for i in range(1, len(self.__max_corner)):
|
172
|
|
|
volume *= self.__max_corner[i] - self.__min_corner[i]
|
173
|
|
|
|
174
|
|
|
amount = self.__get_amount_points()
|
175
|
|
|
return amount / volume
|
176
|
|
|
|
177
|
|
|
|
178
|
|
|
def __get_amount_points(self):
|
179
|
|
|
amount = 0
|
180
|
|
|
for index in range(len(self.__data)):
|
181
|
|
|
if self.contained(self.__data[index]):
|
182
|
|
|
amount += 1
|
183
|
|
|
|
184
|
|
|
if self.__cache_points:
|
185
|
|
|
if self.__points is None:
|
186
|
|
|
self.__points = []
|
187
|
|
|
|
188
|
|
|
self.__points.append(index)
|
189
|
|
|
|
190
|
|
|
return amount
|
191
|
|
|
|
192
|
|
|
|
193
|
|
|
|
194
|
|
|
class bang:
|
195
|
|
|
def __init__(self, data, levels, density_threshold = 0.0):
|
196
|
|
|
self.__data = data
|
197
|
|
|
self.__levels = levels
|
198
|
|
|
self.__blocks = []
|
199
|
|
|
self.__clusters = []
|
200
|
|
|
self.__noise = []
|
201
|
|
|
self.__cluster_blocks = []
|
202
|
|
|
self.__density_threshold = density_threshold
|
203
|
|
|
|
204
|
|
|
|
205
|
|
|
def process(self):
|
206
|
|
|
self.__validate_arguments()
|
207
|
|
|
|
208
|
|
|
self.__build_blocks()
|
209
|
|
|
self.__allocate_clusters()
|
210
|
|
|
|
211
|
|
|
|
212
|
|
|
def get_clusters(self):
|
213
|
|
|
return self.__clusters
|
214
|
|
|
|
215
|
|
|
|
216
|
|
|
def get_noise(self):
|
217
|
|
|
return self.__noise
|
218
|
|
|
|
219
|
|
|
|
220
|
|
|
def get_level_blocks(self, level=-1):
|
221
|
|
|
return self.__blocks[level]
|
222
|
|
|
|
223
|
|
|
|
224
|
|
|
def __validate_arguments(self):
|
225
|
|
|
if self.__levels <= 0:
|
226
|
|
|
raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % (self.__levels))
|
227
|
|
|
|
228
|
|
|
if len(self.__data) == 0:
|
229
|
|
|
raise ValueError("Empty input data. Data should contain at least one point.")
|
230
|
|
|
|
231
|
|
|
if self.__density_threshold < 0:
|
232
|
|
|
raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % (self.__density_threshold))
|
233
|
|
|
|
234
|
|
|
|
235
|
|
|
def __build_blocks(self):
|
236
|
|
|
min_corner, max_corner = data_corners(self.__data)
|
237
|
|
|
root_block = bang_block(self.__data, 0, 0, 0, max_corner, min_corner, self.__levels == 1)
|
238
|
|
|
|
239
|
|
|
level_blocks = [root_block]
|
240
|
|
|
self.__blocks.append(level_blocks)
|
241
|
|
|
|
242
|
|
|
for level in range(1, self.__levels):
|
243
|
|
|
cache_points = (level == self.__levels - 1)
|
244
|
|
|
level_blocks = self.__build_next_level_blocks(level_blocks, cache_points)
|
245
|
|
|
level_blocks = sorted(level_blocks, key=lambda block: block.get_density())
|
246
|
|
|
self.__blocks.append(level_blocks)
|
247
|
|
|
|
248
|
|
|
|
249
|
|
|
def __build_next_level_blocks(self, level_blocks, cache_points):
|
250
|
|
|
next_level_blocks = []
|
251
|
|
|
for block in level_blocks:
|
252
|
|
|
left, right = block.split(cache_points)
|
253
|
|
|
|
254
|
|
|
next_level_blocks.append(left)
|
255
|
|
|
next_level_blocks.append(right)
|
256
|
|
|
|
257
|
|
|
return next_level_blocks
|
258
|
|
|
|
259
|
|
|
|
260
|
|
|
def __allocate_clusters(self):
|
261
|
|
|
level_blocks = self.__blocks[-1]
|
262
|
|
|
unhandled_block_indexes = set(range(len(level_blocks)))
|
263
|
|
|
|
264
|
|
|
current_block = self.__find_block_center(level_blocks)
|
265
|
|
|
cluster_index = 0
|
266
|
|
|
|
267
|
|
|
while current_block is not None:
|
268
|
|
|
if (current_block.get_density() <= self.__density_threshold):
|
269
|
|
|
break
|
270
|
|
|
|
271
|
|
|
current_block.set_cluster(cluster_index)
|
272
|
|
|
|
273
|
|
|
neighbors = self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
|
274
|
|
|
for neighbor in neighbors:
|
275
|
|
|
neighbor.set_cluster(cluster_index)
|
276
|
|
|
neighbors += self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
|
277
|
|
|
|
278
|
|
|
current_block = self.__find_block_center(level_blocks)
|
279
|
|
|
cluster_index += 1
|
280
|
|
|
|
281
|
|
|
self.__clusters = [[] for _ in range(cluster_index)]
|
282
|
|
|
for block in level_blocks:
|
283
|
|
|
index = block.get_cluster()
|
284
|
|
|
if index is not None:
|
285
|
|
|
self.__clusters[index] += block.get_points()
|
286
|
|
|
else:
|
287
|
|
|
self.__noise += block.get_points()
|
288
|
|
|
|
289
|
|
|
|
290
|
|
|
def __find_block_center(self, level_blocks):
|
291
|
|
|
for i in reversed(range(len(level_blocks))):
|
292
|
|
|
if level_blocks[i].get_cluster() is None:
|
293
|
|
|
return level_blocks[i]
|
294
|
|
|
|
295
|
|
|
return None
|
296
|
|
|
|
297
|
|
|
|
298
|
|
|
def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
|
299
|
|
|
neighbors = []
|
300
|
|
|
|
301
|
|
|
handled_block_indexes = []
|
302
|
|
|
for unhandled_index in unhandled_block_indexes:
|
303
|
|
|
if block.is_neighbor(level_blocks[unhandled_index]): # TODO: is_neighbor should consider density!
|
304
|
|
|
handled_block_indexes.append(unhandled_index)
|
305
|
|
|
neighbors.append(block)
|
306
|
|
|
|
307
|
|
|
# Maximum number of neighbors is four
|
308
|
|
|
if len(neighbors) == 4:
|
309
|
|
|
break
|
310
|
|
|
|
311
|
|
|
for handled_index in handled_block_indexes:
|
312
|
|
|
unhandled_block_indexes.remove(handled_index)
|
313
|
|
|
|
314
|
|
|
return neighbors
|
315
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing
__init__.py
files in your module folders. Make sure that you place one file in each sub-folder.