Completed
Push — 0.8.dev ( 5e2740...a76884 )
by Andrei
01:04
created

bang.get_directory()   A

Complexity

Conditions 1

Size

Total Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 2
rs 10
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: BANG.
4
@details Implementation based on paper @cite inproceedings::bang::1.
5
6
@authors Andrei Novikov ([email protected])
7
@date 2014-2018
8
@copyright GNU Public License
9
10
@cond GNU_PUBLIC_LICENSE
11
    PyClustering is free software: you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation, either version 3 of the License, or
14
    (at your option) any later version.
15
16
    PyClustering is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20
21
    You should have received a copy of the GNU General Public License
22
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
@endcond
24
25
"""
26
27
28
import matplotlib.pyplot as plt
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
29
import matplotlib.patches as patches
0 ignored issues
show
Configuration introduced by
The import matplotlib.patches could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
31
from pyclustering.utils import data_corners
32
33
from pyclustering.cluster import cluster_visualizer
34
35
36
class bang_visualizer:
37
    @staticmethod
38
    def show_blocks(data, directory):
39
        visualizer = cluster_visualizer()
40
        visualizer.append_cluster(data)
41
42
        figure = visualizer.show(display=False)
43
44
        bang_visualizer.__draw_blocks(figure, 0, directory.get_leafs())
45
        plt.show()
46
47
48
    @staticmethod
49
    def __draw_blocks(figure, offset, level_blocks):
50
        ax = figure.get_axes()[offset]
51
        ax.grid(False)
52
53
        for block in level_blocks:
54
            bang_visualizer.__draw_block(ax, block)
55
56
57
    @staticmethod
58
    def __draw_block(ax, block):
59
        max_corner, min_corner = block.get_spatial_block().get_corners()
60
        belong_cluster = block.get_cluster() is not None
61
62
        if len(max_corner) == 2:
63
            rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
64
                                     fill=belong_cluster,
65
                                     alpha=0.5)
66
            ax.add_patch(rect)
67
68
            xlabel = (max_corner[0] + min_corner[0]) / 2.0
69
            ylabel = (max_corner[1] + min_corner[1]) / 2.0
70
            ax.text(xlabel, ylabel, str(block.get_region()), ha="center", va="center")
71
72
        else:
73
            raise ValueError("Impossible to display blocks on non-2D dimensional data.")
74
75
76
77
class bang_directory:
78
    def __init__(self, data, levels, density_threshold=0.0):
79
        """!
80
        @brief Create BANG directory - basically tree structure with direct access to leafs.
81
82
        @param[in] levels (uint): Height of the blocks tree.
83
        @param[in] density_threshold (double): The lowest level of density when contained data by bang-block is
84
                    considered as a noise and there is no need to split it till the last level.
85
86
        """
87
        self.__data = data
88
        self.__levels = levels
89
        self.__density_threshold = density_threshold
90
        self.__leafs = []
91
        self.__root = None
92
93
        self.__create_directory()
94
95
96
    def get_leafs(self):
97
        return self.__leafs
98
99
100
    def __create_directory(self):
101
        """!
102
        @brief Create BANG directory as a tree with separate storage for leafs.
103
104
        """
105
106
        min_corner, max_corner = data_corners(self.__data)
107
        data_block = spatial_block(max_corner, min_corner)
108
109
        cache_require = (self.__levels == 1)
110
        self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
111
112
        if cache_require:
113
            self.__leafs.append(self.__root)
114
        else:
115
            self.__build_directory_levels()
116
117
118
    def __build_directory_levels(self):
119
        """!
120
        @brief Build levels of direction if amount of level is greater than one.
121
122
        """
123
        previous_level_blocks = [ self.__root ]
124
        for level in range(1, self.__levels):
125
            previous_level_blocks = self.__build_level(previous_level_blocks, level)
126
127
        self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
128
129
130
    def __build_level(self, previous_level_blocks, level):
131
        """!
132
        @brief Build new level of directory.
133
134
        @param[in] previous_level_blocks (list): BANG-blocks on the previous level.
135
        @param[in] level (uint): Level number that should be built.
136
137
        @return (list) New block on the specified level.
138
139
        """
140
        current_level_blocks = []
141
142
        split_dimension = level % len(self.__data[0])
143
        cache_require = (level == self.__levels - 1)
144
145
        for block in previous_level_blocks:
146
            self.__split_block(block, split_dimension, cache_require, current_level_blocks)
147
148
        if cache_require:
149
            self.__leafs += current_level_blocks
150
151
        return current_level_blocks
152
153
154
    def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
155
        """!
156
        @brief Split specific block in specified dimension.
157
        @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
158
                  leafs.
159
160
        @param[in] block (bang_block): BANG-block that should be split.
161
        @param[in] split_dimension (uint): Dimension at which splitting should be performed.
162
        @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
163
        @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
164
165
        """
166
        if block.get_density() <= self.__density_threshold:
167
            self.__leafs.append(block)
168
169
        else:
170
            left, right = block.split(split_dimension, cache_require)
171
            current_level_blocks.append(left)
172
            current_level_blocks.append(right)
173
174
175
176
class spatial_block:
177
    """!
178
    @brief Geometrical description of BANG block in data space.
179
    @details Provides services related to spatial function and used by bang_block
180
181
    @see bang_block
182
183
    """
184
185
    def __init__(self, max_corner, min_corner):
186
        """!
187
        @brief Creates spatial block in data space.
188
189
        @param[in] max_corner (array_like): Maximum corner coordinates of the block.
190
        @param[in] min_corner (array_like): Minimal corner coordinates of the block.
191
192
        """
193
        self.__max_corner = max_corner
194
        self.__min_corner = min_corner
195
        self.__volume = self.__calculate_volume()
196
197
198
    def __str__(self):
199
        """!
200
        @brief Returns string block description.
201
202
        """
203
        return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
204
205
206
    def __contains__(self, point):
207
        """!
208
        @brief Point is considered as contained if it lies in block (belong to it).
209
210
        """
211
        for i in range(len(point)):
212
            if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
213
                return False
214
215
        return True
216
217
218
    def get_corners(self):
219
        """!
220
        @brief Return spatial description of current block.
221
222
        @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
223
224
        """
225
        return self.__max_corner, self.__min_corner
226
227
228
    def get_volume(self):
229
        """!
230
        @brief Returns volume of current block.
231
        @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
232
                  for 3D is volume of 3D figure, and for ND is volume of ND figure.
233
234
        @return (double) Volume of current block.
235
236
        """
237
        return self.__volume
238
239
240
    def split(self, dimension):
241
        """!
242
        @brief Split current block into two spatial blocks in specified dimension.
243
244
        @param[in] dimension (uint): Dimension where current block should be split.
245
246
        @return (tuple) Pair of new split blocks from current block.
247
248
        """
249
        first_max_corner = self.__max_corner[:]
250
        second_min_corner = self.__min_corner[:]
251
252
        split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
253
254
        first_max_corner[dimension] = split_border
255
        second_min_corner[dimension] = split_border
256
257
        return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
258
259
260
    def is_neighbor(self, block):
261
        """!
262
        @brief Performs calculation to identify whether specified block is neighbor of current block.
263
264
        @param[in] block (spatial_block): Another block that is check whether it is neighbor.
265
266
        @return (bool) True is blocks are neighbors, False otherwise.
267
268
        """
269
        if block is not self:
270
            block_max_corner, _ = block.get_corners()
271
            dimension = len(block_max_corner)
272
            neighborhood_score = self.__calculate_neighborhood(block_max_corner)
273
274
            if neighborhood_score == dimension:
275
                return True
276
277
        return False
278
279
280
    def __calculate_neighborhood(self, block_max_corner):
281
        """!
282
        @brief Calculates neighborhood score that defined whether blocks are neighbors.
283
284
        @param[in] block_max_corner (list): Maximum coordinates of other block.
285
286
        @return (uint) Neighborhood score.
287
288
        """
289
        dimension = len(block_max_corner)
290
291
        length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
292
293
        neighborhood_score = 0
294
        for i in range(dimension):
295
            diff = abs(block_max_corner[i] - self.__max_corner[i])
296
297
            if diff <= length_edges[i] + length_edges[i] * 0.0001:
298
                neighborhood_score += 1
299
300
        return neighborhood_score
301
302
303
    def __calculate_volume(self):
304
        """!
305
        @brief Calculates volume of current spatial block.
306
307
        @return (double) Volume of current spatial block.
308
309
        """
310
        volume = self.__max_corner[0] - self.__min_corner[0]
311
        for i in range(1, len(self.__max_corner)):
312
            volume *= self.__max_corner[i] - self.__min_corner[i]
313
314
        return volume
315
316
317
318
class bang_block:
319
    def __init__(self, data, region, level, space_block, cache_points=False):
320
        self.__data = data
321
        self.__region_number = region
322
        self.__level = level
323
        self.__spatial_block = space_block
324
        self.__cache_points = cache_points
325
326
        self.__cluster = None
327
        self.__points = None
328
        self.__density = self.__calculate_density()
329
330
331
    def __str__(self):
332
        return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
333
334
335
    def get_region(self):
336
        return self.__region_number
337
338
339
    def get_density(self):
340
        return self.__density
341
342
343
    def get_cluster(self):
344
        return self.__cluster
345
346
347
    def get_spatial_block(self):
348
        return self.__spatial_block
349
350
351
    def get_points(self):
352
            return self.__points
353
354
355
    def set_cluster(self, index):
356
        self.__cluster = index
357
358
359
    def is_neighbor(self, block):
360
        return self.get_spatial_block().is_neighbor(block.get_spatial_block())
361
362
363
    def split(self, split_dimension, cache_points):
364
        left_region_number = self.__region_number
365
        right_region_number = self.__region_number + 2 ** self.__level
366
367
        first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
368
369
        left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
370
        right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
371
372
        return left, right
373
374
375
    def __calculate_density(self):
376
        return self.__get_amount_points() / self.__spatial_block.get_volume()
377
378
379
    def __get_amount_points(self):
380
        amount = 0
381
        for index in range(len(self.__data)):
382
            if self.__data[index] in self.__spatial_block:
383
                self.__cache_point(index)
384
                amount += 1
385
386
        return amount
387
388
389
    def __cache_point(self, index):
390
        if self.__cache_points:
391
            if self.__points is None:
392
                self.__points = []
393
394
            self.__points.append(index)
395
396
397
398
class bang:
399
    def __init__(self, data, levels, density_threshold = 0.0):
400
        self.__data = data
401
        self.__levels = levels
402
        self.__directory = None
403
        self.__clusters = []
404
        self.__noise = []
405
        self.__cluster_blocks = []
406
        self.__density_threshold = density_threshold
407
408
409
    def process(self):
410
        self.__validate_arguments()
411
412
        self.__directory = bang_directory(self.__data, self.__levels, self.__density_threshold)
413
        self.__allocate_clusters()
414
415
416
    def get_clusters(self):
417
        return self.__clusters
418
419
420
    def get_noise(self):
421
        return self.__noise
422
423
424
    def get_directory(self):
425
        return self.__directory
426
427
428
    def __validate_arguments(self):
429
        if self.__levels <= 0:
430
            raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % self.__levels)
431
432
        if len(self.__data) == 0:
433
            raise ValueError("Empty input data. Data should contain at least one point.")
434
435
        if self.__density_threshold < 0:
436
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)
437
438
439
    def __allocate_clusters(self):
440
        leaf_blocks = self.__directory.get_leafs()
441
        unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
442
        appropriate_block_indexes = set(unhandled_block_indexes)
443
444
        current_block = self.__find_block_center(leaf_blocks)
445
        cluster_index = 0
446
447
        while current_block is not None:
448
            if current_block.get_density() <= self.__density_threshold:
449
                break
450
451
            self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
452
453
            current_block = self.__find_block_center(leaf_blocks)
454
            cluster_index += 1
455
456
        self.__store_clustering_results(cluster_index, appropriate_block_indexes, leaf_blocks)
457
458
459
    def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
460
        block.set_cluster(cluster_index)
461
462
        neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
463
        for neighbor in neighbors:
464
            neighbor.set_cluster(cluster_index)
465
            neighbors += self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
466
467
468
    def __store_clustering_results(self, amount_clusters, appropriate_block_indexes, leaf_blocks):
469
        self.__clusters = [[] for _ in range(amount_clusters)]
470
        for appropriate_index in appropriate_block_indexes:
471
            block = leaf_blocks[appropriate_index]
472
            index = block.get_cluster()
473
474
            if index is not None:
475
                self.__clusters[index] += block.get_points()
476
            else:
477
                self.__noise += block.get_points()
478
479
        self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
480
        self.__noise = list(set(self.__noise))
481
482
483
    def __find_block_center(self, level_blocks):
484
        for i in reversed(range(len(level_blocks))):
485
            if level_blocks[i].get_cluster() is None:
486
                return level_blocks[i]
487
488
        return None
489
490
491
    def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
492
        neighbors = []
493
494
        handled_block_indexes = []
495
        for unhandled_index in unhandled_block_indexes:
496
            if block.is_neighbor(level_blocks[unhandled_index]):
497
                handled_block_indexes.append(unhandled_index)
498
                neighbors.append(level_blocks[unhandled_index])
499
500
                # Maximum number of neighbors is eight
501
                if len(neighbors) == 8:
502
                    break
503
504
        for handled_index in handled_block_indexes:
505
            unhandled_block_indexes.remove(handled_index)
506
507
        return neighbors
508