Completed
Push — 0.8.dev ( 645b74...6b2bc2 )
by Andrei
01:39
created

bang_visualizer.show_dendrogram()   A

Complexity

Conditions 3

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 23
rs 9.0856
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: BANG.
4
@details Implementation based on paper @cite inproceedings::bang::1.
5
6
@authors Andrei Novikov ([email protected])
7
@date 2014-2018
8
@copyright GNU Public License
9
10
@cond GNU_PUBLIC_LICENSE
11
    PyClustering is free software: you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation, either version 3 of the License, or
14
    (at your option) any later version.
15
16
    PyClustering is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20
21
    You should have received a copy of the GNU General Public License
22
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
@endcond
24
25
"""
26
27
28
import matplotlib
0 ignored issues
show
Configuration introduced by
The import matplotlib could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
29
import matplotlib.pyplot as plt
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
import matplotlib.patches as patches
0 ignored issues
show
Configuration introduced by
The import matplotlib.patches could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
31
32
from pyclustering.cluster import cluster_visualizer
33
34
from pyclustering.utils import data_corners
35
from pyclustering.utils.color import color as color_list
36
37
38
39
class bang_visualizer:
40
    """!
41
    @brief Visualizer of BANG algorithm's results.
42
    @details BANG visualizer provides visualization services that are specific for BANG algorithm.
43
44
    """
45
46
47
    __maximum_density_alpha = 0.6
48
49
50
    @staticmethod
51
    def show_blocks(data, directory):
52
        """!
53
        @brief Show BANG-blocks (leafs only) in data space.
54
        @details BANG-blocks represents grid that was used for clustering process.
55
56
        @param[in] data (list): Input data space that contains points where each point is also represented by list.
57
        @param[in] directory (bang_directory): Directory that was created by BANG algorithm during clustering process.
58
59
        """
60
        visualizer = cluster_visualizer()
61
        visualizer.append_cluster(data)
62
63
        figure = visualizer.show(display=False)
64
65
        bang_visualizer.__draw_blocks(figure, 0, directory.get_leafs())
66
        plt.show()
67
68
69
    @staticmethod
70
    def show_dendrogram(dendrogram):
71
        """!
72
        @brief Display dendrogram of BANG-blocks.
73
74
        @param[in] dendrogram (list): List representation of dendrogram of BANG-blocks.
75
76
        @see bang.get_dendrogram()
77
78
        """
79
        axis = plt.subplot(111)
80
81
        current_position = 0
82
        for index_cluster in range(len(dendrogram)):
83
            densities = [ block.get_density() for block in dendrogram[index_cluster] ]
84
            xrange = range(current_position, current_position + len(densities))
85
86
            axis.bar(xrange, densities, 1.0, linewidth=0.0, color=color_list.TITLES[index_cluster])
87
88
            current_position += len(densities)
89
90
        plt.xlim([-0.5, current_position - 0.5])
91
        plt.show()
92
93
94
    @staticmethod
95
    def __draw_blocks(figure, ax_index, blocks):
96
        """!
97
        @brief Display BANG-blocks on specified figure.
98
99
        @param[in] figure (figure): Figure that is used for drawing blocks.
100
        @param[in] ax_index (uint): Index of axis where blocks should be displayed.
101
        @param[in] blocks (list): List of blocks that should be displyed.
102
103
        """
104
        ax = figure.get_axes()[ax_index]
105
        ax.grid(False)
106
107
        density_scale = blocks[-1].get_density()
108
        for block in blocks:
109
            bang_visualizer.__draw_block(ax, block, density_scale)
110
111
112
    @staticmethod
113
    def __draw_block(ax, block, density_scale):
114
        """!
115
        @brief Display BANG-block on the specified ax.
116
117
        @param[in] ax (Axis): Axis where block should be displayed.
118
        @param[in] block (bang_block): BANG-block that should be displayed.
119
        @param[in] density_scale (double): Max density to display density of the block by appropriate tone.
120
121
        """
122
        max_corner, min_corner = block.get_spatial_block().get_corners()
123
        belong_cluster = block.get_cluster() is not None
124
125
        density_scale = bang_visualizer.__maximum_density_alpha * block.get_density() / density_scale
126
127
        face_color = matplotlib.colors.to_rgba('blue', alpha=density_scale)
128
        edge_color = matplotlib.colors.to_rgba('black', alpha=1.0)
129
130
        if len(max_corner) == 2:
131
            rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
132
                                     fill=belong_cluster,
133
                                     facecolor=face_color,
134
                                     edgecolor=edge_color,
135
                                     linewidth=0.5)
136
            ax.add_patch(rect)
137
138
        else:
139
            raise ValueError("Impossible to display blocks on non-2D dimensional data.")
140
141
142
143
class bang_directory:
144
    """!
145
    @brief BANG directory stores BANG-blocks that represents grid in data space.
146
    @details The directory build BANG-blocks in binary tree manner. Leafs of the tree stored separately to provide
147
              a direct access to the leafs that should be analysed. Leafs cache data-points.
148
149
    """
150
    def __init__(self, data, levels, density_threshold=0.0):
151
        """!
152
        @brief Create BANG directory - basically tree structure with direct access to leafs.
153
154
        @param[in] levels (uint): Height of the blocks tree.
155
        @param[in] density_threshold (double): The lowest level of density when contained data by bang-block is
156
                    considered as a noise and there is no need to split it till the last level.
157
158
        """
159
        self.__data = data
160
        self.__levels = levels
161
        self.__density_threshold = density_threshold
162
        self.__leafs = []
163
        self.__root = None
164
165
        self.__create_directory()
166
167
168
    def get_leafs(self):
169
        """!
170
        @brief Return leafs - the smallest blocks.
171
        @details Some leafs can be bigger than others because splitting is not performed for blocks whose density is
172
                  less than threshold.
173
174
        @return (list) List of blocks that are leafs of BANG directory.
175
176
        """
177
        return self.__leafs
178
179
180
    def __create_directory(self):
181
        """!
182
        @brief Create BANG directory as a tree with separate storage for leafs.
183
184
        """
185
186
        min_corner, max_corner = data_corners(self.__data)
187
        data_block = spatial_block(max_corner, min_corner)
188
189
        cache_require = (self.__levels == 1)
190
        self.__root = bang_block(self.__data, 0, 0, data_block, cache_require)
191
192
        if cache_require:
193
            self.__leafs.append(self.__root)
194
        else:
195
            self.__build_directory_levels()
196
197
198
    def __build_directory_levels(self):
199
        """!
200
        @brief Build levels of direction if amount of level is greater than one.
201
202
        """
203
        previous_level_blocks = [ self.__root ]
204
        for level in range(1, self.__levels):
205
            previous_level_blocks = self.__build_level(previous_level_blocks, level)
206
207
        self.__leafs = sorted(self.__leafs, key=lambda block: block.get_density())
208
209
210
    def __build_level(self, previous_level_blocks, level):
211
        """!
212
        @brief Build new level of directory.
213
214
        @param[in] previous_level_blocks (list): BANG-blocks on the previous level.
215
        @param[in] level (uint): Level number that should be built.
216
217
        @return (list) New block on the specified level.
218
219
        """
220
        current_level_blocks = []
221
222
        split_dimension = level % len(self.__data[0])
223
        cache_require = (level == self.__levels - 1)
224
225
        for block in previous_level_blocks:
226
            self.__split_block(block, split_dimension, cache_require, current_level_blocks)
227
228
        if cache_require:
229
            self.__leafs += current_level_blocks
230
231
        return current_level_blocks
232
233
234
    def __split_block(self, block, split_dimension, cache_require, current_level_blocks):
235
        """!
236
        @brief Split specific block in specified dimension.
237
        @details Split is not performed for block whose density is lower than threshold value, such blocks are putted to
238
                  leafs.
239
240
        @param[in] block (bang_block): BANG-block that should be split.
241
        @param[in] split_dimension (uint): Dimension at which splitting should be performed.
242
        @param[in] cache_require (bool): Defines when points in cache should be stored during density calculation.
243
        @param[in|out] current_level_blocks (list): Block storage at the current level where new blocks should be added.
244
245
        """
246
        if block.get_density() <= self.__density_threshold:
247
            self.__leafs.append(block)
248
249
        else:
250
            left, right = block.split(split_dimension, cache_require)
251
            current_level_blocks.append(left)
252
            current_level_blocks.append(right)
253
254
255
256
class spatial_block:
257
    """!
258
    @brief Geometrical description of BANG block in data space.
259
    @details Provides services related to spatial function and used by bang_block
260
261
    @see bang_block
262
263
    """
264
265
    def __init__(self, max_corner, min_corner):
266
        """!
267
        @brief Creates spatial block in data space.
268
269
        @param[in] max_corner (array_like): Maximum corner coordinates of the block.
270
        @param[in] min_corner (array_like): Minimal corner coordinates of the block.
271
272
        """
273
        self.__max_corner = max_corner
274
        self.__min_corner = min_corner
275
        self.__volume = self.__calculate_volume()
276
277
278
    def __str__(self):
279
        """!
280
        @brief Returns string block description.
281
282
        @return String representation of the block.
283
284
        """
285
        return "(max: %s; min: %s)" % (self.__max_corner, self.__min_corner)
286
287
288
    def __contains__(self, point):
289
        """!
290
        @brief Point is considered as contained if it lies in block (belong to it).
291
292
        @return (bool) True if point is in block, otherwise False.
293
294
        """
295
        for i in range(len(point)):
296
            if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
297
                return False
298
299
        return True
300
301
302
    def get_corners(self):
303
        """!
304
        @brief Return spatial description of current block.
305
306
        @return (tuple) Pair of maximum and minimum corners (max_corner, min_corner).
307
308
        """
309
        return self.__max_corner, self.__min_corner
310
311
312
    def get_volume(self):
313
        """!
314
        @brief Returns volume of current block.
315
        @details Volume block has uncommon mining here: for 1D is length of a line, for 2D is square of rectangle,
316
                  for 3D is volume of 3D figure, and for ND is volume of ND figure.
317
318
        @return (double) Volume of current block.
319
320
        """
321
        return self.__volume
322
323
324
    def split(self, dimension):
325
        """!
326
        @brief Split current block into two spatial blocks in specified dimension.
327
328
        @param[in] dimension (uint): Dimension where current block should be split.
329
330
        @return (tuple) Pair of new split blocks from current block.
331
332
        """
333
        first_max_corner = self.__max_corner[:]
334
        second_min_corner = self.__min_corner[:]
335
336
        split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
337
338
        first_max_corner[dimension] = split_border
339
        second_min_corner[dimension] = split_border
340
341
        return spatial_block(first_max_corner, self.__min_corner), spatial_block(self.__max_corner, second_min_corner)
342
343
344
    def is_neighbor(self, block):
345
        """!
346
        @brief Performs calculation to identify whether specified block is neighbor of current block.
347
348
        @param[in] block (spatial_block): Another block that is check whether it is neighbor.
349
350
        @return (bool) True is blocks are neighbors, False otherwise.
351
352
        """
353
        if block is not self:
354
            block_max_corner, _ = block.get_corners()
355
            dimension = len(block_max_corner)
356
            neighborhood_score = self.__calculate_neighborhood(block_max_corner)
357
358
            if neighborhood_score == dimension:
359
                return True
360
361
        return False
362
363
364
    def __calculate_neighborhood(self, block_max_corner):
365
        """!
366
        @brief Calculates neighborhood score that defined whether blocks are neighbors.
367
368
        @param[in] block_max_corner (list): Maximum coordinates of other block.
369
370
        @return (uint) Neighborhood score.
371
372
        """
373
        dimension = len(block_max_corner)
374
375
        length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
376
377
        neighborhood_score = 0
378
        for i in range(dimension):
379
            diff = abs(block_max_corner[i] - self.__max_corner[i])
380
381
            if diff <= length_edges[i] + length_edges[i] * 0.0001:
382
                neighborhood_score += 1
383
384
        return neighborhood_score
385
386
387
    def __calculate_volume(self):
388
        """!
389
        @brief Calculates volume of current spatial block.
390
391
        @return (double) Volume of current spatial block.
392
393
        """
394
        volume = self.__max_corner[0] - self.__min_corner[0]
395
        for i in range(1, len(self.__max_corner)):
396
            volume *= self.__max_corner[i] - self.__min_corner[i]
397
398
        return volume
399
400
401
402
class bang_block:
403
    """!
404
    @brief BANG-block that represent spatial region in data space.
405
406
    """
407
    def __init__(self, data, region, level, space_block, cache_points=False):
408
        self.__data = data
409
        self.__region_number = region
410
        self.__level = level
411
        self.__spatial_block = space_block
412
        self.__cache_points = cache_points
413
414
        self.__cluster = None
415
        self.__points = None
416
        self.__density = self.__calculate_density()
417
418
419
    def __str__(self):
420
        return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
421
422
423
    def get_region(self):
424
        """!
425
        @brief Returns region number of BANG-block.
426
        @details Region number is unique on among region numbers on a directory level. Pair of region number and level
427
                  is unique for all directory.
428
429
        @return (uint) Region number.
430
431
        """
432
        return self.__region_number
433
434
435
    def get_density(self):
436
        """!
437
        @brief Returns density of the BANG-block.
438
439
        @return (double) BANG-block density.
440
441
        """
442
        return self.__density
443
444
445
    def get_cluster(self):
446
        """!
447
        @brief Return index of cluster to which the BANG-block belongs to.
448
        @details Index of cluster may have None value if the block was not assigned to any cluster.
449
450
        @return (uint) Index of cluster or None if the block does not belong to any cluster.
451
452
        """
453
        return self.__cluster
454
455
456
    def get_spatial_block(self):
457
        """!
458
        @brief Return spatial block - BANG-block description in data space.
459
460
        @return (spatial_block) Spatial block of the BANG-block.
461
462
        """
463
        return self.__spatial_block
464
465
466
    def get_points(self):
467
        """!
468
        @brief Return points that covers by the BANG-block.
469
        @details Returns None if block is not leaf.
470
471
        @return (list) List of point indexes that are covered by the block.
472
473
        """
474
        return self.__points
475
476
477
    def set_cluster(self, index):
478
        """!
479
        @brief Assign cluster to the BANG-block by index.
480
481
        @param[in] index (uint): Index cluster that is assigned to BANG-block.
482
483
        """
484
        self.__cluster = index
485
486
487
    def is_neighbor(self, block):
488
        """!
489
        @brief Performs calculation to check whether specified block is neighbor to the current.
490
491
        @param[in] block (bang_block): Other BANG-block that should be checked for neighborhood.
492
493
        @return (bool) True if blocks are neighbors, False if blocks are not neighbors.
494
495
        """
496
        return self.get_spatial_block().is_neighbor(block.get_spatial_block())
497
498
499
    def split(self, split_dimension, cache_points):
500
        """!
501
        @brief Split BANG-block into two new blocks in specified dimension.
502
503
        @param[in] split_dimension (uint): Dimension where block should be split.
504
        @param[in] cache_points (bool): If True then covered points are cached. Used for leaf blocks.
505
506
        @return (tuple) Pair of BANG-block that were formed from the current.
507
508
        """
509
        left_region_number = self.__region_number
510
        right_region_number = self.__region_number + 2 ** self.__level
511
512
        first_spatial_block, second_spatial_block = self.__spatial_block.split(split_dimension)
513
514
        left = bang_block(self.__data, left_region_number, self.__level + 1, first_spatial_block, cache_points)
515
        right = bang_block(self.__data, right_region_number, self.__level + 1, second_spatial_block, cache_points)
516
517
        return left, right
518
519
520
    def __calculate_density(self):
521
        """!
522
        @brief Calculates BANG-block density.
523
524
        @return (double) BANG-block density.
525
526
        """
527
        return self.__get_amount_points() / self.__spatial_block.get_volume()
528
529
530
    def __get_amount_points(self):
531
        """!
532
        @brief Count covered points by the BANG-block and if cache is enable then covered points are stored.
533
534
        @return (uint) Amount of covered points.
535
536
        """
537
        amount = 0
538
        for index in range(len(self.__data)):
539
            if self.__data[index] in self.__spatial_block:
540
                self.__cache_point(index)
541
                amount += 1
542
543
        return amount
544
545
546
    def __cache_point(self, index):
547
        """!
548
        @brief Store index points.
549
550
        @param[in] index (uint): Index point that should be stored.
551
552
        """
553
        if self.__cache_points:
554
            if self.__points is None:
555
                self.__points = []
556
557
            self.__points.append(index)
558
559
560
561
class bang:
562
    def __init__(self, data, levels, density_threshold = 0.0):
563
        self.__data = data
564
        self.__levels = levels
565
        self.__directory = None
566
        self.__clusters = []
567
        self.__noise = []
568
        self.__cluster_blocks = []
569
        self.__dendrogram = [[]]
570
        self.__density_threshold = density_threshold
571
572
573
    def process(self):
574
        self.__validate_arguments()
575
576
        self.__directory = bang_directory(self.__data, self.__levels, self.__density_threshold)
577
        self.__allocate_clusters()
578
579
580
    def get_clusters(self):
581
        return self.__clusters
582
583
584
    def get_noise(self):
585
        return self.__noise
586
587
588
    def get_directory(self):
589
        return self.__directory
590
591
592
    def get_dendrogram(self):
593
        return self.__dendrogram
594
595
596
    def __validate_arguments(self):
597
        if self.__levels <= 0:
598
            raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % self.__levels)
599
600
        if len(self.__data) == 0:
601
            raise ValueError("Empty input data. Data should contain at least one point.")
602
603
        if self.__density_threshold < 0:
604
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % self.__density_threshold)
605
606
607
    def __allocate_clusters(self):
608
        leaf_blocks = self.__directory.get_leafs()
609
        unhandled_block_indexes = set([i for i in range(len(leaf_blocks)) if leaf_blocks[i].get_density() > self.__density_threshold])
610
        appropriate_block_indexes = set(unhandled_block_indexes)
611
612
        current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
613
        cluster_index = 0
614
615
        while current_block is not None:
616
            if current_block.get_density() <= self.__density_threshold:
617
                break
618
619
            self.__expand_cluster_block(current_block, cluster_index, leaf_blocks, unhandled_block_indexes)
620
621
            current_block = self.__find_block_center(leaf_blocks, unhandled_block_indexes)
622
            cluster_index += 1
623
624
        self.__store_clustering_results(cluster_index, appropriate_block_indexes, leaf_blocks)
625
626
627
    def __expand_cluster_block(self, block, cluster_index, leaf_blocks, unhandled_block_indexes):
628
        block.set_cluster(cluster_index)
629
        self.__update_cluster_dendrogram(cluster_index, [block])
630
631
        neighbors = self.__find_block_neighbors(block, leaf_blocks, unhandled_block_indexes)
632
        self.__update_cluster_dendrogram(cluster_index, neighbors)
633
634
        for neighbor in neighbors:
635
            neighbor.set_cluster(cluster_index)
636
            neighbor_neighbors = self.__find_block_neighbors(neighbor, leaf_blocks, unhandled_block_indexes)
637
            self.__update_cluster_dendrogram(cluster_index, neighbor_neighbors)
638
639
            neighbors += neighbor_neighbors
640
641
642
    def __store_clustering_results(self, amount_clusters, appropriate_block_indexes, leaf_blocks):
643
        self.__clusters = [[] for _ in range(amount_clusters)]
644
        for appropriate_index in appropriate_block_indexes:
645
            block = leaf_blocks[appropriate_index]
646
            index = block.get_cluster()
647
648
            if index is not None:
649
                self.__clusters[index] += block.get_points()
650
            else:
651
                self.__noise += block.get_points()
652
653
        self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
654
        self.__noise = list(set(self.__noise))
655
656
657
    def __find_block_center(self, level_blocks, unhandled_block_indexes):
658
        for i in reversed(range(len(level_blocks))):
659
            if level_blocks[i].get_density() == 0:
660
                return None
661
662
            if level_blocks[i].get_cluster() is None:
663
                unhandled_block_indexes.remove(i)
664
                return level_blocks[i]
665
666
        return None
667
668
669
    def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
670
        neighbors = []
671
672
        handled_block_indexes = []
673
        for unhandled_index in unhandled_block_indexes:
674
            if block.is_neighbor(level_blocks[unhandled_index]):
675
                handled_block_indexes.append(unhandled_index)
676
                neighbors.append(level_blocks[unhandled_index])
677
678
                # Maximum number of neighbors is eight
679
                if len(neighbors) == 8:
680
                    break
681
682
        for handled_index in handled_block_indexes:
683
            unhandled_block_indexes.remove(handled_index)
684
685
        return neighbors
686
687
688
    def __update_cluster_dendrogram(self, index_cluster, blocks):
689
        if len(self.__dendrogram) <= index_cluster:
690
            self.__dendrogram.append([])
691
692
        blocks = sorted(blocks, key=lambda block: block.get_density(), reverse=True)
693
        self.__dendrogram[index_cluster] += blocks
694