Completed
Push — 0.8.dev ( a8704c...f6cd20 )
by Andrei
58s
created

bang_block   A

Complexity

Total Complexity 30

Size/Duplication

Total Lines 117
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
dl 0
loc 117
rs 10
c 1
b 0
f 0
wmc 30

14 Methods

Rating   Name   Duplication   Size   Complexity  
A get_cluster() 0 2 1
A __calculate_density() 0 7 2
A get_points() 0 5 4
A get_level() 0 2 1
B is_neighbor() 0 13 5
A __init__() 0 11 1
A contained() 0 6 4
B __get_amount_points() 0 13 5
A get_density() 0 2 1
A set_cluster() 0 2 1
A split() 0 21 2
A __str__() 0 2 1
A get_corners() 0 2 1
A get_region() 0 2 1
1
"""!
2
3
@brief Cluster analysis algorithm: BANG.
4
@details Implementation based on paper @cite inproceedings::bang::1.
5
6
@authors Andrei Novikov ([email protected])
7
@date 2014-2018
8
@copyright GNU Public License
9
10
@cond GNU_PUBLIC_LICENSE
11
    PyClustering is free software: you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation, either version 3 of the License, or
14
    (at your option) any later version.
15
16
    PyClustering is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20
21
    You should have received a copy of the GNU General Public License
22
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
@endcond
24
25
"""
26
27
28
import matplotlib.pyplot as plt
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
29
import matplotlib.patches as patches
0 ignored issues
show
Configuration introduced by
The import matplotlib.patches could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
31
from pyclustering.utils import data_corners
32
33
from pyclustering.cluster import cluster_visualizer
34
35
36
# TODO: Store blocks in block directory and control them via directory
37
38
class bang_visualizer:
39
    @staticmethod
40
    def show_level_blocks(data, level_blocks):
41
        visualizer = cluster_visualizer()
42
        visualizer.append_cluster(data)
43
44
        figure = visualizer.show(display=False)
45
46
        bang_visualizer.__draw_blocks(figure, 0, level_blocks)
47
        plt.show();
48
49
50
    @staticmethod
51
    def __draw_blocks(figure, offset, level_blocks):
52
        ax = figure.get_axes()[offset];
53
        ax.grid(False)
54
55
        for block in level_blocks:
56
            bang_visualizer.__draw_block(ax, block)
57
58
59
    @staticmethod
60
    def __draw_block(ax, block):
61
        max_corner, min_corner = block.get_corners()
62
        belong_cluster = block.get_cluster() is not None
63
64
        if len(max_corner) == 2:
65
            rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
66
                                     fill=belong_cluster,
67
                                     alpha=0.5)
68
            ax.add_patch(rect)
69
70
        else:
71
            raise ValueError("Impossible to display blocks on non-2D dimensional data.")
72
73
74
class bang_block:
75
    def __init__(self, data, dimension, region, level, max_corner, min_corner, cache_points=False):
76
        self.__region_number = region
77
        self.__level = level
78
        self.__data = data
79
        self.__dimension = dimension
80
        self.__max_corner = max_corner
81
        self.__min_corner = min_corner
82
        self.__cache_points = cache_points
83
        self.__cluster = None
84
        self.__points = None
85
        self.__density = self.__calculate_density()
86
87
88
    def __str__(self):
89
        return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
90
91
92
    def get_region(self):
93
        return self.__region_number
94
95
96
    def get_level(self):
97
        return self.__level
98
99
100
    def get_density(self):
101
        return self.__density
102
103
104
    def get_cluster(self):
105
        return self.__cluster
106
107
108
    def get_corners(self):
109
        return self.__max_corner, self.__min_corner
110
111
112
    def get_points(self):
113
        if self.__points is not None:
114
            return self.__points
115
116
        return [index for index in range(len(self.__data)) if self.contained(self.__data[index])]
117
118
119
    def set_cluster(self, index):
120
        self.__cluster = index
121
122
123
    def is_neighbor(self, block):
124
        if block is self:
125
            return False;
126
127
        block_max_corner, block_min_corner = block.get_corners()
128
        for i in range(len(self.__max_corner)):
129
            if self.__max_corner[i] == block_min_corner[i]:
130
                return True
131
132
            if self.__min_corner[i] == block_max_corner[i]:
133
                return True
134
135
        return False
136
137
138
    def split(self, cache_points):
139
        left_region_number = self.__region_number
140
        right_region_number = self.__region_number + 2 ** self.__level
141
142
        dimension = self.__dimension + 1
143
        if dimension >= len(self.__data[0]):
144
            dimension = 0
145
146
        first_max_corner = self.__max_corner[:]
147
        first_min_corner = self.__min_corner[:]
148
        second_max_corner = self.__max_corner[:]
149
        second_min_corner = self.__min_corner[:]
150
151
        split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
152
        first_max_corner[dimension] = split_border
153
        second_min_corner[dimension] = split_border
154
155
        left = bang_block(self.__data, dimension, left_region_number, self.__level + 1, first_max_corner, first_min_corner, cache_points)
156
        right = bang_block(self.__data, dimension, right_region_number, self.__level + 1, second_max_corner, second_min_corner, cache_points)
157
158
        return left, right
159
160
161
    def contained(self, point):
162
        for i in range(len(point)):
163
            if point[i] <= self.__min_corner[i] or point[i] > self.__max_corner[i]:
164
                return False
165
166
        return True
167
168
169
    def __calculate_density(self):
170
        volume = self.__max_corner[0] - self.__min_corner[0]
171
        for i in range(1, len(self.__max_corner)):
172
            volume *= self.__max_corner[i] - self.__min_corner[i]
173
174
        amount = self.__get_amount_points()
175
        return amount / volume
176
177
178
    def __get_amount_points(self):
179
        amount = 0
180
        for index in range(len(self.__data)):
181
            if self.contained(self.__data[index]):
182
                amount += 1
183
184
                if self.__cache_points:
185
                    if self.__points is None:
186
                        self.__points = []
187
188
                    self.__points.append(index)
189
190
        return amount
191
192
193
194
class bang:
195
    def __init__(self, data, levels, density_threshold = 0.0):
196
        self.__data = data
197
        self.__levels = levels
198
        self.__blocks = []
199
        self.__clusters = []
200
        self.__noise = []
201
        self.__cluster_blocks = []
202
        self.__density_threshold = density_threshold
203
204
205
    def process(self):
206
        self.__validate_arguments()
207
208
        self.__build_blocks()
209
        self.__allocate_clusters()
210
211
212
    def get_clusters(self):
213
        return self.__clusters
214
215
216
    def get_noise(self):
217
        return self.__noise
218
219
220
    def get_level_blocks(self, level=-1):
221
        return self.__blocks[level]
222
223
224
    def __validate_arguments(self):
225
        if self.__levels <= 0:
226
            raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % (self.__levels))
227
228
        if len(self.__data) == 0:
229
            raise ValueError("Empty input data. Data should contain at least one point.")
230
231
        if self.__density_threshold < 0:
232
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % (self.__density_threshold))
233
234
235
    def __build_blocks(self):
236
        min_corner, max_corner = data_corners(self.__data)
237
        root_block = bang_block(self.__data, 0, 0, 0, max_corner, min_corner, self.__levels == 1)
238
239
        level_blocks = [root_block]
240
        self.__blocks.append(level_blocks)
241
242
        for level in range(1, self.__levels):
243
            cache_points = (level == self.__levels - 1)
244
            level_blocks = self.__build_next_level_blocks(level_blocks, cache_points)
245
            level_blocks = sorted(level_blocks, key=lambda block: block.get_density())
246
            self.__blocks.append(level_blocks)
247
248
249
    def __build_next_level_blocks(self, level_blocks, cache_points):
250
        next_level_blocks = []
251
        for block in level_blocks:
252
            left, right = block.split(cache_points)
253
254
            next_level_blocks.append(left)
255
            next_level_blocks.append(right)
256
257
        return next_level_blocks
258
259
260
    def __allocate_clusters(self):
261
        level_blocks = self.__blocks[-1]
262
        unhandled_block_indexes = set(range(len(level_blocks)))
263
264
        current_block = self.__find_block_center(level_blocks)
265
        cluster_index = 0
266
267
        while current_block is not None:
268
            if (current_block.get_density() <= self.__density_threshold):
269
                break
270
271
            current_block.set_cluster(cluster_index)
272
273
            neighbors = self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
274
            for neighbor in neighbors:
275
                neighbor.set_cluster(cluster_index)
276
                neighbors += self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
277
278
            current_block = self.__find_block_center(level_blocks)
279
            cluster_index += 1
280
281
        self.__clusters = [[] for _ in range(cluster_index)]
282
        for block in level_blocks:
283
            index = block.get_cluster()
284
            if index is not None:
285
                self.__clusters[index] += block.get_points()
286
            else:
287
                self.__noise += block.get_points()
288
289
290
    def __find_block_center(self, level_blocks):
291
        for i in reversed(range(len(level_blocks))):
292
            if level_blocks[i].get_cluster() is None:
293
                return level_blocks[i]
294
295
        return None
296
297
298
    def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
299
        neighbors = []
300
301
        handled_block_indexes = []
302
        for unhandled_index in unhandled_block_indexes:
303
            if block.is_neighbor(level_blocks[unhandled_index]):    # TODO: is_neighbor should consider density!
304
                handled_block_indexes.append(unhandled_index)
305
                neighbors.append(block)
306
307
                # Maximum number of neighbors is four
308
                if len(neighbors) == 4:
309
                    break
310
311
        for handled_index in handled_block_indexes:
312
            unhandled_block_indexes.remove(handled_index)
313
314
        return neighbors
315