Completed
Push — 0.8.dev ( d2eb6e...49a6c2 )
by Andrei
58s
created

bang_block.is_neighbor()   C

Complexity

Conditions 7

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
dl 0
loc 24
rs 5.5
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: BANG.
4
@details Implementation based on paper @cite inproceedings::bang::1.
5
6
@authors Andrei Novikov ([email protected])
7
@date 2014-2018
8
@copyright GNU Public License
9
10
@cond GNU_PUBLIC_LICENSE
11
    PyClustering is free software: you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation, either version 3 of the License, or
14
    (at your option) any later version.
15
16
    PyClustering is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20
21
    You should have received a copy of the GNU General Public License
22
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
@endcond
24
25
"""
26
27
28
import matplotlib.pyplot as plt
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
29
import matplotlib.patches as patches
0 ignored issues
show
Configuration introduced by
The import matplotlib.patches could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
31
from pyclustering.utils import data_corners
32
33
from pyclustering.cluster import cluster_visualizer
34
35
36
# TODO: Store blocks in block directory and control them via directory
37
38
39
class bang_visualizer:
40
    @staticmethod
41
    def show_level_blocks(data, level_blocks):
42
        visualizer = cluster_visualizer()
43
        visualizer.append_cluster(data)
44
45
        figure = visualizer.show(display=False)
46
47
        bang_visualizer.__draw_blocks(figure, 0, level_blocks)
48
        plt.show();
49
50
51
    @staticmethod
52
    def __draw_blocks(figure, offset, level_blocks):
53
        ax = figure.get_axes()[offset];
54
        ax.grid(False)
55
56
        for block in level_blocks:
57
            bang_visualizer.__draw_block(ax, block)
58
59
60
    @staticmethod
61
    def __draw_block(ax, block):
62
        max_corner, min_corner = block.get_corners()
63
        belong_cluster = block.get_cluster() is not None
64
65
        if len(max_corner) == 2:
66
            rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
67
                                     fill=belong_cluster,
68
                                     alpha=0.5)
69
            ax.add_patch(rect)
70
71
            xlabel = (max_corner[0] + min_corner[0]) / 2.0
72
            ylabel = (max_corner[1] + min_corner[1]) / 2.0
73
            ax.text(xlabel, ylabel, str(block.get_region()), ha="center", va="center")
74
75
        else:
76
            raise ValueError("Impossible to display blocks on non-2D dimensional data.")
77
78
79
class bang_block:
80
    def __init__(self, data, dimension, region, level, max_corner, min_corner, cache_points=False):
81
        self.__region_number = region
82
        self.__level = level
83
        self.__data = data
84
        self.__dimension = dimension
85
        self.__max_corner = max_corner
86
        self.__min_corner = min_corner
87
        self.__cache_points = cache_points
88
        self.__cluster = None
89
        self.__points = None
90
        self.__density = self.__calculate_density()
91
92
93
    def __str__(self):
94
        return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
95
96
97
    def get_region(self):
98
        return self.__region_number
99
100
101
    def get_level(self):
102
        return self.__level
103
104
105
    def get_density(self):
106
        return self.__density
107
108
109
    def get_cluster(self):
110
        return self.__cluster
111
112
113
    def get_corners(self):
114
        return self.__max_corner, self.__min_corner
115
116
117
    def get_points(self):
118
        if self.__points is not None:
119
            return self.__points
120
121
        # TODO: return for upper level - traverse tree is preferable than whole data with calculation
122
        return [index for index in range(len(self.__data)) if self.contained(self.__data[index])]
123
124
125
    def set_cluster(self, index):
126
        self.__cluster = index
127
128
129
    def is_neighbor(self, block):
130
        if block is self:
131
            return False;
132
133
        block_max_corner, block_min_corner = block.get_corners()
0 ignored issues
show
Unused Code introduced by
The variable block_min_corner seems to be unused.
Loading history...
134
135
        similarity_counter = 0
136
        dimension = len(block_max_corner)
137
138
        length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
139
        tolerances = [length_edge * 0.0001 for length_edge in length_edges]
140
141
        for i in range(dimension):
142
            diff = abs(block_max_corner[i] - self.__max_corner[i])
143
144
            if diff <= length_edges[i] + tolerances[i]:
145
                similarity_counter += 1
146
147
        print(self.get_region(), block.get_region(), similarity_counter)
148
149
        if similarity_counter == dimension:
150
            return True
151
152
        return False
153
154
155
    def split(self, cache_points):
156
        left_region_number = self.__region_number
157
        right_region_number = self.__region_number + 2 ** self.__level
158
159
        dimension = self.__dimension + 1
160
        if dimension >= len(self.__data[0]):
161
            dimension = 0
162
163
        first_max_corner = self.__max_corner[:]
164
        first_min_corner = self.__min_corner[:]
165
        second_max_corner = self.__max_corner[:]
166
        second_min_corner = self.__min_corner[:]
167
168
        split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
169
        first_max_corner[dimension] = split_border
170
        second_min_corner[dimension] = split_border
171
172
        left = bang_block(self.__data, dimension, left_region_number, self.__level + 1, first_max_corner, first_min_corner, cache_points)
173
        right = bang_block(self.__data, dimension, right_region_number, self.__level + 1, second_max_corner, second_min_corner, cache_points)
174
175
        return left, right
176
177
178
    def contained(self, point):
179
        for i in range(len(point)):
180
            if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
181
                return False
182
183
        return True
184
185
186
    def __calculate_density(self):
187
        volume = self.__max_corner[0] - self.__min_corner[0]
188
        for i in range(1, len(self.__max_corner)):
189
            volume *= self.__max_corner[i] - self.__min_corner[i]
190
191
        amount = self.__get_amount_points()
192
        return amount / volume
193
194
195
    def __get_amount_points(self):
196
        amount = 0
197
        for index in range(len(self.__data)):
198
            if self.contained(self.__data[index]):
199
                amount += 1
200
201
                if self.__cache_points:
202
                    if self.__points is None:
203
                        self.__points = []
204
205
                    self.__points.append(index)
206
207
        return amount
208
209
210
211
class bang:
212
    def __init__(self, data, levels, density_threshold = 0.0):
213
        self.__data = data
214
        self.__levels = levels
215
        self.__blocks = []
216
        self.__clusters = []
217
        self.__noise = []
218
        self.__cluster_blocks = []
219
        self.__density_threshold = density_threshold
220
221
222
    def process(self):
223
        self.__validate_arguments()
224
225
        self.__build_blocks()
226
        self.__allocate_clusters()
227
228
229
    def get_clusters(self):
230
        return self.__clusters
231
232
233
    def get_noise(self):
234
        return self.__noise
235
236
237
    def get_level_blocks(self, level=-1):
238
        return self.__blocks[level]
239
240
241
    def __validate_arguments(self):
242
        if self.__levels <= 0:
243
            raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % (self.__levels))
244
245
        if len(self.__data) == 0:
246
            raise ValueError("Empty input data. Data should contain at least one point.")
247
248
        if self.__density_threshold < 0:
249
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % (self.__density_threshold))
250
251
252
    def __build_blocks(self):
253
        min_corner, max_corner = data_corners(self.__data)
254
        root_block = bang_block(self.__data, 0, 0, 0, max_corner, min_corner, self.__levels == 1)
255
256
        level_blocks = [root_block]
257
        self.__blocks.append(level_blocks)
258
259
        for level in range(1, self.__levels):
260
            cache_points = (level == self.__levels - 1)
261
            level_blocks = self.__build_next_level_blocks(level_blocks, cache_points)
262
            level_blocks = sorted(level_blocks, key=lambda block: block.get_density())
263
            self.__blocks.append(level_blocks)
264
265
266
    def __build_next_level_blocks(self, level_blocks, cache_points):
267
        next_level_blocks = []
268
        for block in level_blocks:
269
            left, right = block.split(cache_points)
270
271
            next_level_blocks.append(left)
272
            next_level_blocks.append(right)
273
274
        return next_level_blocks
275
276
277
    def __allocate_clusters(self):
278
        level_blocks = self.__blocks[-1]
279
        unhandled_block_indexes = set([i for i in range(len(level_blocks)) if level_blocks[i].get_density() > self.__density_threshold])
280
        appropriate_block_indexes = set(unhandled_block_indexes)
281
282
        current_block = self.__find_block_center(level_blocks)
283
        cluster_index = 0
284
285
        while current_block is not None:
286
            if (current_block.get_density() <= self.__density_threshold):
287
                break
288
289
            current_block.set_cluster(cluster_index)
290
291
            neighbors = self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
292
            for neighbor in neighbors:
293
                neighbor.set_cluster(cluster_index)
294
                neighbors += self.__find_block_neighbors(neighbor, level_blocks, unhandled_block_indexes)
295
296
            current_block = self.__find_block_center(level_blocks)
297
            cluster_index += 1
298
299
        self.__clusters = [[] for _ in range(cluster_index)]
300
        for appropriate_index in appropriate_block_indexes:
301
            block = level_blocks[appropriate_index]
302
            index = block.get_cluster()
303
            if index is not None:
304
                self.__clusters[index] += block.get_points()
305
            else:
306
                self.__noise += block.get_points()
307
308
        self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
309
310
311
    def __find_block_center(self, level_blocks):
312
        for i in reversed(range(len(level_blocks))):
313
            if level_blocks[i].get_cluster() is None:
314
                return level_blocks[i]
315
316
        return None
317
318
319
    def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
320
        neighbors = []
321
322
        handled_block_indexes = []
323
        for unhandled_index in unhandled_block_indexes:
324
            if block.is_neighbor(level_blocks[unhandled_index]):
325
                handled_block_indexes.append(unhandled_index)
326
                neighbors.append(level_blocks[unhandled_index])
327
328
                # Maximum number of neighbors is eight
329
                if len(neighbors) == 8:
330
                    break
331
332
        for handled_index in handled_block_indexes:
333
            unhandled_block_indexes.remove(handled_index)
334
335
        return neighbors
336