Completed
Push — 0.8.dev ( f6cd20...d2eb6e )
by Andrei
01:07
created

bang_block.is_neighbor()   F

Complexity

Conditions 9

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 9
dl 0
loc 26
rs 3
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: BANG.
4
@details Implementation based on paper @cite inproceedings::bang::1.
5
6
@authors Andrei Novikov ([email protected])
7
@date 2014-2018
8
@copyright GNU Public License
9
10
@cond GNU_PUBLIC_LICENSE
11
    PyClustering is free software: you can redistribute it and/or modify
12
    it under the terms of the GNU General Public License as published by
13
    the Free Software Foundation, either version 3 of the License, or
14
    (at your option) any later version.
15
16
    PyClustering is distributed in the hope that it will be useful,
17
    but WITHOUT ANY WARRANTY; without even the implied warranty of
18
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19
    GNU General Public License for more details.
20
21
    You should have received a copy of the GNU General Public License
22
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
23
@endcond
24
25
"""
26
27
28
import matplotlib.pyplot as plt
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
29
import matplotlib.patches as patches
0 ignored issues
show
Configuration introduced by
The import matplotlib.patches could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
30
31
from pyclustering.utils import data_corners
32
33
from pyclustering.cluster import cluster_visualizer
34
35
36
# TODO: Store blocks in block directory and control them via directory
37
38
class bang_visualizer:
39
    @staticmethod
40
    def show_level_blocks(data, level_blocks):
41
        visualizer = cluster_visualizer()
42
        visualizer.append_cluster(data)
43
44
        figure = visualizer.show(display=False)
45
46
        bang_visualizer.__draw_blocks(figure, 0, level_blocks)
47
        plt.show();
48
49
50
    @staticmethod
51
    def __draw_blocks(figure, offset, level_blocks):
52
        ax = figure.get_axes()[offset];
53
        ax.grid(False)
54
55
        for block in level_blocks:
56
            bang_visualizer.__draw_block(ax, block)
57
58
59
    @staticmethod
60
    def __draw_block(ax, block):
61
        max_corner, min_corner = block.get_corners()
62
        belong_cluster = block.get_cluster() is not None
63
64
        if len(max_corner) == 2:
65
            rect = patches.Rectangle(min_corner, max_corner[0] - min_corner[0], max_corner[1] - min_corner[1],
66
                                     fill=belong_cluster,
67
                                     alpha=0.5)
68
            ax.add_patch(rect)
69
70
            xlabel = (max_corner[0] + min_corner[0]) / 2.0
71
            ylabel = (max_corner[1] + min_corner[1]) / 2.0
72
            ax.text(xlabel, ylabel, str(block.get_region()), ha="center", va="center")
73
74
        else:
75
            raise ValueError("Impossible to display blocks on non-2D dimensional data.")
76
77
78
class bang_block:
79
    def __init__(self, data, dimension, region, level, max_corner, min_corner, cache_points=False):
80
        self.__region_number = region
81
        self.__level = level
82
        self.__data = data
83
        self.__dimension = dimension
84
        self.__max_corner = max_corner
85
        self.__min_corner = min_corner
86
        self.__cache_points = cache_points
87
        self.__cluster = None
88
        self.__points = None
89
        self.__density = self.__calculate_density()
90
91
92
    def __str__(self):
93
        return "(" + str(self.__region_number) + ", " + str(self.__level) + ")"
94
95
96
    def get_region(self):
97
        return self.__region_number
98
99
100
    def get_level(self):
101
        return self.__level
102
103
104
    def get_density(self):
105
        return self.__density
106
107
108
    def get_cluster(self):
109
        return self.__cluster
110
111
112
    def get_corners(self):
113
        return self.__max_corner, self.__min_corner
114
115
116
    def get_points(self):
117
        if self.__points is not None:
118
            return self.__points
119
120
        # TODO: return for upper level - traverse tree is preferable than whole data with calculation
121
        return [index for index in range(len(self.__data)) if self.contained(self.__data[index])]
122
123
124
    def set_cluster(self, index):
125
        self.__cluster = index
126
127
128
    def is_neighbor(self, block):
129
        if block is self:
130
            return False;
131
132
        if block.get_region() == 17 and self.__region_number == 1:
133
            print("Error case")
134
135
        block_max_corner, block_min_corner = block.get_corners()
0 ignored issues
show
Unused Code introduced by
The variable block_min_corner seems to be unused.
Loading history...
136
137
        similarity_counter = 0
138
        dimension = len(block_max_corner)
139
140
        length_edges = [self.__max_corner[i] - self.__min_corner[i] for i in range(dimension)]
141
        tolerances = [length_edge * 0.0001 for length_edge in length_edges]
142
143
        for i in range(dimension):
144
            diff = abs(block_max_corner[i] - self.__max_corner[i])
145
            if diff <= length_edges[i] + tolerances[i]:
146
                similarity_counter += 1
147
148
        print(self.get_region(), block.get_region(), similarity_counter)
149
150
        if similarity_counter == dimension:
151
            return True
152
153
        return False
154
155
156
    def split(self, cache_points):
157
        left_region_number = self.__region_number
158
        right_region_number = self.__region_number + 2 ** self.__level
159
160
        dimension = self.__dimension + 1
161
        if dimension >= len(self.__data[0]):
162
            dimension = 0
163
164
        first_max_corner = self.__max_corner[:]
165
        first_min_corner = self.__min_corner[:]
166
        second_max_corner = self.__max_corner[:]
167
        second_min_corner = self.__min_corner[:]
168
169
        split_border = (self.__max_corner[dimension] + self.__min_corner[dimension]) / 2.0
170
        first_max_corner[dimension] = split_border
171
        second_min_corner[dimension] = split_border
172
173
        left = bang_block(self.__data, dimension, left_region_number, self.__level + 1, first_max_corner, first_min_corner, cache_points)
174
        right = bang_block(self.__data, dimension, right_region_number, self.__level + 1, second_max_corner, second_min_corner, cache_points)
175
176
        return left, right
177
178
179
    def contained(self, point):
180
        for i in range(len(point)):
181
            if point[i] < self.__min_corner[i] or point[i] > self.__max_corner[i]:
182
                return False
183
184
        return True
185
186
187
    def __calculate_density(self):
188
        volume = self.__max_corner[0] - self.__min_corner[0]
189
        for i in range(1, len(self.__max_corner)):
190
            volume *= self.__max_corner[i] - self.__min_corner[i]
191
192
        amount = self.__get_amount_points()
193
        return amount / volume
194
195
196
    def __get_amount_points(self):
197
        amount = 0
198
        for index in range(len(self.__data)):
199
            if self.contained(self.__data[index]):
200
                amount += 1
201
202
                if self.__cache_points:
203
                    if self.__points is None:
204
                        self.__points = []
205
206
                    self.__points.append(index)
207
208
        return amount
209
210
211
212
class bang:
213
    def __init__(self, data, levels, density_threshold = 0.0):
214
        self.__data = data
215
        self.__levels = levels
216
        self.__blocks = []
217
        self.__clusters = []
218
        self.__noise = []
219
        self.__cluster_blocks = []
220
        self.__density_threshold = density_threshold
221
222
223
    def process(self):
224
        self.__validate_arguments()
225
226
        self.__build_blocks()
227
        self.__allocate_clusters()
228
229
230
    def get_clusters(self):
231
        return self.__clusters
232
233
234
    def get_noise(self):
235
        return self.__noise
236
237
238
    def get_level_blocks(self, level=-1):
239
        return self.__blocks[level]
240
241
242
    def __validate_arguments(self):
243
        if self.__levels <= 0:
244
            raise ValueError("Incorrect amount of levels '%d'. Level value should be greater than 0." % (self.__levels))
245
246
        if len(self.__data) == 0:
247
            raise ValueError("Empty input data. Data should contain at least one point.")
248
249
        if self.__density_threshold < 0:
250
            raise ValueError("Incorrect density threshold '%f'. Density threshold should not be negative." % (self.__density_threshold))
251
252
253
    def __build_blocks(self):
254
        min_corner, max_corner = data_corners(self.__data)
255
        root_block = bang_block(self.__data, 0, 0, 0, max_corner, min_corner, self.__levels == 1)
256
257
        level_blocks = [root_block]
258
        self.__blocks.append(level_blocks)
259
260
        for level in range(1, self.__levels):
261
            cache_points = (level == self.__levels - 1)
262
            level_blocks = self.__build_next_level_blocks(level_blocks, cache_points)
263
            level_blocks = sorted(level_blocks, key=lambda block: block.get_density())
264
            self.__blocks.append(level_blocks)
265
266
267
    def __build_next_level_blocks(self, level_blocks, cache_points):
268
        next_level_blocks = []
269
        for block in level_blocks:
270
            left, right = block.split(cache_points)
271
272
            next_level_blocks.append(left)
273
            next_level_blocks.append(right)
274
275
        return next_level_blocks
276
277
278
    def __allocate_clusters(self):
279
        level_blocks = self.__blocks[-1]
280
        unhandled_block_indexes = set([i for i in range(len(level_blocks)) if level_blocks[i].get_density() > self.__density_threshold])
281
        appropriate_block_indexes = set(unhandled_block_indexes)
282
283
        current_block = self.__find_block_center(level_blocks)
284
        cluster_index = 0
285
286
        while current_block is not None:
287
            if (current_block.get_density() <= self.__density_threshold):
288
                break
289
290
            current_block.set_cluster(cluster_index)
291
292
            neighbors = self.__find_block_neighbors(current_block, level_blocks, unhandled_block_indexes)
293
            for neighbor in neighbors:
294
                neighbor.set_cluster(cluster_index)
295
                neighbors += self.__find_block_neighbors(neighbor, level_blocks, unhandled_block_indexes)
296
297
            current_block = self.__find_block_center(level_blocks)
298
            cluster_index += 1
299
300
        self.__clusters = [[] for _ in range(cluster_index)]
301
        for appropriate_index in appropriate_block_indexes:
302
            block = level_blocks[appropriate_index]
303
            index = block.get_cluster()
304
            if index is not None:
305
                self.__clusters[index] += block.get_points()
306
            else:
307
                self.__noise += block.get_points()
308
309
        self.__clusters = [ list(set(cluster)) for cluster in self.__clusters ]
310
311
312
    def __find_block_center(self, level_blocks):
313
        for i in reversed(range(len(level_blocks))):
314
            if level_blocks[i].get_cluster() is None:
315
                return level_blocks[i]
316
317
        return None
318
319
320
    def __find_block_neighbors(self, block, level_blocks, unhandled_block_indexes):
321
        neighbors = []
322
323
        handled_block_indexes = []
324
        for unhandled_index in unhandled_block_indexes:
325
            if block.is_neighbor(level_blocks[unhandled_index]):
326
                handled_block_indexes.append(unhandled_index)
327
                neighbors.append(level_blocks[unhandled_index])
328
329
                # Maximum number of neighbors is four
330
                if len(neighbors) == 4:
331
                    break
332
333
        for handled_index in handled_block_indexes:
334
            unhandled_block_indexes.remove(handled_index)
335
336
        return neighbors
337