Completed
Push — master ( b517ee...6fa201 )
by Andrei
02:20
created

optics.__calculate_connectivity_radius()   B

Complexity

Conditions 6

Size

Total Lines 32

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
dl 0
loc 32
rs 7.5384
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4
@details Based on article description:
5
         - M.Ankerst, M.Breunig, H.Kriegel, J.Sander. OPTICS: Ordering Points To Identify the Clustering Structure. 1999.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2016
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
    
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
    
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
from pyclustering.utils import euclidean_distance;
29
30
import matplotlib.pyplot as plt;
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
31
32
33
class ordering_visualizer:
34
    """!
35
    @brief Cluster ordering diagram visualizer that represents dataset graphically.
36
    
37
    """
38
    
39
    @staticmethod
40
    def show_ordering_diagram(analyser):
41
        ordering = analyser.cluster_ordering;
42
        indexes = [i for i in range(0, len(ordering))];
43
        
44
        plt.bar(indexes, ordering);
45
        plt.show();
46
47
48
class ordering_analyser:
49
    """!
50
    @brief Analyser of cluster ordering diagram.
51
    
52
    """
53
    
54
    @property
55
    def cluster_ordering(self):
56
        """!
57
        @brief (list) Returns values of dataset cluster ordering.
58
        
59
        """
60
        return self.__ordering;
61
    
62
    
63
    def __init__(self, ordering_diagram):
64
        """!
65
        @brief Analyser of ordering diagram that is based on reachability-distances.
66
        
67
        @see calculate_connvectivity_radius
68
        
69
        """
70
        self.__ordering = ordering_diagram;
71
    
72
    
73
    def __len__(self):
74
        """!
75
        @brief Returns length of clustering-ordering diagram.
76
        
77
        """
78
        return len(self.__ordering);
79
    
80
    
81
    def calculate_connvectivity_radius(self, amount_clusters):
82
        """!
83
        @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram.
84
        
85
        @return (double) Value of connectivity radius.
86
        
87
        """
88
        
89
        maximum_distance = max(self.__ordering);
90
        
91
        upper_distance = maximum_distance;
92
        lower_distance = 0.0;
93
        
94
        radius = None;
95
        
96
        if (self.extract_cluster_amount(maximum_distance) <= amount_clusters):
97
            while(True):
98
                radius = (lower_distance + upper_distance) / 2.0;
99
                
100
                amount = self.extract_cluster_amount(radius);
101
                if (amount == amount_clusters):
102
                    break;
103
                
104
                elif (amount > amount_clusters):
105
                    lower_distance = radius;
106
                
107
                elif (amount < amount_clusters):
108
                    upper_distance = radius;
109
        
110
        return radius;
111
    
112
    
113
    def extract_cluster_amount(self, radius):
114
        """!
115
        @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram.
116
        @details When growth of reachability-distances is detected than it is considered as a start point of cluster, 
117
                 than pick is detected and after that recession is observed until new growth (that means end of the
118
                 current cluster and start of a new one) or end of diagram.
119
        
120
        @param[in] ordering (list): list of reachability-distances of optics objects.
121
        @param[in] radius (double): connectivity radius that is used for cluster allocation.
122
        
123
        
124
        @return (unit) Amount of clusters that can be allocated by the connectivity radius on ordering diagram.
125
        
126
        """
127
        
128
        amount_clusters = 1;
129
        
130
        cluster_start = False;
131
        cluster_pick = False;
132
        previous_distance = 0;
133
        
134
        for distance in self.__ordering:
135
            if (distance >= radius):
136
                if (cluster_start is False):
137
                    amount_clusters += 1;
138
                    cluster_start = True;
139
                
140
                elif ( (cluster_start is True) and (distance < previous_distance) and (cluster_pick is False) ):
141
                    cluster_pick = True;
142
                
143
                elif ( (cluster_start is True) and (distance > previous_distance) and (cluster_pick is True) ):
144
                    cluster_start = True;
145
                    cluster_pick = False;
146
                    
147
                    amount_clusters += 1;
148
                
149
                previous_distance = distance;
150
            
151
            else:
152
                cluster_start = False;
153
                cluster_pick = False;
154
        
155
        return amount_clusters;
156
157
158
class optics_descriptor:
159
    """!
160
    @brief Object description that used by OPTICS algorithm for cluster analysis.
161
    
162
    """
163
    
164
    def __init__(self, index, core_distance = None, reachability_distance = None):
165
        """!
166
        @brief Constructor of object description in optics terms.
167
        
168
        @param[in] index (uint): Index of the object in the data set.
169
        @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
170
        @param[in] reachability_distance (double): Reachability distance to this object.
171
        
172
        """
173
        
174
        ## Reachability distance - the smallest distance to be reachable by core object.
175
        self.index_object = index;
176
        
177
        ## Core distance - the smallest distance to reach specified number of neighbors that is not greater then connectivity radius.
178
        self.core_distance = core_distance;
179
        
180
        ## Index of object from the input data.
181
        self.reachability_distance = reachability_distance;
182
        
183
        ## True is object has been already traversed.
184
        self.processed = False;
185
        
186
    def __repr__(self):
187
        """!
188
        @brief Returns string representation of the optics descriptor.
189
        
190
        """
191
        
192
        return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance);
193
194
195
class optics:
196
    """!
197
    @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure).
198
    @details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data. 
199
             Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
200
             for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
201
             bigger than real - because it will be calculated by the algorithms.
202
203
    Example:
204
    @code
205
        # Read sample for clustering from some file
206
        sample = read_sample(path_sample);
207
        
208
        # Create OPTICS algorithm for cluster analysis
209
        optics_instance = optics(sample, 0.5, 6);
210
        
211
        # Run cluster analysis
212
        optics_instance.process();
213
        
214
        # Obtain results of clustering
215
        clusters = optics_instance.get_clusters();
216
        noise = optics_instance.get_noise();
217
        
218
        # Obtain rechability-distances
219
        ordering = ordering_analyser(optics_instance.get_ordering());
220
        
221
        # Visualization of cluster ordering in line with reachability distance.
222
        ordering_visualizer.show_ordering_diagram(ordering);
223
    @endcode
224
    
225
    Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
226
    @code
227
        # Import required packages
228
        from pyclustering.cluster.optics import optics;
229
        from pyclustering.samples.definitions import FCPS_SAMPLES;
230
        from pyclustering.utils import read_sample;
231
        
232
        # Read sample for clustering from some file
233
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN);
234
        
235
        # Run cluster analysis where connvectivity radius is bigger than real
236
        radius = 2.0;
237
        neighbors = 3;
238
        amount_of_clusters = 3;
239
        
240
        optics_instance = optics(sample, radius, neighbors, amount_of_clusters);
241
        
242
        # Obtain results of clustering
243
        clusters = optics_instance.get_clusters();
244
        noise = optics_instance.get_noise();
245
    @endcode
246
    
247
    """
248
    
249
    def __init__(self, sample, eps, minpts, amount_clusters = None):
250
        """!
251
        @brief Constructor of clustering algorithm OPTICS.
252
        
253
        @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
254
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
255
        @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
256
        @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
257
                    In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
258
                    in connectivity radius usage.
259
        
260
        """
261
        
262
        self.__sample_pointer = sample;     # Algorithm parameter - pointer to sample for processing.
263
        self.__eps = eps;                   # Algorithm parameter - connectivity radius between object for establish links between object.
264
        self.__minpts = minpts;             # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
265
        self.__amount_clusters = amount_clusters;
266
        self.__ordering = None;
267
        
268
        self.__initialize(sample);
269
270
271
    def process(self):
272
        """!
273
        @brief Performs cluster analysis in line with rules of OPTICS algorithm.
274
        
275
        @remark Results of clustering can be obtained using corresponding gets methods.
276
        
277
        @see get_clusters()
278
        @see get_noise()
279
        @see get_ordering()
280
        
281
        """
282
        
283
        self.__allocate_clusters();
284
        
285
        if ( (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())) ):
286
            analyser = ordering_analyser(self.get_ordering());
287
            radius = analyser.calculate_connvectivity_radius(self.__amount_clusters);
288
            if (radius is not None):
289
                self.__eps = radius;
290
                self.__initialize(self.__sample_pointer);
291
                self.__allocate_clusters();
292
293
294
    def __initialize(self, sample):
295
        """!
296
        @brief Initializes internal states and resets clustering results in line with input sample.
297
        
298
        """
299
        
300
        self.__processed = [False] * len(sample);
301
        self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))];     # List of OPTICS objects that corresponds to objects from input sample.
302
        self.__ordered_database = [];       # List of OPTICS objects in traverse order. 
303
        
304
        self.__clusters = None;     # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
305
        self.__noise = None;        # Result of clustering (noise).
306
307
308
    def __allocate_clusters(self):
309
        """!
310
        @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
311
        
312
        """
313
        
314
        for optic_object in self.__optics_objects:
315
            if (optic_object.processed is False):
316
                self.__expand_cluster_order(optic_object);
317
        
318
        self.__extract_clusters();
319
    
320
    
321
    def get_clusters(self):
322
        """!
323
        @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.
324
        
325
        @return (list) List of allocated clusters.
326
        
327
        @see process()
328
        @see get_noise()
329
        @see get_ordering()
330
        @see get_radius()
331
        
332
        """
333
        
334
        return self.__clusters;
335
    
336
    
337
    def get_noise(self):
338
        """!
339
        @brief Returns list of noise that contains indexes of objects that corresponds to input data.
340
        
341
        @return (list) List of allocated noise objects.
342
        
343
        @see process()
344
        @see get_clusters()
345
        @see get_ordering()
346
        @see get_radius()
347
        
348
        """
349
        
350
        return self.__noise;
351
    
352
    
353
    def get_ordering(self):
354
        """!
355
        @brief Returns clustering ordering information about the input data set.
356
        @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
357
        
358
        @return (ordering_analyser) Analyser of clustering ordering.
359
        
360
        @see process()
361
        @see get_clusters()
362
        @see get_noise()
363
        @see get_radius()
364
        
365
        """
366
        
367
        if (self.__ordering is None):
368
            self.__ordering = [];
369
        
370
            for cluster in self.__clusters:
371
                for index_object in cluster:
372
                    optics_object = self.__optics_objects[index_object];
373
                    if (optics_object.reachability_distance is not None):
374
                        self.__ordering.append(optics_object.reachability_distance);
375
            
376
        return self.__ordering;
377
    
378
    
379
    def get_radius(self):
380
        """!
381
        @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
382
        @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.
383
        
384
        @return (double) Connectivity radius.
385
        
386
        @see get_ordering()
387
        @see get_clusters()
388
        @see get_noise()
389
        
390
        """
391
        
392
        return self.__eps;
393
    
394
    
395
    def __expand_cluster_order(self, optics_object):
396
        """!
397
        @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
398
               Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
399
               Order database is updated during expanding.
400
               
401
        @param[in] optics_object (optics_descriptor): Object that hasn't been processed.
402
        
403
        """
404
        
405
        optics_object.processed = True;
406
        
407
        neighbors_descriptor = self.__neighbor_indexes(optics_object);
408
        optics_object.reachability_distance = None;
409
        
410
        self.__ordered_database.append(optics_object);
411
        
412
        # Check core distance
413
        if (len(neighbors_descriptor) >= self.__minpts):
414
            neighbors_descriptor.sort(key = lambda obj: obj[1]);
415
            optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1];
416
            
417
            # Continue processing
418
            order_seed = list();
419
            self.__update_order_seed(optics_object, neighbors_descriptor, order_seed);
420
            
421
            while(len(order_seed) > 0):
422
                optic_descriptor = order_seed[0];
423
                order_seed.remove(optic_descriptor);
424
                
425
                neighbors_descriptor = self.__neighbor_indexes(optic_descriptor);
426
                optic_descriptor.processed = True;
427
                
428
                self.__ordered_database.append(optic_descriptor);
429
                
430
                if (len(neighbors_descriptor) >= self.__minpts):
431
                    neighbors_descriptor.sort(key = lambda obj: obj[1]);
432
                    optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1];
433
                    
434
                    self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed);
435
                else:
436
                    optic_descriptor.core_distance = None;
437
                    
438
        else:
439
            optics_object.core_distance = None;
440
441
    
442
    def __extract_clusters(self):
443
        """!
444
        @brief Extract clusters and noise from order database.
445
        
446
        """
447
     
448
        self.__clusters = [];
449
        self.__noise = [];
450
451
        current_cluster = [];
452
        for optics_object in self.__ordered_database:
453
            if ((optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps)):
454
                if ((optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps)):
455
                    if (len(current_cluster) > 0):
456
                        self.__clusters.append(current_cluster);
457
                        current_cluster = [];
458
                        
459
                    current_cluster.append(optics_object.index_object);
460
                else:
461
                    self.__noise.append(optics_object.index_object);
462
            else:
463
                current_cluster.append(optics_object.index_object);
464
        
465
        if (len(current_cluster) > 0):
466
            self.__clusters.append(current_cluster);
467
                
468
469
    def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
470
        """!
471
        @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.
472
        
473
        @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
474
        @param[in] neighbors_descriptors (list): List of neighbors of core-object.
475
        @param[in|out] order_seed (list): List of sorted object in line with reachable distance.
476
        
477
        """
478
        
479
        for neighbor_descriptor in neighbors_descriptors:
480
            index_neighbor = neighbor_descriptor[0];
481
            current_reachable_distance = neighbor_descriptor[1];
482
            
483
            if (self.__optics_objects[index_neighbor].processed != True):
484
                reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance);
485
                if (self.__optics_objects[index_neighbor].reachability_distance is None):
486
                    self.__optics_objects[index_neighbor].reachability_distance = reachable_distance;
487
                    
488
                    # insert element in queue O(n) - worst case.
489
                    index_insertion = len(order_seed);
490
                    for index_seed in range(0, len(order_seed)):
491
                        if (reachable_distance < order_seed[index_seed].reachability_distance):
492
                            index_insertion = index_seed;
493
                            break;
494
                    
495
                    order_seed.insert(index_insertion, self.__optics_objects[index_neighbor]);
496
497
                else:
498
                    if (reachable_distance < self.__optics_objects[index_neighbor].reachability_distance):
499
                        self.__optics_objects[index_neighbor].reachability_distance = reachable_distance;
500
                        order_seed.sort(key = lambda obj: obj.reachability_distance);
501
502
503
    def __neighbor_indexes(self, optic_object):
504
        """!
505
        @brief Return list of indexes of neighbors of specified point for the data.
506
        
507
        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
508
        
509
        @return (list) List of indexes of neighbors in line the connectivity radius.
510
        
511
        """
512
              
513
        neighbor_description = [];
514
        
515
        for index in range(0, len(self.__sample_pointer), 1):
516
            if (index == optic_object.index_object):
517
                continue;
518
            
519
            distance = euclidean_distance(self.__sample_pointer[optic_object.index_object], self.__sample_pointer[index]);
520
            if (distance <= self.__eps):
521
                neighbor_description.append( [index, distance] );
522
            
523
        return neighbor_description;