Completed
Push — master ( 6ed467...f3f897 )
by Andrei
01:29
created

optics.get_cluster_encoding()   A

Complexity

Conditions 1

Size

Total Lines 11

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
dl 0
loc 11
rs 9.4285
c 0
b 0
f 0
1
"""!
2
3
@brief Cluster analysis algorithm: OPTICS (Ordering Points To Identify Clustering Structure)
4
@details Based on article description:
5
         - M.Ankerst, M.Breunig, H.Kriegel, J.Sander. OPTICS: Ordering Points To Identify the Clustering Structure. 1999.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2016
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
    
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
    
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
29
from pyclustering.cluster.encoder import type_encoding;
30
31
from pyclustering.utils import euclidean_distance;
32
33
import matplotlib.pyplot as plt;
0 ignored issues
show
Configuration introduced by
The import matplotlib.pyplot could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
34
35
36
class ordering_visualizer:
37
    """!
38
    @brief Cluster ordering diagram visualizer that represents dataset graphically.
39
    
40
    """
41
    
42
    @staticmethod
43
    def show_ordering_diagram(analyser):
44
        ordering = analyser.cluster_ordering;
45
        indexes = [i for i in range(0, len(ordering))];
46
        
47
        plt.bar(indexes, ordering);
48
        plt.show();
49
50
51
class ordering_analyser:
52
    """!
53
    @brief Analyser of cluster ordering diagram.
54
    
55
    """
56
    
57
    @property
58
    def cluster_ordering(self):
59
        """!
60
        @brief (list) Returns values of dataset cluster ordering.
61
        
62
        """
63
        return self.__ordering;
64
    
65
    
66
    def __init__(self, ordering_diagram):
67
        """!
68
        @brief Analyser of ordering diagram that is based on reachability-distances.
69
        
70
        @see calculate_connvectivity_radius
71
        
72
        """
73
        self.__ordering = ordering_diagram;
74
    
75
    
76
    def __len__(self):
77
        """!
78
        @brief Returns length of clustering-ordering diagram.
79
        
80
        """
81
        return len(self.__ordering);
82
    
83
    
84
    def calculate_connvectivity_radius(self, amount_clusters):
85
        """!
86
        @brief Calculates connectivity radius of allocation specified amount of clusters using ordering diagram.
87
        
88
        @return (double) Value of connectivity radius.
89
        
90
        """
91
        
92
        maximum_distance = max(self.__ordering);
93
        
94
        upper_distance = maximum_distance;
95
        lower_distance = 0.0;
96
        
97
        radius = None;
98
        
99
        if (self.extract_cluster_amount(maximum_distance) <= amount_clusters):
100
            while(True):
101
                radius = (lower_distance + upper_distance) / 2.0;
102
                
103
                amount = self.extract_cluster_amount(radius);
104
                if (amount == amount_clusters):
105
                    break;
106
                
107
                elif (amount > amount_clusters):
108
                    lower_distance = radius;
109
                
110
                elif (amount < amount_clusters):
111
                    upper_distance = radius;
112
        
113
        return radius;
114
    
115
    
116
    def extract_cluster_amount(self, radius):
117
        """!
118
        @brief Obtains amount of clustering that can be allocated by using specified radius for ordering diagram.
119
        @details When growth of reachability-distances is detected than it is considered as a start point of cluster, 
120
                 than pick is detected and after that recession is observed until new growth (that means end of the
121
                 current cluster and start of a new one) or end of diagram.
122
        
123
        @param[in] ordering (list): list of reachability-distances of optics objects.
124
        @param[in] radius (double): connectivity radius that is used for cluster allocation.
125
        
126
        
127
        @return (unit) Amount of clusters that can be allocated by the connectivity radius on ordering diagram.
128
        
129
        """
130
        
131
        amount_clusters = 1;
132
        
133
        cluster_start = False;
134
        cluster_pick = False;
135
        previous_distance = 0.0;
136
        
137
        for distance in self.__ordering:
138
            if (distance >= radius):
139
                if (cluster_start is False):
140
                    cluster_start = True;
141
                    amount_clusters += 1;
142
                
143
                else:
144
                    if ((distance < previous_distance) and (cluster_pick is False)):
145
                        cluster_pick = True;
146
                    
147
                    elif ((distance > previous_distance) and (cluster_pick is True)):
148
                        cluster_pick = False;
149
                        amount_clusters += 1;
150
                
151
                previous_distance = distance;
152
            
153
            else:
154
                cluster_start = False;
155
                cluster_pick = False;
156
        
157
        return amount_clusters;
158
159
160
class optics_descriptor:
161
    """!
162
    @brief Object description that used by OPTICS algorithm for cluster analysis.
163
    
164
    """
165
    
166
    def __init__(self, index, core_distance = None, reachability_distance = None):
167
        """!
168
        @brief Constructor of object description in optics terms.
169
        
170
        @param[in] index (uint): Index of the object in the data set.
171
        @param[in] core_distance (double): Core distance that is minimum distance to specified number of neighbors.
172
        @param[in] reachability_distance (double): Reachability distance to this object.
173
        
174
        """
175
        
176
        ## Reachability distance - the smallest distance to be reachable by core object.
177
        self.index_object = index;
178
        
179
        ## Core distance - the smallest distance to reach specified number of neighbors that is not greater then connectivity radius.
180
        self.core_distance = core_distance;
181
        
182
        ## Index of object from the input data.
183
        self.reachability_distance = reachability_distance;
184
        
185
        ## True is object has been already traversed.
186
        self.processed = False;
187
        
188
    def __repr__(self):
189
        """!
190
        @brief Returns string representation of the optics descriptor.
191
        
192
        """
193
        
194
        return '(%s, [c: %s, r: %s])' % (self.index_object, self.core_distance, self.reachability_distance);
195
196
197
class optics:
198
    """!
199
    @brief Class represents clustering algorithm OPTICS (Ordering Points To Identify Clustering Structure).
200
    @details OPTICS is a density-based algorithm. Purpose of the algorithm is to provide explicit clusters, but create clustering-ordering representation of the input data. 
201
             Clustering-ordering information contains information about internal structures of data set in terms of density and proper connectivity radius can be obtained
202
             for allocation required amount of clusters using this diagram. In case of usage additional input parameter 'amount of clusters' connectivity radius should be
203
             bigger than real - because it will be calculated by the algorithms.
204
205
    Example:
206
    @code
207
        # Read sample for clustering from some file
208
        sample = read_sample(path_sample);
209
        
210
        # Create OPTICS algorithm for cluster analysis
211
        optics_instance = optics(sample, 0.5, 6);
212
        
213
        # Run cluster analysis
214
        optics_instance.process();
215
        
216
        # Obtain results of clustering
217
        clusters = optics_instance.get_clusters();
218
        noise = optics_instance.get_noise();
219
        
220
        # Obtain rechability-distances
221
        ordering = ordering_analyser(optics_instance.get_ordering());
222
        
223
        # Visualization of cluster ordering in line with reachability distance.
224
        ordering_visualizer.show_ordering_diagram(ordering);
225
    @endcode
226
    
227
    Amount of clusters that should be allocated can be also specified. In this case connectivity radius should be greater than real, for example:
228
    @code
229
        # Import required packages
230
        from pyclustering.cluster.optics import optics;
231
        from pyclustering.samples.definitions import FCPS_SAMPLES;
232
        from pyclustering.utils import read_sample;
233
        
234
        # Read sample for clustering from some file
235
        sample = read_sample(FCPS_SAMPLES.SAMPLE_LSUN);
236
        
237
        # Run cluster analysis where connvectivity radius is bigger than real
238
        radius = 2.0;
239
        neighbors = 3;
240
        amount_of_clusters = 3;
241
        
242
        optics_instance = optics(sample, radius, neighbors, amount_of_clusters);
243
        
244
        # Obtain results of clustering
245
        clusters = optics_instance.get_clusters();
246
        noise = optics_instance.get_noise();
247
    @endcode
248
    
249
    """
250
    
251
    def __init__(self, sample, eps, minpts, amount_clusters = None):
252
        """!
253
        @brief Constructor of clustering algorithm OPTICS.
254
        
255
        @param[in] sample (list): Input data that is presented as a list of points (objects), where each point is represented by list or tuple.
256
        @param[in] eps (double): Connectivity radius between points, points may be connected if distance between them less than the radius.
257
        @param[in] minpts (uint): Minimum number of shared neighbors that is required for establishing links between points.
258
        @param[in] amount_clusters (uint): Optional parameter where amount of clusters that should be allocated is specified.
259
                    In case of usage 'amount_clusters' connectivity radius can be greater than real, in other words, there is place for mistake
260
                    in connectivity radius usage.
261
        
262
        """
263
        
264
        self.__sample_pointer = sample;     # Algorithm parameter - pointer to sample for processing.
265
        self.__eps = eps;                   # Algorithm parameter - connectivity radius between object for establish links between object.
266
        self.__minpts = minpts;             # Algorithm parameter - minimum number of neighbors that is required for establish links between object.
267
        self.__amount_clusters = amount_clusters;
268
        self.__ordering = None;
269
        
270
        self.__initialize(sample);
271
272
273
    def process(self):
274
        """!
275
        @brief Performs cluster analysis in line with rules of OPTICS algorithm.
276
        
277
        @remark Results of clustering can be obtained using corresponding gets methods.
278
        
279
        @see get_clusters()
280
        @see get_noise()
281
        @see get_ordering()
282
        
283
        """
284
        
285
        self.__allocate_clusters();
286
        
287
        if ( (self.__amount_clusters is not None) and (self.__amount_clusters != len(self.get_clusters())) ):
288
            analyser = ordering_analyser(self.get_ordering());
289
            radius = analyser.calculate_connvectivity_radius(self.__amount_clusters);
290
            if (radius is not None):
291
                self.__eps = radius;
292
                self.__initialize(self.__sample_pointer);
293
                self.__allocate_clusters();
294
295
296
    def __initialize(self, sample):
297
        """!
298
        @brief Initializes internal states and resets clustering results in line with input sample.
299
        
300
        """
301
        
302
        self.__processed = [False] * len(sample);
303
        self.__optics_objects = [optics_descriptor(i) for i in range(len(sample))];     # List of OPTICS objects that corresponds to objects from input sample.
304
        self.__ordered_database = [];       # List of OPTICS objects in traverse order. 
305
        
306
        self.__clusters = None;     # Result of clustering (list of clusters where each cluster contains indexes of objects from input data).
307
        self.__noise = None;        # Result of clustering (noise).
308
309
310
    def __allocate_clusters(self):
311
        """!
312
        @brief Performs cluster allocation and builds ordering diagram that is based on reachability-distances.
313
        
314
        """
315
        
316
        for optic_object in self.__optics_objects:
317
            if (optic_object.processed is False):
318
                self.__expand_cluster_order(optic_object);
319
        
320
        self.__extract_clusters();
321
    
322
    
323
    def get_clusters(self):
324
        """!
325
        @brief Returns list of allocated clusters, where each cluster contains indexes of objects and each cluster is represented by list.
326
        
327
        @return (list) List of allocated clusters.
328
        
329
        @see process()
330
        @see get_noise()
331
        @see get_ordering()
332
        @see get_radius()
333
        
334
        """
335
        
336
        return self.__clusters;
337
    
338
    
339
    def get_noise(self):
340
        """!
341
        @brief Returns list of noise that contains indexes of objects that corresponds to input data.
342
        
343
        @return (list) List of allocated noise objects.
344
        
345
        @see process()
346
        @see get_clusters()
347
        @see get_ordering()
348
        @see get_radius()
349
        
350
        """
351
        
352
        return self.__noise;
353
    
354
    
355
    def get_ordering(self):
356
        """!
357
        @brief Returns clustering ordering information about the input data set.
358
        @details Clustering ordering of data-set contains the information about the internal clustering structure in line with connectivity radius.
359
        
360
        @return (ordering_analyser) Analyser of clustering ordering.
361
        
362
        @see process()
363
        @see get_clusters()
364
        @see get_noise()
365
        @see get_radius()
366
        
367
        """
368
        
369
        if (self.__ordering is None):
370
            self.__ordering = [];
371
        
372
            for cluster in self.__clusters:
373
                for index_object in cluster:
374
                    optics_object = self.__optics_objects[index_object];
375
                    if (optics_object.reachability_distance is not None):
376
                        self.__ordering.append(optics_object.reachability_distance);
377
            
378
        return self.__ordering;
379
    
380
    
381
    def get_radius(self):
382
        """!
383
        @brief Returns connectivity radius that is calculated and used for clustering by the algorithm.
384
        @details Connectivity radius may be changed only in case of usage additional parameter of the algorithm - amount of clusters for allocation.
385
        
386
        @return (double) Connectivity radius.
387
        
388
        @see get_ordering()
389
        @see get_clusters()
390
        @see get_noise()
391
        
392
        """
393
        
394
        return self.__eps;
395
    
396
397
    def get_cluster_encoding(self):
398
        """!
399
        @brief Returns clustering result representation type that indicate how clusters are encoded.
400
        
401
        @return (type_encoding) Clustering result representation.
402
        
403
        @see get_clusters()
404
        
405
        """
406
        
407
        return type_encoding.CLUSTER_INDEX_LIST_SEPARATION;
408
409
410
    def __expand_cluster_order(self, optics_object):
411
        """!
412
        @brief Expand cluster order from not processed optic-object that corresponds to object from input data.
413
               Traverse procedure is performed until objects are reachable from core-objects in line with connectivity radius.
414
               Order database is updated during expanding.
415
               
416
        @param[in] optics_object (optics_descriptor): Object that hasn't been processed.
417
        
418
        """
419
        
420
        optics_object.processed = True;
421
        
422
        neighbors_descriptor = self.__neighbor_indexes(optics_object);
423
        optics_object.reachability_distance = None;
424
        
425
        self.__ordered_database.append(optics_object);
426
        
427
        # Check core distance
428
        if (len(neighbors_descriptor) >= self.__minpts):
429
            neighbors_descriptor.sort(key = lambda obj: obj[1]);
430
            optics_object.core_distance = neighbors_descriptor[self.__minpts - 1][1];
431
            
432
            # Continue processing
433
            order_seed = list();
434
            self.__update_order_seed(optics_object, neighbors_descriptor, order_seed);
435
            
436
            while(len(order_seed) > 0):
437
                optic_descriptor = order_seed[0];
438
                order_seed.remove(optic_descriptor);
439
                
440
                neighbors_descriptor = self.__neighbor_indexes(optic_descriptor);
441
                optic_descriptor.processed = True;
442
                
443
                self.__ordered_database.append(optic_descriptor);
444
                
445
                if (len(neighbors_descriptor) >= self.__minpts):
446
                    neighbors_descriptor.sort(key = lambda obj: obj[1]);
447
                    optic_descriptor.core_distance = neighbors_descriptor[self.__minpts - 1][1];
448
                    
449
                    self.__update_order_seed(optic_descriptor, neighbors_descriptor, order_seed);
450
                else:
451
                    optic_descriptor.core_distance = None;
452
                    
453
        else:
454
            optics_object.core_distance = None;
455
456
    
457
    def __extract_clusters(self):
458
        """!
459
        @brief Extract clusters and noise from order database.
460
        
461
        """
462
     
463
        self.__clusters = [];
464
        self.__noise = [];
465
466
        current_cluster = [];
467
        for optics_object in self.__ordered_database:
468
            if ((optics_object.reachability_distance is None) or (optics_object.reachability_distance > self.__eps)):
469
                if ((optics_object.core_distance is not None) and (optics_object.core_distance <= self.__eps)):
470
                    if (len(current_cluster) > 0):
471
                        self.__clusters.append(current_cluster);
472
                        current_cluster = [];
473
                        
474
                    current_cluster.append(optics_object.index_object);
475
                else:
476
                    self.__noise.append(optics_object.index_object);
477
            else:
478
                current_cluster.append(optics_object.index_object);
479
        
480
        if (len(current_cluster) > 0):
481
            self.__clusters.append(current_cluster);
482
                
483
484
    def __update_order_seed(self, optic_descriptor, neighbors_descriptors, order_seed):
485
        """!
486
        @brief Update sorted list of reachable objects (from core-object) that should be processed using neighbors of core-object.
487
        
488
        @param[in] optic_descriptor (optics_descriptor): Core-object whose neighbors should be analysed.
489
        @param[in] neighbors_descriptors (list): List of neighbors of core-object.
490
        @param[in|out] order_seed (list): List of sorted object in line with reachable distance.
491
        
492
        """
493
        
494
        for neighbor_descriptor in neighbors_descriptors:
495
            index_neighbor = neighbor_descriptor[0];
496
            current_reachable_distance = neighbor_descriptor[1];
497
            
498
            if (self.__optics_objects[index_neighbor].processed != True):
499
                reachable_distance = max(current_reachable_distance, optic_descriptor.core_distance);
500
                if (self.__optics_objects[index_neighbor].reachability_distance is None):
501
                    self.__optics_objects[index_neighbor].reachability_distance = reachable_distance;
502
                    
503
                    # insert element in queue O(n) - worst case.
504
                    index_insertion = len(order_seed);
505
                    for index_seed in range(0, len(order_seed)):
506
                        if (reachable_distance < order_seed[index_seed].reachability_distance):
507
                            index_insertion = index_seed;
508
                            break;
509
                    
510
                    order_seed.insert(index_insertion, self.__optics_objects[index_neighbor]);
511
512
                else:
513
                    if (reachable_distance < self.__optics_objects[index_neighbor].reachability_distance):
514
                        self.__optics_objects[index_neighbor].reachability_distance = reachable_distance;
515
                        order_seed.sort(key = lambda obj: obj.reachability_distance);
516
517
518
    def __neighbor_indexes(self, optic_object):
519
        """!
520
        @brief Return list of indexes of neighbors of specified point for the data.
521
        
522
        @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned in line with connectivity radius.
523
        
524
        @return (list) List of indexes of neighbors in line the connectivity radius.
525
        
526
        """
527
              
528
        neighbor_description = [];
529
        
530
        for index in range(0, len(self.__sample_pointer), 1):
531
            if (index == optic_object.index_object):
532
                continue;
533
            
534
            distance = euclidean_distance(self.__sample_pointer[optic_object.index_object], self.__sample_pointer[index]);
535
            if (distance <= self.__eps):
536
                neighbor_description.append( [index, distance] );
537
            
538
        return neighbor_description;