Completed
Push — master ( a63a82...61f52c )
by
unknown
01:32
created

agglomerative.process()   A

Complexity

Conditions 4

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 4
dl 0
loc 19
rs 9.2
1
"""!
2
3
@brief Cluster analysis algorithm: agglomerative algorithm.
4
@details Implementation based on book:
5
         - K.Anil, J.C.Dubes, R.C.Dubes. Algorithms for Clustering Data. 1988.
6
7
@authors Andrei Novikov ([email protected])
8
@date 2014-2016
9
@copyright GNU Public License
10
11
@cond GNU_PUBLIC_LICENSE
12
    PyClustering is free software: you can redistribute it and/or modify
13
    it under the terms of the GNU General Public License as published by
14
    the Free Software Foundation, either version 3 of the License, or
15
    (at your option) any later version.
16
    
17
    PyClustering is distributed in the hope that it will be useful,
18
    but WITHOUT ANY WARRANTY; without even the implied warranty of
19
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
    GNU General Public License for more details.
21
    
22
    You should have received a copy of the GNU General Public License
23
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
24
@endcond
25
26
"""
27
28
from enum import IntEnum;
29
30
from pyclustering.utils import euclidean_distance_sqrt;
31
32
import pyclustering.core.agglomerative_wrapper as wrapper;
33
34
35
class type_link(IntEnum):
36
    """!
37
    @brief Enumerator of types of link between clusters.
38
    
39
    """
40
41
    ## Nearest objects in clusters is considered as a link.
42
    SINGLE_LINK = 0;
43
    
44
    ## Farthest objects in clusters is considered as a link.
45
    COMPLETE_LINK = 1;
46
    
47
    ## Average distance between objects in clusters is considered as a link.
48
    AVERAGE_LINK = 2;
49
    
50
    ## Distance between centers of clusters is considered as a link.
51
    CENTROID_LINK = 3;
52
53
54
class agglomerative:
55
    """!
56
    @brief Class represents agglomerative algorithm for cluster analysis.
57
    
58
    Example:
59
    @code
60
        # sample for cluster analysis (represented by list)
61
        sample = read_sample(path_to_sample);
62
        
63
        # create object that uses python code only
64
        agglomerative_instance = agglomerative(sample, 2, link_type.CENTROID_LINK)
65
        
66
        # cluster analysis
67
        agglomerative_instance.process();
68
        
69
        # obtain results of clustering
70
        clusters = agglomerative_instance.get_clusters();  
71
    @endcode
72
    
73
    """
74
    
75
    __pointer_data = None;
76
    __number_clusters = 0;
77
    
78
    __ccore = False;
79
    
80
    __clusters = None;
81
    __similarity = None;
82
    
83
    # some cluster charactestics that may be used or may not be used
84
    __centers = None;   # used in case of usage of centroid links
85
    
86
    
87
    def __init__(self, data, number_clusters, link = None, ccore = False):
88
        """!
89
        @brief Constructor of clustering algorithm hierarchical.
90
        
91
        @param[in] data (list): Input data that is presented as a list of points (objects), each point should be represented by a list or tuple.
92
        @param[in] number_clusters (uint): Number of clusters that should be allocated.
93
        @param[in] link (type_link): Link type that is used for calculation similarity between objects and clusters, if it is not specified centroid link will be used by default.
94
        @param[in] ccore (bool): Defines should be CCORE (C++ pyclustering library) used instead of Python code or not.
95
        
96
        """  
97
        
98
        self.__pointer_data = data;
99
        self.__number_clusters = number_clusters;
100
        self.__similarity = link;
101
        
102
        if (self.__similarity is None):
103
            self.__similarity = type_link.CENTROID_LINK;
104
        
105
        self.__clusters = [];
106
        self.__ccore = ccore;
107
        
108
        if (self.__similarity == type_link.CENTROID_LINK):
109
            self.__centers = self.__pointer_data.copy();
110
    
111
    
112
    def process(self):
113
        """!
114
        @brief Performs cluster analysis in line with rules of agglomerative algorithm and similarity.
115
        
116
        @see get_clusters()
117
        
118
        """
119
        
120
        if (self.__ccore is True):
121
            self.__clusters = wrapper.agglomerative_algorithm(self.__pointer_data, self.__number_clusters, self.__similarity);
122
123
        else:
124
            self.__clusters = [[index] for index in range(0, len(self.__pointer_data))];
125
            
126
            current_number_clusters = len(self.__clusters);
127
                
128
            while (current_number_clusters > self.__number_clusters):
129
                self.__merge_similar_clusters();
130
                current_number_clusters = len(self.__clusters);
131
    
132
    
133
    def get_clusters(self):
134
        """!
135
        @brief Returns list of allocated clusters, each cluster contains indexes of objects in list of data.
136
        
137
        @remark Results of clustering can be obtained using corresponding gets methods.
138
        
139
        @return (list) List of allocated clusters, each cluster contains indexes of objects in list of data.
140
        
141
        @see process()
142
        
143
        """
144
        
145
        return self.__clusters;
146
    
147
    
148
    def __merge_similar_clusters(self):
149
        """!
150
        @brief Merges the most similar clusters in line with link type.
151
        
152
        """
153
        
154
        if (self.__similarity == type_link.AVERAGE_LINK):
155
            self.__merge_by_average_link();
156
        
157
        elif (self.__similarity == type_link.CENTROID_LINK):
158
            self.__merge_by_centroid_link();
159
        
160
        elif (self.__similarity == type_link.COMPLETE_LINK):
161
            self.__merge_by_complete_link();
162
        
163
        elif (self.__similarity == type_link.SINGLE_LINK):
164
            self.__merge_by_signle_link();
165
        
166
        else:
167
            raise NameError('Not supported similarity is used');
168
    
169
    
170
    def __merge_by_average_link(self):
171
        """!
172
        @brief Merges the most similar clusters in line with average link type.
173
        
174
        """
175
        
176
        minimum_average_distance = float('Inf');
177
        
178
        for index_cluster1 in range(0, len(self.__clusters)):
179
            for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
180
                
181
                # Find farthest objects
182
                candidate_average_distance = 0.0;
183
                for index_object1 in self.__clusters[index_cluster1]:
184
                    for index_object2 in self.__clusters[index_cluster2]:
185
                        candidate_average_distance += euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2]);
186
                
187
                candidate_average_distance /= (len(self.__clusters[index_cluster1]) + len(self.__clusters[index_cluster2]));
188
                
189
                if (candidate_average_distance < minimum_average_distance):
190
                    minimum_average_distance = candidate_average_distance;
191
                    indexes = [index_cluster1, index_cluster2];
192
        
193
        self.__clusters[indexes[0]] += self.__clusters[indexes[1]];  
194
        self.__clusters.pop(indexes[1]);   # remove merged cluster.  
195
        
196
    
197
    def __merge_by_centroid_link(self):
198
        """!
199
        @brief Merges the most similar clusters in line with centroid link type.
200
        
201
        """
202
        
203
        minimum_centroid_distance = float('Inf');
204
        indexes = None;
205
        
206
        for index1 in range(0, len(self.__centers)):
207
            for index2 in range(index1 + 1, len(self.__centers)):
208
                distance = euclidean_distance_sqrt(self.__centers[index1], self.__centers[index2]);
209
                if (distance < minimum_centroid_distance):
210
                    minimum_centroid_distance = distance;
211
                    indexes = [index1, index2];
212
        
213
        self.__clusters[indexes[0]] += self.__clusters[indexes[1]];
214
        self.__centers[indexes[0]] = self.__calculate_center(self.__clusters[indexes[0]]);
215
         
216
        self.__clusters.pop(indexes[1]);   # remove merged cluster.
217
        self.__centers.pop(indexes[1]);    # remove merged center.
218
    
219
    
220 View Code Duplication
    def __merge_by_complete_link(self):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
221
        """!
222
        @brief Merges the most similar clusters in line with complete link type.
223
        
224
        """
225
        
226
        minimum_complete_distance = float('Inf');
227
        indexes = None;
228
        
229
        for index_cluster1 in range(0, len(self.__clusters)):
230
            for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
231
                
232
                # Find farthest objects
233
                candidate_maximum_distance = 0.0;
234
                for index_object1 in self.__clusters[index_cluster1]:
235
                    for index_object2 in self.__clusters[index_cluster2]:
236
                        distance = euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2]);
237
                        if (distance > candidate_maximum_distance):
238
                            candidate_maximum_distance = distance;
239
                
240
                if (candidate_maximum_distance < minimum_complete_distance):
241
                    minimum_complete_distance = candidate_maximum_distance;
242
                    indexes = [index_cluster1, index_cluster2];
243
244
        self.__clusters[indexes[0]] += self.__clusters[indexes[1]];  
245
        self.__clusters.pop(indexes[1]);   # remove merged cluster.        
246
    
247
    
248 View Code Duplication
    def __merge_by_signle_link(self):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
249
        """!
250
        @brief Merges the most similar clusters in line with single link type.
251
        
252
        """
253
        
254
        minimum_single_distance = float('Inf');
255
        indexes = None;
256
        
257
        for index_cluster1 in range(0, len(self.__clusters)):
258
            for index_cluster2 in range(index_cluster1 + 1, len(self.__clusters)):
259
                
260
                # Find nearest objects
261
                candidate_minimum_distance = float('Inf');
262
                for index_object1 in self.__clusters[index_cluster1]:
263
                    for index_object2 in self.__clusters[index_cluster2]:
264
                        distance = euclidean_distance_sqrt(self.__pointer_data[index_object1], self.__pointer_data[index_object2]);
265
                        if (distance < candidate_minimum_distance):
266
                            candidate_minimum_distance = distance;
267
                
268
                if (candidate_minimum_distance < minimum_single_distance):
269
                    minimum_single_distance = candidate_minimum_distance;
270
                    indexes = [index_cluster1, index_cluster2];
271
272
        self.__clusters[indexes[0]] += self.__clusters[indexes[1]];  
273
        self.__clusters.pop(indexes[1]);   # remove merged cluster.
274
        
275
                
276
    def __calculate_center(self, cluster):
277
        """!
278
        @brief Calculates new center.
279
        
280
        @return (list) New value of the center of the specified cluster.
281
        
282
        """
283
         
284
        dimension = len(self.__pointer_data[cluster[0]]);
285
        center = [0] * dimension;
286
        for index_point in cluster:
287
            for index_dimension in range(0, dimension):
288
                center[index_dimension] += self.__pointer_data[index_point][index_dimension];
289
         
290
        for index_dimension in range(0, dimension):
291
            center[index_dimension] /= len(cluster);
292
             
293
        return center;