Completed
Push — master ( 6ed467...f3f897 )
by Andrei
01:29
created

cluster_encoder.__convert_index_to_object()   A

Complexity

Conditions 4

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
dl 0
loc 8
rs 9.2
c 1
b 0
f 0
1
"""!
2
3
@brief Module for representing clustering results.

@authors Andrei Novikov (pyclustering@yandex.ru)
@date 2014-2016
@copyright GNU Public License

@cond GNU_PUBLIC_LICENSE
    PyClustering is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PyClustering is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
@endcond
23
24
"""
25
26
27
from enum import IntEnum;
28
29
30
class type_encoding(IntEnum):
    """!
    @brief Enumeration of the supported clustering result representations.

    @details Members are integers (IntEnum), so they compare equal to the plain values 0, 1 and 2.

    """

    ## Results are represented by list of indexes and belonging to the cluster is defined by cluster index and element's position corresponds to object's position in input data, for example [0, 0, 1, 1, 1, 0].
    CLUSTER_INDEX_LABELING = 0

    ## Results are represented by list of lists, where each list consists of object indexes, for example [ [0, 1, 2], [3, 4, 5], [6, 7] ].
    CLUSTER_INDEX_LIST_SEPARATION = 1

    ## Results are represented by list of lists, where each list consists of objects, for example [ [obj1, obj2], [obj3, obj4, obj5], [obj6, obj7] ].
    CLUSTER_OBJECT_LIST_SEPARATION = 2
39
40
41
class cluster_encoder:
    """!
    @brief Provides service to change clustering result representation.

    @details The encoder keeps the current representation type, the clusters in that
             representation and the data the clusters refer to. Calling set_encoding()
             converts the stored clusters to another representation from type_encoding.

    Example:
    @code
        # load list of points for cluster analysis
        sample = read_sample(path)

        # create instance of K-Means algorithm
        kmeans_instance = kmeans(sample, [ [0.0, 0.1], [2.5, 2.6] ])

        # run cluster analysis and obtain results
        kmeans_instance.process()
        clusters = kmeans_instance.get_clusters()

        # by default k-means returns representation CLUSTER_INDEX_LIST_SEPARATION
        type_repr = kmeans_instance.get_cluster_encoding()
        encoder = cluster_encoder(type_repr, clusters, sample)

        # change representation from index list to label list
        encoder.set_encoding(type_encoding.CLUSTER_INDEX_LABELING)

        # change representation from label to object list
        encoder.set_encoding(type_encoding.CLUSTER_OBJECT_LIST_SEPARATION)
    @endcode
    """

    def __init__(self, encoding, clusters, data):
        """!
        @brief Constructor of clustering result representor.

        @param[in] encoding (type_encoding): Type of clusters representation (index list, object list or labels).
        @param[in] clusters (list): Current clusters representation.
        @param[in] data (list): Data that corresponds to clusters.

        """

        self.__type_representation = encoding
        self.__clusters = clusters
        self.__data = data


    @property
    def get_encoding(self):
        """!
        @brief Returns current cluster representation.

        @details Declared as a property, so it is read as an attribute (`encoder.get_encoding`),
                 without parentheses, unlike get_clusters() and get_data().

        """
        return self.__type_representation


    def get_clusters(self):
        """!
        @brief Returns clusters representation.

        @return Clusters in the representation reported by get_encoding.

        """
        return self.__clusters


    def get_data(self):
        """!
        @brief Returns data that corresponds to clusters.

        """
        return self.__data


    def set_encoding(self, encoding):
        """!
        @brief Change clusters encoding to specified type (index list, object list, labeling).

        @details Nothing is done when the requested encoding equals the current one.
                 The argument is not validated: for each current representation one target
                 is checked explicitly and any other value is handled by the fall-through branch.

        @param[in] encoding (type_encoding): New type of clusters representation.

        """

        if encoding == self.__type_representation:
            return

        if self.__type_representation == type_encoding.CLUSTER_INDEX_LABELING:
            if encoding == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
                self.__clusters = self.__convert_label_to_index()
            else:
                self.__clusters = self.__convert_label_to_object()

        elif self.__type_representation == type_encoding.CLUSTER_INDEX_LIST_SEPARATION:
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_index_to_label()
            else:
                self.__clusters = self.__convert_index_to_object()

        else:
            if encoding == type_encoding.CLUSTER_INDEX_LABELING:
                self.__clusters = self.__convert_object_to_label()
            else:
                self.__clusters = self.__convert_object_to_index()

        # The type is updated only after a conversion has finished successfully.
        self.__type_representation = encoding


    def __find_object_index(self, data_object, positions):
        """!
        @brief Finds position of the object in the data, skipping positions already taken by equal objects.

        @details `positions` remembers, per textual form of an object, the last index that was
                 returned for it; the next search for an equal object starts right after that index,
                 so repeated objects are mapped to consecutive occurrences in the data.
                 The lookup relies on the `index` method of the data container (as provided by list).

        @param[in] data_object (any): Object whose position should be found.
        @param[in,out] positions (dict): Last found index for each object key, updated by the call.

        @return (int) Index of the object in the data.

        """
        # str() is used as the key because data objects (e.g. lists) may be unhashable.
        object_key = str(data_object)

        if object_key in positions:
            index_object = self.__data.index(data_object, positions[object_key] + 1)
        else:
            index_object = self.__data.index(data_object)

        positions[object_key] = index_object
        return index_object


    def __allocate_clusters_by_labels(self):
        """!
        @brief Creates list of empty clusters whose amount is defined by the biggest label.

        @return (list) Empty clusters, or empty list when there are no labels at all.

        """
        # max() raises ValueError for an empty sequence, so the empty result is handled explicitly.
        amount_clusters = max(self.__clusters) + 1 if len(self.__clusters) > 0 else 0
        return [[] for _ in range(amount_clusters)]


    def __convert_index_to_label(self):
        """!
        @brief Converts list of index lists to list of labels.

        @details Objects that are not mentioned in any cluster keep label 0.

        @return (list) Cluster label for each object of the data.

        """
        labels = [0] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for index_object in cluster:
                labels[index_object] = index_cluster

        return labels


    def __convert_index_to_object(self):
        """!
        @brief Converts list of index lists to list of object lists.

        @return (list) Clusters where each index is replaced by corresponding object of the data.

        """
        return [[self.__data[index_object] for index_object in cluster] for cluster in self.__clusters]


    def __convert_object_to_label(self):
        """!
        @brief Converts list of object lists to list of labels.

        @details Objects of the data that are not found in any cluster keep label 0.

        @return (list) Cluster label for each object of the data.

        """
        positions = {}
        labels = [0] * len(self.__data)

        for index_cluster, cluster in enumerate(self.__clusters):
            for data_object in cluster:
                labels[self.__find_object_index(data_object, positions)] = index_cluster

        return labels


    def __convert_object_to_index(self):
        """!
        @brief Converts list of object lists to list of index lists.

        @return (list) Clusters where each object is replaced by its index in the data.

        """
        positions = {}
        clusters = []

        for cluster in self.__clusters:
            clusters.append([self.__find_object_index(data_object, positions) for data_object in cluster])

        return clusters


    def __convert_label_to_index(self):
        """!
        @brief Converts list of labels to list of index lists.

        @return (list) Clusters where each cluster contains indexes of objects with the same label.

        """
        clusters = self.__allocate_clusters_by_labels()

        for index_object in range(len(self.__data)):
            clusters[self.__clusters[index_object]].append(index_object)

        return clusters


    def __convert_label_to_object(self):
        """!
        @brief Converts list of labels to list of object lists.

        @return (list) Clusters where each cluster contains objects with the same label.

        """
        clusters = self.__allocate_clusters_by_labels()

        for index_object, data_object in enumerate(self.__data):
            clusters[self.__clusters[index_object]].append(data_object)

        return clusters