Completed
Branch master (5cc02c)
by Andrei
01:30
created

pyclustering.container.tests.Test.templateDistanceCalculation()   F

Complexity

Conditions 12

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 12
dl 0
loc 26
rs 2.7855

How to fix   Complexity   

Complexity

Complex classes like pyclustering.container.tests.Test.templateDistanceCalculation() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
'''
2
3
Unit-tests for CF-Tree.
4
5
Copyright (C) 2015    Andrei Novikov ([email protected])
6
7
pyclustering is free software: you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation, either version 3 of the License, or
10
(at your option) any later version.
11
12
pyclustering is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
GNU General Public License for more details.
16
17
You should have received a copy of the GNU General Public License
18
along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
'''
21
22
import unittest;
23
24
import numpy;
25
26
from pyclustering.container.cftree import cfentry, cftree;
27
from pyclustering.container.cftree import measurement_type;
28
29
from pyclustering.utils import linear_sum, square_sum;
30
from pyclustering.utils import euclidean_distance_sqrt, manhattan_distance, average_inter_cluster_distance, average_intra_cluster_distance, variance_increase_distance;
31
32
from random import random;
33
34
class Test(unittest.TestCase):
35
    def templateCfClusterRepresentation(self, cluster, centroid, radius, diameter, tolerance):
36
        entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster));
37
           
38
        assertion_centroid = centroid;
39
        if (type(centroid) != list):
40
            assertion_centroid = [ centroid ];
41
           
42
        if (type(centroid) == list):
43
            for dimension in range(0, len(assertion_centroid)):
44
                assert (assertion_centroid[dimension] - tolerance < ( entry.get_centroid() )[dimension]) and (( entry.get_centroid() )[dimension] < assertion_centroid[dimension] + tolerance);
45
           
46
        assert (radius - tolerance < entry.get_radius()) and (entry.get_radius() < radius + tolerance);
47
        assert (diameter - tolerance < entry.get_diameter()) and (entry.get_diameter() < diameter + tolerance);
48
           
49
    def testCfClusterRepresentationOneDimension2(self):
50
        cluster = [ [0.1], [0.2], [0.5], [0.4], [0.6] ];
51
        self.templateCfClusterRepresentation(cluster, 0.36, 0.18547, 0.29326, 0.0001);        
52
       
53
    def testCfClusterRepresentationTwoDimension(self):
54
        cluster = [ [0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6] ];
55
        self.templateCfClusterRepresentation(cluster, [0.36, 0.36], 0.26230, 0.41473, 0.0001);
56
       
57
       
58
    def templateCfEntryValueDistance(self, cluster1, cluster2, value, tolerance, type_measurment):
59
        entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1));
60
        entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2));
61
           
62
        distance = entry1.get_distance(entry2, type_measurment);
63
        assert ( (value - tolerance < distance) and (value + tolerance > distance) );
64
           
65
    def testCfEntryTwoPoints1(self):
66
        self.templateCfEntryValueDistance([[0.0]], [[1.0]], 1.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
67
   
68
    def testCfEntryTwoPoints2(self):
69
        self.templateCfEntryValueDistance([[0.0]], [[0.0]], 0.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
70
           
71
    def testCfEntryTwoPoints3(self):
72
        self.templateCfEntryValueDistance([[-1.0]], [[0.0]], 1.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
73
           
74
    def testCfEntryTwoPoints4(self):
75
        self.templateCfEntryValueDistance([[1.0, 0.0]], [[0.0, 1.0]], 2.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
76
       
77
       
78
    def testCfEntryIncrease(self):
79
        tolerance = 0.00001;
80
        cluster = [ [0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6] ];
81
           
82
        entry1 = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster));
83
        entry2 = entry1 + entry1;
84
           
85
        assert cfentry(10, [3.6, 3.6], 3.28) == entry2;
86
           
87
        entry2 = entry2 + entry2;
88
        assert cfentry(20, [7.2, 7.2], 6.56) == entry2;
89
       
90
    def testCfEntryDecrease(self):
91
        entry1 = cfentry(10, [3.6, 3.6], 3.28);
92
        entry2 = cfentry(5, [1.8, 1.8], 1.64);
93
           
94
        assert entry2 == (entry1 - entry2);
95
       
96
       
97
    def templateCfEntryDistance(self, type_measurement):
98
        cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]];
99
        cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]];
100
        cluster3 = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]];
101
           
102
        entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1));
103
        entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2));
104
        entry3 = cfentry(len(cluster3), linear_sum(cluster3), square_sum(cluster3));
105
           
106
        distance12 = entry1.get_distance(entry2, type_measurement);
107
        distance23 = entry2.get_distance(entry3, type_measurement);
108
        distance13 = entry1.get_distance(entry3, type_measurement);
109
        
110
        assert distance12 < distance23;
111
        assert distance23 < distance13;
112
       
113
    def testCfDistanceCentroidEuclidian(self):
114
        self.templateCfEntryDistance(measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
115
           
116
    def testCfDistanceCentroidManhatten(self):
117
        self.templateCfEntryDistance(measurement_type.CENTROID_MANHATTAN_DISTANCE);
118
           
119
    def testCfDistanceAverageInterCluster(self):
120
        self.templateCfEntryDistance(measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE);
121
   
122
    def testCfDistanceAverageIntraCluster(self):
123
        self.templateCfEntryDistance(measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE);
124
           
125
    def testCfDistanceVarianceIncrease(self):
126
        self.templateCfEntryDistance(measurement_type.VARIANCE_INCREASE_DISTANCE);
127
    
128
    
129
    def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
130
        entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1));
131
        entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2));
132
        
133
        # check that the same distance from 1 to 2 and from 2 to 1.
134
        distance12 = entry1.get_distance(entry2, type_measurement);
135
        distance21 = entry2.get_distance(entry1, type_measurement);
136
        
137
        assert distance12 == distance21;
138
        
139
        # check with utils calculation
140
        float_delta = 0.0000001;
141
        if (type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE):
142
            assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid());
143
        
144
        elif (type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE):
145
            assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid());
146
        
147
        elif (type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE):
148
            assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2)) == True;
149
        
150
        elif (type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE):
151
            assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2)) == True;
152
        
153
        elif (type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE):
154
            assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2)) == True;
155
    
156
    def templateDistanceCalculationTheSimplestSample(self, type_measurement):
157
        cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]];
158
        cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]];
159
        
160
        self.templateDistanceCalculation(cluster1, cluster2, type_measurement);
161
    
162
    def testDistanceCalculationTheSimplestSampleCentroidEuclidian(self):
163
        self.templateDistanceCalculationTheSimplestSample(measurement_type.CENTROID_EUCLIDIAN_DISTANCE);
164
165
    def testDistanceCalculationTheSimplestSampleCentroidManhattan(self):
166
        self.templateDistanceCalculationTheSimplestSample(measurement_type.CENTROID_MANHATTAN_DISTANCE);
167
168
    def testDistanceCalculationTheSimplestSampleAverageInterClusterDistance(self):
169
        self.templateDistanceCalculationTheSimplestSample(measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE);
170
171
    def testDistanceCalculationTheSimplestSampleAverageIntraClusterDistance(self):
172
        self.templateDistanceCalculationTheSimplestSample(measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE);
173
174
    def testDistanceCalculationTheSimplestSampleVarianceIncreaseDistance(self):
175
        self.templateDistanceCalculationTheSimplestSample(measurement_type.VARIANCE_INCREASE_DISTANCE);
176
177
178
    def testCfTreeCreationWithOneEntry(self):
179
        tree = cftree(2, 1, 1.0);
180
        entry = cfentry(5, [0.0, 0.1], 0.05);
181
           
182
        tree.insert(entry);
183
           
184
        assert 1 == tree.amount_nodes;
185
        assert 1 == tree.height;
186
        assert 1 == tree.amount_entries;
187
           
188
        assert entry == tree.root.feature;
189
        assert None == tree.root.parent;
190
           
191
       
192
    def testCfTreeCreationWithoutMerging(self):
193
        clusters = [ [ [random() + j, random() + j] for i in range(10) ] for j in range(10) ];
194
        tree = cftree(2, 1, 0.0);
195
           
196
        for cluster in clusters:
197
            tree.insert_cluster(cluster);
198
           
199
        assert tree.height >= 4;
200
        assert tree.amount_entries == 10;
201
        assert len(tree.leafes) == 10;
202
           
203
    def testCfTreeInserionOneLeafThreeEntries(self):
204
        cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]];
205
        cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]];
206
        cluster3 = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]];
207
            
208
        tree = cftree(3, 4, 0.0);
209
        tree.insert_cluster(cluster1);
210
        tree.insert_cluster(cluster2);
211
        tree.insert_cluster(cluster3);
212
            
213
        entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1));
214
        entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2));
215
        entry3 = cfentry(len(cluster3), linear_sum(cluster3), square_sum(cluster3));
216
            
217
        assert tree.find_nearest_leaf(entry1) == tree.find_nearest_leaf(entry2);
218
        assert tree.find_nearest_leaf(entry2) == tree.find_nearest_leaf(entry3);
219
       
220
       
221
    def templateCfTreeLeafIntegrity(self, number_clusters, branching_factor, max_entries, threshold):
222
        clusters = [ [ [random() + j, random() + j] for i in range(10) ] for j in range(number_clusters) ];
223
        tree = cftree(branching_factor, max_entries, threshold);
224
           
225
        for index_cluster in range(0, len(clusters)):
226
            tree.insert_cluster(clusters[index_cluster]);
227
               
228
            result_searching = False;
229
            for leaf in tree.leafes:
230
                for node_entry in leaf.entries:
231
                    result_searching |= (node_entry == node_entry);
232
       
233
            assert True == result_searching;
234
                   
235
    def testCfTreeLeafIntegrity10_2_1(self):
236
        self.templateCfTreeLeafIntegrity(10, 2, 1, 0.0);
237
            
238
    def testCfTreeLeafIntegrity10_3_1(self):
239
        self.templateCfTreeLeafIntegrity(10, 3, 1, 0.0);
240
            
241
    def testCfTreeLeafIntegrity20_4_1(self):
242
        self.templateCfTreeLeafIntegrity(20, 4, 1, 0.0);
243
       
244
    def testCfTreeLeafIntegrity20_4_2(self):
245
        self.templateCfTreeLeafIntegrity(20, 4, 2, 0.0);
246
           
247
    def testCfTreeLeafIntegrity40_10_5(self):
248
        self.templateCfTreeLeafIntegrity(40, 10, 5, 0.0);
249
          
250
          
251
    def testCfTreeEntryAbsorbing(self):
252
        tree = cftree(2, 1, 10000.0);
253
        absorbing_entry = cfentry(0, [0.0, 0.0], 0.0);
254
          
255
        for offset in range(0, 10):
256
            cluster = [ [random() + offset, random() + offset] for i in range(10)];
257
            entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster));
258
              
259
            absorbing_entry += entry;
260
              
261
            tree.insert(entry);
262
              
263
            assert 1 == tree.amount_entries;
264
            assert 1 == tree.amount_nodes;
265
            assert 1 == tree.height;
266
              
267
            assert None == tree.root.parent;
268
            assert absorbing_entry == tree.root.feature;
269
             
270
     
271
    def templateCfTreeTotalNumberPoints(self, number_points, dimension, branching_factor, number_entries, diameter):
272
        tree = cftree(branching_factor, number_entries, diameter);
273
         
274
        for index_point in range(0, number_points):
275
            point = [ index_point for i in range(0, dimension) ];
276
             
277
            tree.insert_cluster([ point ]);
278
             
279
            number_points = 0;
280
            for leaf in tree.leafes:
281
                number_points += leaf.feature.number_points;
282
                 
283
            assert (index_point + 1) == number_points;
284
         
285
        number_leaf_points = 0;
286
        for leaf in tree.leafes:
287
            number_leaf_points += leaf.feature.number_points;
288
         
289
        assert number_points == tree.root.feature.number_points;
290
         
291
        if (number_points != number_leaf_points):
292
            print(number_points, number_leaf_points);
293
             
294
        assert number_points == number_leaf_points;
295
         
296
    def testCfTreeTotalNumberPoints10_1_5_5_NoDiameter(self):
297
        self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 0.0);
298
 
299
    def testCfTreeTotalNumberPoints10_1_5_5_WithDiameter(self):
300
        self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 100.0);
301
        
302
    def testCfTreeTotalNumberPoints10_1_5_5_WithSmallDiameter(self):
303
        self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 2.5);
304
         
305
    def testCfTreeTotalNumberPoints10_2_5_5_NoDiameter(self):
306
        self.templateCfTreeTotalNumberPoints(10, 2, 5, 5, 0.0);
307
  
308
    def testCfTreeTotalNumberPoints10_2_5_5_WithDiameter(self):
309
        self.templateCfTreeTotalNumberPoints(10, 2, 5, 5, 100.0);
310
  
311
    def testCfTreeTotalNumberPoints50_3_5_5_NoDiameter(self):
312
        self.templateCfTreeTotalNumberPoints(50, 3, 5, 5, 0.0);
313
          
314
    def testCfTreeTotalNumberPoints50_3_5_5_WithDiameter(self):
315
        self.templateCfTreeTotalNumberPoints(50, 3, 5, 5, 100.0);
316
          
317
    def testCfTreeTotalNumberPoints100_2_2_1_NoDiameter(self):
318
        self.templateCfTreeTotalNumberPoints(100, 2, 2, 1, 0.0);
319
          
320
    def testCfTreeTotalNumberPoints100_2_2_1_WithDiameter(self):
321
        self.templateCfTreeTotalNumberPoints(100, 2, 2, 1, 100.0);
322
         
323
    def testCfTreeTotalNumberPoints100_2_5_5_WithSmallDiameter(self):
324
        self.templateCfTreeTotalNumberPoints(100, 2, 5, 5, 10.0);
325
        
326
        
327
if __name__ == "__main__":
328
    unittest.main();