Total Complexity | 101 |
Total Lines | 291 |
Duplicated Lines | 0 % |
Complex classes like pyclustering.container.tests.Test often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | ''' |
||
34 | class Test(unittest.TestCase): |
||
35 | def templateCfClusterRepresentation(self, cluster, centroid, radius, diameter, tolerance): |
||
36 | entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster)); |
||
37 | |||
38 | assertion_centroid = centroid; |
||
39 | if (type(centroid) != list): |
||
40 | assertion_centroid = [ centroid ]; |
||
41 | |||
42 | if (type(centroid) == list): |
||
43 | for dimension in range(0, len(assertion_centroid)): |
||
44 | assert (assertion_centroid[dimension] - tolerance < ( entry.get_centroid() )[dimension]) and (( entry.get_centroid() )[dimension] < assertion_centroid[dimension] + tolerance); |
||
45 | |||
46 | assert (radius - tolerance < entry.get_radius()) and (entry.get_radius() < radius + tolerance); |
||
47 | assert (diameter - tolerance < entry.get_diameter()) and (entry.get_diameter() < diameter + tolerance); |
||
48 | |||
49 | def testCfClusterRepresentationOneDimension2(self): |
||
50 | cluster = [ [0.1], [0.2], [0.5], [0.4], [0.6] ]; |
||
51 | self.templateCfClusterRepresentation(cluster, 0.36, 0.18547, 0.29326, 0.0001); |
||
52 | |||
53 | def testCfClusterRepresentationTwoDimension(self): |
||
54 | cluster = [ [0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6] ]; |
||
55 | self.templateCfClusterRepresentation(cluster, [0.36, 0.36], 0.26230, 0.41473, 0.0001); |
||
56 | |||
57 | |||
58 | def templateCfEntryValueDistance(self, cluster1, cluster2, value, tolerance, type_measurment): |
||
59 | entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1)); |
||
60 | entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2)); |
||
61 | |||
62 | distance = entry1.get_distance(entry2, type_measurment); |
||
63 | assert ( (value - tolerance < distance) and (value + tolerance > distance) ); |
||
64 | |||
65 | def testCfEntryTwoPoints1(self): |
||
66 | self.templateCfEntryValueDistance([[0.0]], [[1.0]], 1.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
67 | |||
68 | def testCfEntryTwoPoints2(self): |
||
69 | self.templateCfEntryValueDistance([[0.0]], [[0.0]], 0.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
70 | |||
71 | def testCfEntryTwoPoints3(self): |
||
72 | self.templateCfEntryValueDistance([[-1.0]], [[0.0]], 1.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
73 | |||
74 | def testCfEntryTwoPoints4(self): |
||
75 | self.templateCfEntryValueDistance([[1.0, 0.0]], [[0.0, 1.0]], 2.0, 0.0000001, measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
76 | |||
77 | |||
78 | def testCfEntryIncrease(self): |
||
79 | tolerance = 0.00001; |
||
80 | cluster = [ [0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6] ]; |
||
81 | |||
82 | entry1 = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster)); |
||
83 | entry2 = entry1 + entry1; |
||
84 | |||
85 | assert cfentry(10, [3.6, 3.6], 3.28) == entry2; |
||
86 | |||
87 | entry2 = entry2 + entry2; |
||
88 | assert cfentry(20, [7.2, 7.2], 6.56) == entry2; |
||
89 | |||
90 | def testCfEntryDecrease(self): |
||
91 | entry1 = cfentry(10, [3.6, 3.6], 3.28); |
||
92 | entry2 = cfentry(5, [1.8, 1.8], 1.64); |
||
93 | |||
94 | assert entry2 == (entry1 - entry2); |
||
95 | |||
96 | |||
97 | def templateCfEntryDistance(self, type_measurement): |
||
98 | cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]]; |
||
99 | cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]]; |
||
100 | cluster3 = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]]; |
||
101 | |||
102 | entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1)); |
||
103 | entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2)); |
||
104 | entry3 = cfentry(len(cluster3), linear_sum(cluster3), square_sum(cluster3)); |
||
105 | |||
106 | distance12 = entry1.get_distance(entry2, type_measurement); |
||
107 | distance23 = entry2.get_distance(entry3, type_measurement); |
||
108 | distance13 = entry1.get_distance(entry3, type_measurement); |
||
109 | |||
110 | assert distance12 < distance23; |
||
111 | assert distance23 < distance13; |
||
112 | |||
113 | def testCfDistanceCentroidEuclidian(self): |
||
114 | self.templateCfEntryDistance(measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
115 | |||
116 | def testCfDistanceCentroidManhatten(self): |
||
117 | self.templateCfEntryDistance(measurement_type.CENTROID_MANHATTAN_DISTANCE); |
||
118 | |||
119 | def testCfDistanceAverageInterCluster(self): |
||
120 | self.templateCfEntryDistance(measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE); |
||
121 | |||
122 | def testCfDistanceAverageIntraCluster(self): |
||
123 | self.templateCfEntryDistance(measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE); |
||
124 | |||
125 | def testCfDistanceVarianceIncrease(self): |
||
126 | self.templateCfEntryDistance(measurement_type.VARIANCE_INCREASE_DISTANCE); |
||
127 | |||
128 | |||
129 | def templateDistanceCalculation(self, cluster1, cluster2, type_measurement): |
||
130 | entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1)); |
||
131 | entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2)); |
||
132 | |||
133 | # check that the same distance from 1 to 2 and from 2 to 1. |
||
134 | distance12 = entry1.get_distance(entry2, type_measurement); |
||
135 | distance21 = entry2.get_distance(entry1, type_measurement); |
||
136 | |||
137 | assert distance12 == distance21; |
||
138 | |||
139 | # check with utils calculation |
||
140 | float_delta = 0.0000001; |
||
141 | if (type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE): |
||
142 | assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid()); |
||
143 | |||
144 | elif (type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE): |
||
145 | assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid()); |
||
146 | |||
147 | elif (type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE): |
||
148 | assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2)) == True; |
||
149 | |||
150 | elif (type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE): |
||
151 | assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2)) == True; |
||
152 | |||
153 | elif (type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE): |
||
154 | assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2)) == True; |
||
155 | |||
156 | def templateDistanceCalculationTheSimplestSample(self, type_measurement): |
||
157 | cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]]; |
||
158 | cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]]; |
||
159 | |||
160 | self.templateDistanceCalculation(cluster1, cluster2, type_measurement); |
||
161 | |||
162 | def testDistanceCalculationTheSimplestSampleCentroidEuclidian(self): |
||
163 | self.templateDistanceCalculationTheSimplestSample(measurement_type.CENTROID_EUCLIDIAN_DISTANCE); |
||
164 | |||
165 | def testDistanceCalculationTheSimplestSampleCentroidManhattan(self): |
||
166 | self.templateDistanceCalculationTheSimplestSample(measurement_type.CENTROID_MANHATTAN_DISTANCE); |
||
167 | |||
168 | def testDistanceCalculationTheSimplestSampleAverageInterClusterDistance(self): |
||
169 | self.templateDistanceCalculationTheSimplestSample(measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE); |
||
170 | |||
171 | def testDistanceCalculationTheSimplestSampleAverageIntraClusterDistance(self): |
||
172 | self.templateDistanceCalculationTheSimplestSample(measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE); |
||
173 | |||
174 | def testDistanceCalculationTheSimplestSampleVarianceIncreaseDistance(self): |
||
175 | self.templateDistanceCalculationTheSimplestSample(measurement_type.VARIANCE_INCREASE_DISTANCE); |
||
176 | |||
177 | |||
178 | def testCfTreeCreationWithOneEntry(self): |
||
179 | tree = cftree(2, 1, 1.0); |
||
180 | entry = cfentry(5, [0.0, 0.1], 0.05); |
||
181 | |||
182 | tree.insert(entry); |
||
183 | |||
184 | assert 1 == tree.amount_nodes; |
||
185 | assert 1 == tree.height; |
||
186 | assert 1 == tree.amount_entries; |
||
187 | |||
188 | assert entry == tree.root.feature; |
||
189 | assert None == tree.root.parent; |
||
190 | |||
191 | |||
192 | def testCfTreeCreationWithoutMerging(self): |
||
193 | clusters = [ [ [random() + j, random() + j] for i in range(10) ] for j in range(10) ]; |
||
194 | tree = cftree(2, 1, 0.0); |
||
195 | |||
196 | for cluster in clusters: |
||
197 | tree.insert_cluster(cluster); |
||
198 | |||
199 | assert tree.height >= 4; |
||
200 | assert tree.amount_entries == 10; |
||
201 | assert len(tree.leafes) == 10; |
||
202 | |||
203 | def testCfTreeInserionOneLeafThreeEntries(self): |
||
204 | cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]]; |
||
205 | cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]]; |
||
206 | cluster3 = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]]; |
||
207 | |||
208 | tree = cftree(3, 4, 0.0); |
||
209 | tree.insert_cluster(cluster1); |
||
210 | tree.insert_cluster(cluster2); |
||
211 | tree.insert_cluster(cluster3); |
||
212 | |||
213 | entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1)); |
||
214 | entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2)); |
||
215 | entry3 = cfentry(len(cluster3), linear_sum(cluster3), square_sum(cluster3)); |
||
216 | |||
217 | assert tree.find_nearest_leaf(entry1) == tree.find_nearest_leaf(entry2); |
||
218 | assert tree.find_nearest_leaf(entry2) == tree.find_nearest_leaf(entry3); |
||
219 | |||
220 | |||
221 | def templateCfTreeLeafIntegrity(self, number_clusters, branching_factor, max_entries, threshold): |
||
222 | clusters = [ [ [random() + j, random() + j] for i in range(10) ] for j in range(number_clusters) ]; |
||
223 | tree = cftree(branching_factor, max_entries, threshold); |
||
224 | |||
225 | for index_cluster in range(0, len(clusters)): |
||
226 | tree.insert_cluster(clusters[index_cluster]); |
||
227 | |||
228 | result_searching = False; |
||
229 | for leaf in tree.leafes: |
||
230 | for node_entry in leaf.entries: |
||
231 | result_searching |= (node_entry == node_entry); |
||
232 | |||
233 | assert True == result_searching; |
||
234 | |||
235 | def testCfTreeLeafIntegrity10_2_1(self): |
||
236 | self.templateCfTreeLeafIntegrity(10, 2, 1, 0.0); |
||
237 | |||
238 | def testCfTreeLeafIntegrity10_3_1(self): |
||
239 | self.templateCfTreeLeafIntegrity(10, 3, 1, 0.0); |
||
240 | |||
241 | def testCfTreeLeafIntegrity20_4_1(self): |
||
242 | self.templateCfTreeLeafIntegrity(20, 4, 1, 0.0); |
||
243 | |||
244 | def testCfTreeLeafIntegrity20_4_2(self): |
||
245 | self.templateCfTreeLeafIntegrity(20, 4, 2, 0.0); |
||
246 | |||
247 | def testCfTreeLeafIntegrity40_10_5(self): |
||
248 | self.templateCfTreeLeafIntegrity(40, 10, 5, 0.0); |
||
249 | |||
250 | |||
251 | def testCfTreeEntryAbsorbing(self): |
||
252 | tree = cftree(2, 1, 10000.0); |
||
253 | absorbing_entry = cfentry(0, [0.0, 0.0], 0.0); |
||
254 | |||
255 | for offset in range(0, 10): |
||
256 | cluster = [ [random() + offset, random() + offset] for i in range(10)]; |
||
257 | entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster)); |
||
258 | |||
259 | absorbing_entry += entry; |
||
260 | |||
261 | tree.insert(entry); |
||
262 | |||
263 | assert 1 == tree.amount_entries; |
||
264 | assert 1 == tree.amount_nodes; |
||
265 | assert 1 == tree.height; |
||
266 | |||
267 | assert None == tree.root.parent; |
||
268 | assert absorbing_entry == tree.root.feature; |
||
269 | |||
270 | |||
271 | def templateCfTreeTotalNumberPoints(self, number_points, dimension, branching_factor, number_entries, diameter): |
||
272 | tree = cftree(branching_factor, number_entries, diameter); |
||
273 | |||
274 | for index_point in range(0, number_points): |
||
275 | point = [ index_point for i in range(0, dimension) ]; |
||
276 | |||
277 | tree.insert_cluster([ point ]); |
||
278 | |||
279 | number_points = 0; |
||
280 | for leaf in tree.leafes: |
||
281 | number_points += leaf.feature.number_points; |
||
282 | |||
283 | assert (index_point + 1) == number_points; |
||
284 | |||
285 | number_leaf_points = 0; |
||
286 | for leaf in tree.leafes: |
||
287 | number_leaf_points += leaf.feature.number_points; |
||
288 | |||
289 | assert number_points == tree.root.feature.number_points; |
||
290 | |||
291 | if (number_points != number_leaf_points): |
||
292 | print(number_points, number_leaf_points); |
||
293 | |||
294 | assert number_points == number_leaf_points; |
||
295 | |||
296 | def testCfTreeTotalNumberPoints10_1_5_5_NoDiameter(self): |
||
297 | self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 0.0); |
||
298 | |||
299 | def testCfTreeTotalNumberPoints10_1_5_5_WithDiameter(self): |
||
300 | self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 100.0); |
||
301 | |||
302 | def testCfTreeTotalNumberPoints10_1_5_5_WithSmallDiameter(self): |
||
303 | self.templateCfTreeTotalNumberPoints(10, 1, 5, 5, 2.5); |
||
304 | |||
305 | def testCfTreeTotalNumberPoints10_2_5_5_NoDiameter(self): |
||
306 | self.templateCfTreeTotalNumberPoints(10, 2, 5, 5, 0.0); |
||
307 | |||
308 | def testCfTreeTotalNumberPoints10_2_5_5_WithDiameter(self): |
||
309 | self.templateCfTreeTotalNumberPoints(10, 2, 5, 5, 100.0); |
||
310 | |||
311 | def testCfTreeTotalNumberPoints50_3_5_5_NoDiameter(self): |
||
312 | self.templateCfTreeTotalNumberPoints(50, 3, 5, 5, 0.0); |
||
313 | |||
314 | def testCfTreeTotalNumberPoints50_3_5_5_WithDiameter(self): |
||
315 | self.templateCfTreeTotalNumberPoints(50, 3, 5, 5, 100.0); |
||
316 | |||
317 | def testCfTreeTotalNumberPoints100_2_2_1_NoDiameter(self): |
||
318 | self.templateCfTreeTotalNumberPoints(100, 2, 2, 1, 0.0); |
||
319 | |||
320 | def testCfTreeTotalNumberPoints100_2_2_1_WithDiameter(self): |
||
321 | self.templateCfTreeTotalNumberPoints(100, 2, 2, 1, 100.0); |
||
322 | |||
323 | def testCfTreeTotalNumberPoints100_2_5_5_WithSmallDiameter(self): |
||
324 | self.templateCfTreeTotalNumberPoints(100, 2, 5, 5, 10.0); |
||
325 | |||
328 | unittest.main(); |