Total Complexity | 42 |
Total Lines | 377 |
Duplicated Lines | 39.79 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like test_list_id_in_segment often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | import time |
||
2 | import random |
||
3 | import pdb |
||
4 | import threading |
||
5 | import logging |
||
6 | from multiprocessing import Pool, Process |
||
7 | import pytest |
||
8 | from milvus import IndexType, MetricType |
||
9 | from utils import * |
||
10 | |||
# Shared module-level test data; every statement below runs once when the module is imported.
dim = 128  # NOTE(review): presumably the vector dimension; not referenced in the code visible here
segment_size = 100  # NOTE(review): not referenced in the visible code - confirm it is still needed
nb = 6000  # default entity count; the *_with_index_A tests read this because they define no local nb
tag = "1970-01-01"  # partition tag used by the partition-based tests
field_name = "float_vector"  # field passed to create_index in get_segment_name
default_index_name = "list_index"  # index name passed to create_index in get_segment_name
collection_id = "list_id_in_segment"  # seed handed to gen_unique_str for throwaway names
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)  # inserted directly by *_with_index_B tests
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()
||
23 | |||
24 | |||
def get_segment_name(connect, collection, nb=1, vec_type='float', index_params=None):
    '''
    Insert `nb` generated entities into `collection`, flush, optionally build
    an index, and report where the data landed.

    connect: client object used for insert / flush / create_index / stats
    collection: name of the collection to populate
    nb: number of entities to generate and insert
    vec_type: "float" selects gen_entities; any other value selects gen_binary_entities
    index_params: when truthy, passed to create_index after the flush

    return: (ids returned by insert, name of the first segment of the first partition)
    '''
    if vec_type == "float":
        payload = gen_entities(nb)
    else:
        # gen_binary_entities returns a pair; only the second item is inserted
        _, payload = gen_binary_entities(nb)

    inserted_ids = connect.insert(collection, payload)
    connect.flush([collection])

    if index_params:
        connect.create_index(collection, field_name, default_index_name, index_params)

    first_partition = connect.get_collection_stats(collection)["partitions"][0]
    segment_name = first_partition["segments"][0]["name"]
    return inserted_ids, segment_name
||
36 | |||
37 | |||
class TestGetVectorIdsBase:
    """
    ******************************************************************
      Cases exercising `list_id_in_segment` through the `collection` fixture
    ******************************************************************
    """

    def test_list_id_in_segment_collection_name_None(self, connect, collection):
        '''
        target: list ids in a segment while passing None as the collection name
        method: populate the collection, then call list_id_in_segment(None, <segment name>)
        expected: exception raised
        '''
        missing_name = None
        _, segment_name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(missing_name, segment_name)

    def test_list_id_in_segment_collection_name_not_existed(self, connect, collection):
        '''
        target: list ids in a segment for a collection name produced by gen_unique_str
        method: populate the real collection, then query with the generated name
        expected: exception raised
        '''
        unknown_collection = gen_unique_str(collection_id)
        _, segment_name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(unknown_collection, segment_name)

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        # One parametrized run per value returned by gen_invalid_strs()
        yield request.param

    def test_list_id_in_segment_collection_name_invalid(self, connect, collection, get_collection_name):
        '''
        target: list ids in a segment with each value supplied by get_collection_name
        method: populate the real collection, then query with the parametrized name
        expected: exception raised
        '''
        bad_collection = get_collection_name
        _, segment_name = get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(bad_collection, segment_name)

    def test_list_id_in_segment_name_None(self, connect, collection):
        '''
        target: list ids while passing None as the segment name
        method: populate the collection, then call list_id_in_segment(collection, None)
        expected: exception raised
        '''
        # Data is still inserted so the collection holds a real segment
        get_segment_name(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, None)

    def test_list_id_in_segment_name_not_existed(self, connect, collection):
        '''
        target: list ids for a segment name produced by gen_unique_str
        method: populate the collection, then query with the generated segment name
        expected: exception raised
        '''
        get_segment_name(connect, collection)
        unknown_segment = gen_unique_str(collection_id)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, unknown_segment)

    def test_list_id_in_segment_without_index_A(self, connect, collection):
        '''
        target: list ids in a segment when no index was created
        method: insert one entity and query the segment returned by get_segment_name
        expected: exactly the inserted id is returned
        '''
        count = 1
        ids, segment_name = get_segment_name(connect, collection, nb=count)
        listed = connect.list_id_in_segment(collection, segment_name)
        # the listed ids must mirror what insert returned
        assert len(listed) == count
        assert listed[0] == ids[0]

    def test_list_id_in_segment_without_index_B(self, connect, collection):
        '''
        target: list ids in a partition segment when no index was created
        method: create a partition, insert 10 entities into it, query its first segment
        expected: the listed ids equal the inserted ids, position by position
        '''
        count = 10
        rows = gen_entities(count)
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, rows, partition_tag=tag)
        connect.flush([collection])
        tagged = connect.get_collection_stats(collection)["partitions"][1]
        assert tagged["tag"] == tag
        listed = connect.list_id_in_segment(collection, tagged["segments"][0]["name"])
        # the listed ids must mirror what insert returned
        assert len(listed) == count
        for idx in range(count):
            assert listed[idx] == ids[idx]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        index_params = request.param
        running_on_cpu = str(connect._cmd("mode")) == "CPU"
        # Skip index types listed by index_cpu_not_support() when the server reports CPU mode
        if running_on_cpu and index_params["index_type"] in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return index_params

    def test_list_id_in_segment_with_index_A(self, connect, collection, get_simple_index):
        '''
        target: list ids in a segment after an index was created
        method: populate with the module-level nb and the parametrized index, then query
        expected: the call completes without raising
        '''
        _, segment_name = get_segment_name(connect, collection, nb=nb, index_params=get_simple_index)
        connect.list_id_in_segment(collection, segment_name)
        # TODO: assert on the returned ids

    def test_list_id_in_segment_with_index_B(self, connect, collection, get_simple_index):
        '''
        target: list ids in a partition segment, parametrized over index settings
        method: create a partition, insert the module-level entities into it, query its first segment
        expected: the call completes without raising
        '''
        connect.create_partition(collection, tag)
        connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        tagged = connect.get_collection_stats(collection)["partitions"][1]
        assert tagged["tag"] == tag
        connect.list_id_in_segment(collection, tagged["segments"][0]["name"])
        # the listed ids should mirror what insert returned
        # TODO

    def test_list_id_in_segment_after_delete_vectors(self, connect, collection):
        '''
        target: list ids in a segment after one entity was deleted
        method: insert two entities, delete the first by id, flush, query the first segment
        expected: only the second inserted id remains
        '''
        count = 2
        ids, _ = get_segment_name(connect, collection, nb=count)
        connect.delete_entity_by_id(collection, [ids[0]])
        connect.flush([collection])
        # Re-read the stats after the delete instead of reusing the earlier segment name
        first_partition = connect.get_collection_stats(collection)["partitions"][0]
        listed = connect.list_id_in_segment(collection, first_partition["segments"][0]["name"])
        assert len(listed) == 1
        assert listed[0] == ids[1]
||
189 | |||
190 | |||
class TestGetVectorIdsIP:
    """
    ******************************************************************
      Cases exercising `list_id_in_segment` through the `ip_collection` fixture
    ******************************************************************
    """

    def test_list_id_in_segment_without_index_A(self, connect, ip_collection):
        '''
        target: list ids in a segment when no index was created
        method: insert 10 entities, flush, query the first segment of the first partition
        expected: the listed ids equal the inserted ids, position by position
        '''
        count = 10
        rows = gen_entities(count)
        ids = connect.insert(ip_collection, rows)
        connect.flush([ip_collection])
        first_partition = connect.get_collection_stats(ip_collection)["partitions"][0]
        listed = connect.list_id_in_segment(ip_collection, first_partition["segments"][0]["name"])
        # the listed ids must mirror what insert returned
        assert len(listed) == count
        for idx in range(count):
            assert listed[idx] == ids[idx]

    def test_list_id_in_segment_without_index_B(self, connect, ip_collection):
        '''
        target: list ids in a partition segment when no index was created
        method: create a partition, insert 10 entities into it, query its first segment
        expected: the listed ids equal the inserted ids, position by position
        '''
        connect.create_partition(ip_collection, tag)
        count = 10
        rows = gen_entities(count)
        ids = connect.insert(ip_collection, rows, partition_tag=tag)
        connect.flush([ip_collection])
        tagged = connect.get_collection_stats(ip_collection)["partitions"][1]
        assert tagged["tag"] == tag
        listed = connect.list_id_in_segment(ip_collection, tagged["segments"][0]["name"])
        # the listed ids must mirror what insert returned
        assert len(listed) == count
        for idx in range(count):
            assert listed[idx] == ids[idx]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        index_params = request.param
        running_on_cpu = str(connect._cmd("mode")) == "CPU"
        # Skip index types listed by index_cpu_not_support() when the server reports CPU mode
        if running_on_cpu and index_params["index_type"] in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return index_params

    def test_list_id_in_segment_with_index_A(self, connect, ip_collection, get_simple_index):
        '''
        target: list ids in a segment after an index was created
        method: populate with the module-level nb and the parametrized index, then query
        expected: the call completes without raising
        '''
        _, segment_name = get_segment_name(connect, ip_collection, nb=nb, index_params=get_simple_index)
        connect.list_id_in_segment(ip_collection, segment_name)
        # TODO: assert on the returned ids

    def test_list_id_in_segment_with_index_B(self, connect, ip_collection, get_simple_index):
        '''
        target: list ids in a partition segment, parametrized over index settings
        method: create a partition, insert the module-level entities into it, query its first segment
        expected: the call completes without raising
        '''
        connect.create_partition(ip_collection, tag)
        connect.insert(ip_collection, entities, partition_tag=tag)
        connect.flush([ip_collection])
        tagged = connect.get_collection_stats(ip_collection)["partitions"][1]
        assert tagged["tag"] == tag
        connect.list_id_in_segment(ip_collection, tagged["segments"][0]["name"])
        # the listed ids should mirror what insert returned
        # TODO

    def test_list_id_in_segment_after_delete_vectors(self, connect, ip_collection):
        '''
        target: list ids in a segment after one entity was deleted
        method: insert two entities, delete the first by id, flush, query the first segment
        expected: only the second inserted id remains
        '''
        count = 2
        ids, _ = get_segment_name(connect, ip_collection, nb=count)
        connect.delete_entity_by_id(ip_collection, [ids[0]])
        connect.flush([ip_collection])
        # Re-read the stats after the delete instead of reusing the earlier segment name
        first_partition = connect.get_collection_stats(ip_collection)["partitions"][0]
        listed = connect.list_id_in_segment(ip_collection, first_partition["segments"][0]["name"])
        assert len(listed) == 1
        assert listed[0] == ids[1]
||
283 | |||
284 | |||
class TestGetVectorIdsJAC:
    """
    ******************************************************************
      Cases exercising `list_id_in_segment` through the `jac_collection`
      fixture; every insert in this class uses binary entities.
    ******************************************************************
    """

    def test_list_id_in_segment_without_index_A(self, connect, jac_collection):
        '''
        target: list ids in a segment when no index was created
        method: insert 10 binary entities, flush, query the first segment of the first partition
        expected: the listed ids equal the inserted ids, position by position
        '''
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(jac_collection, entities)
        connect.flush([jac_collection])
        stats = connect.get_collection_stats(jac_collection)
        vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    def test_list_id_in_segment_without_index_B(self, connect, jac_collection):
        '''
        target: list ids in a partition segment when no index was created
        method: create a partition, insert 10 binary entities into it, query its first segment
        expected: the listed ids equal the inserted ids, position by position
        '''
        connect.create_partition(jac_collection, tag)
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(jac_collection, entities, partition_tag=tag)
        connect.flush([jac_collection])
        stats = connect.get_collection_stats(jac_collection)
        # Same sanity check the sibling partition tests perform: partition 1 is the tagged one
        assert stats["partitions"][1]["tag"] == tag
        vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][1]["segments"][0]["name"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        # Log each candidate, then keep only index types listed by binary_support()
        logging.getLogger().info(request.param)
        if request.param["index_type"] in binary_support():
            return request.param
        else:
            pytest.skip("not support")

    def test_list_id_in_segment_with_index_A(self, connect, jac_collection, get_jaccard_index):
        '''
        target: list ids in a segment after an index was created
        method: populate with the module-level nb of binary entities and the parametrized index, then query
        expected: the call completes without raising
        '''
        _, name = get_segment_name(connect, jac_collection, nb=nb, index_params=get_jaccard_index, vec_type='binary')
        connect.list_id_in_segment(jac_collection, name)
        # TODO: assert on the returned ids

    def test_list_id_in_segment_with_index_B(self, connect, jac_collection, get_jaccard_index):
        '''
        target: list ids in a partition segment, parametrized over index settings
        method: create a partition, insert the module-level binary entities into it, query its first segment
        expected: the call completes without raising
        '''
        connect.create_partition(jac_collection, tag)
        # Fix: insert binary_entities, not the float `entities`, to match the rest of this class
        connect.insert(jac_collection, binary_entities, partition_tag=tag)
        connect.flush([jac_collection])
        stats = connect.get_collection_stats(jac_collection)
        assert stats["partitions"][1]["tag"] == tag
        connect.list_id_in_segment(jac_collection, stats["partitions"][1]["segments"][0]["name"])
        # vector_ids should match ids
        # TODO

    def test_list_id_in_segment_after_delete_vectors(self, connect, jac_collection, get_jaccard_index):
        '''
        target: list ids in a segment after one entity was deleted
        method: insert two binary entities with the parametrized index, delete the first by id,
                flush, query the first segment
        expected: only the second inserted id remains
        '''
        nb = 2
        ids, _ = get_segment_name(connect, jac_collection, nb=nb, vec_type='binary', index_params=get_jaccard_index)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(jac_collection, delete_ids)
        connect.flush([jac_collection])
        # Re-read the stats after the delete instead of reusing the earlier segment name
        stats = connect.get_collection_stats(jac_collection)
        vector_ids = connect.list_id_in_segment(jac_collection, stats["partitions"][0]["segments"][0]["name"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]
||
377 |