Total Complexity | 42 |
Total Lines | 726 |
Duplicated Lines | 49.04 % |
Changes | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like those in test_compact often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
import copy
import logging
import pdb
import threading
import time
from multiprocessing import Pool, Process

import pytest

from utils import *
||
8 | |||
# Shared module-level test data. The generator helpers (gen_entities,
# gen_binary_entities, gen_default_fields, gen_vectors, ...) come from the
# star import of ``utils``; their exact output format is defined there.

# Vector dimension handed to gen_vectors() when building queries.
dim = 128
index_file_size = 10
# Per-test limit passed to @pytest.mark.timeout (pytest-timeout counts seconds).
COMPACT_TIMEOUT = 180
nprobe = 1
top_k = 1
# Partition tag used by the partition-level compact test.
tag = "1970-01-01"
# Number of entities generated for the bulk-insert cases.
nb = 6000
segment_row_count = 5000
# One generated entity and a batch of ``nb`` entities for float collections.
entity = gen_entities(1)
entities = gen_entities(nb)
# Binary counterparts: gen_binary_entities() returns a (raw vectors, entities) pair.
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)
# Field schemas passed to create_collection().
default_fields = gen_default_fields()
default_binary_fields = gen_binary_default_fields()
# Name of the float-vector field that queries and index creation target.
field_name = default_float_vec_field_name
# Template query: a single vector clause on ``field_name`` with topk 10 and
# nprobe 10. Tests deep-copy it before replacing the "query" vectors.
default_single_query = {
    "bool": {
        "must": [
            {"vector": {field_name: {"topk": 10, "query": gen_vectors(1, dim),
                                     "params": {"nprobe": 10}}}}
        ]
    }
}
||
32 | |||
def ip_query():
    """Return a deep copy of ``default_single_query`` with ``metric_type`` set to "IP".

    The module-level template is left untouched; only the vector clause of
    the copy receives the extra ``metric_type`` key.
    """
    ip_variant = copy.deepcopy(default_single_query)
    vector_clause = ip_variant["bool"]["must"][0]["vector"][field_name]
    vector_clause["metric_type"] = "IP"
    return ip_variant
||
37 | |||
38 | |||
class TestCompactBase:
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """

    @staticmethod
    def _segment_data_size(info, partition=0):
        '''
        Return the ``data_size`` of the first segment of one partition.

        info: dict returned by connect.get_collection_stats()
        partition: index into info["partitions"] (0 unless a test created
                   an extra partition)
        '''
        return info["partitions"][partition]["segments"][0]["data_size"]

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact collection where collection name is None
        method: compact with the collection_name: None
        expected: exception raised
        '''
        with pytest.raises(Exception):
            connect.compact(None)

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact collection not existed
        method: compact with a random collection_name, which is not in db
        expected: exception raised
        '''
        collection_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(collection_name)

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        '''Yield each invalid collection name produced by gen_invalid_strs().'''
        yield request.param

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact collection with invalid name
        method: compact with invalid collection_name
        expected: exception raised
        '''
        collection_name = get_collection_name
        with pytest.raises(Exception):
            connect.compact(collection_name)

    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: test add entity and compact
        method: add entity and compact collection
        expected: data_size is the same before and after compact
        '''
        ids = connect.insert(collection, entity)
        assert len(ids) == 1
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info)
        size_before = self._segment_data_size(info)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, collection):
        '''
        target: test add entities and compact
        method: add entities and compact collection
        expected: data_size is the same before and after compact
        '''
        connect.insert(collection, entities)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact
        method: add entities, delete a few and compact collection
        expected: status ok, data size maybe is smaller after compact
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        size_before = self._segment_data_size(info)
        logging.getLogger().info(size_before)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        size_after = self._segment_data_size(info)
        logging.getLogger().info(size_after)
        assert size_before >= size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: test add entities, delete them and compact
        method: add entities, delete all and compact collection
        expected: status ok, no data size in collection info because collection is empty
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids)
        assert status.OK()
        connect.flush([collection])
        # stats are requested before compact as well; only the call matters here
        connect.get_collection_stats(collection)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: test add entities into partition, delete them and compact
        method: add entities, delete half of entities in partition and compact collection
        expected: status ok, data_size less than the older version
        '''
        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])

        # nb // 2 == 3000 with the module-level nb of 6000
        delete_ids = ids[:nb // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info_after = connect.get_collection_stats(collection)
        logging.getLogger().info(info_after["partitions"])
        # the test reads partition index 1, i.e. the one after the first entry
        size_before = self._segment_data_size(info, partition=1)
        size_after = self._segment_data_size(info_after, partition=1)
        assert size_before > size_after

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Yield each simple index config, skipping those the server mode cannot build.

        GPU mode: skip index types that are not in ivf().
        CPU mode: skip index types listed by index_cpu_not_support().
        '''
        mode = str(connect._cmd("mode"))
        index_type = request.param["index_type"]
        # The original condition read `not ... not in ivf()`, which skipped the
        # supported types and ran the unsupported ones.
        if mode == "GPU" and index_type not in ivf():
            pytest.skip("Only support index_type: idmap/ivf")
        if mode == "CPU" and index_type in index_cpu_not_support():
            pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

    @pytest.mark.skip(reason="create_index not support yet")
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test compact collection after index created
        method: add entities, create index, delete part of entities and compact
        expected: status ok, index description no change, data size smaller after compact
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = self._segment_data_size(info)
        logging.getLogger().info(info["partitions"])
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        size_after = self._segment_data_size(info)
        assert size_before >= size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: test add entity and compact twice
        method: add entity and compact collection twice
        expected: status ok, data size no change
        '''
        connect.insert(collection, entity)
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(collection)
        assert status.OK()
        connect.flush([collection])
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(collection)
        size_after_twice = self._segment_data_size(info)
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: add entities, delete part and compact collection twice
        expected: status ok, data size smaller after first compact, no change after second
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = self._segment_data_size(info)
        assert size_before >= size_after
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(collection)
        size_after_twice = self._segment_data_size(info)
        assert size_after == size_after_twice

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 50 collections, add entities into them and compact in turn
        expected: status ok
        '''
        nq = 100
        num_collections = 50
        batch = gen_entities(nq)
        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
        # single pause after all collections are created, before inserting
        time.sleep(6)
        for collection_name in collection_list:
            connect.insert(collection_name, batch)
            status = connect.compact(collection_name)
            assert status.OK()

    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: test add entity after compact
        method: after compact operation, add entity
        expected: status ok, entity added
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        # get collection info before compact
        info = connect.get_collection_stats(collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after
        connect.insert(collection, entity)
        connect.flush([collection])
        res = connect.count_entities(collection)
        assert res == nb + 1

    @pytest.mark.skip(reason="delete not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: test index creation after compact
        method: after compact operation, create index
        expected: status ok, index description no change
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids[:10])
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        status = connect.create_index(collection, field_name, get_simple_index)
        assert status.OK()

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: test delete entities after compact
        method: after compact operation, delete entities
        expected: status ok, entities deleted
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        connect.flush([collection])
        status = connect.delete_entity_by_id(collection, ids)
        assert status.OK()
        connect.flush([collection])
        assert connect.count_entities(collection) == 0

    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, collection):
        '''
        target: test search after compact
        method: after compact operation, search vector
        expected: status ok
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        query = copy.deepcopy(default_single_query)
        # first vector comes from the un-inserted `entity`, the other two from
        # the inserted batch
        query_vectors = [entity[-1]["values"][0], entities[-1]["values"][0],
                         entities[-1]["values"][-1]]
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().debug(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] > epsilon
        assert res[1]._distances[0] < epsilon
        assert res[2]._distances[0] < epsilon

    # TODO: enable
    # NOTE(review): this disabled case still unpacks (status, result) tuples from
    # insert()/get_collection_stats(), unlike every other test in this class;
    # confirm against the current client API before enabling.
    @pytest.mark.skip(reason="delete not support yet")
    def _test_compact_server_crashed_recovery(self, connect, collection):
        '''
        target: test compact when server crashed unexpectedly and restarted
        method: add entities, delete and compact collection; server stopped and restarted during compact
        expected: status ok, request recovered
        '''
        vectors = gen_vectors(nb * 100, dim)
        status, ids = connect.insert(collection, vectors)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        delete_ids = ids[0:1000]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        status = connect.flush([collection])
        assert status.OK()
        # start to compact, kill and restart server
        logging.getLogger().info("compact starting...")
        status = connect.compact(collection)
        assert status.OK()
        # get collection info after compact
        status, info = connect.get_collection_stats(collection)
        assert status.OK()
        assert info["partitions"][0].count == nb * 100 - 1000
||
449 | |||
450 | |||
class TestCompactBinary:
    """
    ******************************************************************
      The following cases are used to test `compact` function
    ******************************************************************
    """

    @staticmethod
    def _segment_data_size(info, partition=0):
        '''
        Return the ``data_size`` of the first segment of one partition.

        info: dict returned by connect.get_collection_stats()
        partition: index into info["partitions"]
        '''
        return info["partitions"][partition]["segments"][0]["data_size"]

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    # TODO
    @pytest.mark.level(2)
    def test_add_entity_and_compact(self, connect, binary_collection):
        '''
        target: test add binary vector and compact
        method: add vector and compact collection
        expected: status ok, vector added
        '''
        ids = connect.insert(binary_collection, binary_entity)
        assert len(ids) == 1
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, binary_collection):
        '''
        target: test add entities with binary vector and compact
        method: add entities and compact collection
        expected: status ok, entities added
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact
        method: add entities, delete a few and compact collection
        expected: status ok, data size is smaller after compact
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(binary_collection, delete_ids)
        assert status.OK()
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        size_before = self._segment_data_size(info)
        logging.getLogger().info(size_before)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        size_after = self._segment_data_size(info)
        logging.getLogger().info(size_after)
        assert size_before >= size_after

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, binary_collection):
        '''
        target: test add entities, delete them and compact
        method: add entities, delete all and compact collection
        expected: status ok, no data size in collection info because collection is empty
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        status = connect.delete_entity_by_id(binary_collection, ids)
        assert status.OK()
        connect.flush([binary_collection])
        # stats are requested before compact as well; only the call matters here
        connect.get_collection_stats(binary_collection)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]

    # TODO
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entity and compact twice
        method: add entity and compact collection twice
        expected: status ok
        '''
        ids = connect.insert(binary_collection, binary_entity)
        assert len(ids) == 1
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(binary_collection)
        size_after_twice = self._segment_data_size(info)
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, binary_collection):
        '''
        target: test add entities, delete part of them and compact twice
        method: add entities, delete part and compact collection twice
        expected: status ok, data size smaller after first compact, no change after second
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(binary_collection, delete_ids)
        assert status.OK()
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = self._segment_data_size(info)
        assert size_before >= size_after
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact twice
        info = connect.get_collection_stats(binary_collection)
        size_after_twice = self._segment_data_size(info)
        assert size_after == size_after_twice

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_compact_multi_collections(self, connect):
        '''
        target: test compact works or not with multiple collections
        method: create 10 collections, add entities into them and compact in turn
        expected: status ok
        '''
        nq = 100
        num_collections = 10
        _, batch = gen_binary_entities(nq)
        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            # binary entities are inserted below, so the binary schema is used
            # (the original created the collections with default_fields)
            connect.create_collection(collection_name, default_binary_fields)
        for collection_name in collection_list:
            ids = connect.insert(collection_name, batch)
            assert len(ids) == nq
            status = connect.delete_entity_by_id(collection_name, [ids[0], ids[-1]])
            assert status.OK()
            connect.flush([collection_name])
            status = connect.compact(collection_name)
            assert status.OK()
            status = connect.drop_collection(collection_name)
            assert status.OK()

    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, binary_collection):
        '''
        target: test add entity after compact
        method: after compact operation, add entity
        expected: status ok, entity added
        '''
        connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        # get collection info before compact
        info = connect.get_collection_stats(binary_collection)
        size_before = self._segment_data_size(info)
        status = connect.compact(binary_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(binary_collection)
        size_after = self._segment_data_size(info)
        assert size_before == size_after
        connect.insert(binary_collection, binary_entity)
        connect.flush([binary_collection])
        res = connect.count_entities(binary_collection)
        assert res == nb + 1

    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_delete_entities_after_compact(self, connect, binary_collection):
        '''
        target: test delete entities after compact
        method: after compact operation, delete entities
        expected: status ok, entities deleted
        '''
        ids = connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])
        status = connect.compact(binary_collection)
        assert status.OK()
        connect.flush([binary_collection])
        status = connect.delete_entity_by_id(binary_collection, ids)
        assert status.OK()
        connect.flush([binary_collection])
        res = connect.count_entities(binary_collection)
        assert res == 0

    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, binary_collection):
        '''
        target: test search after compact
        method: after compact operation, search vector
        expected: status ok
        '''
        ids = connect.insert(binary_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([binary_collection])
        status = connect.compact(binary_collection)
        assert status.OK()
        # reference distance: jaccard of the first raw vector with itself
        query_vecs = [raw_vectors[0]]
        distance = jaccard(query_vecs[0], raw_vectors[0])
        query = copy.deepcopy(default_single_query)
        query["bool"]["must"][0]["vector"][field_name]["query"] = [
            binary_entities[-1]["values"][0],
            binary_entities[-1]["values"][-1],
        ]
        res = connect.search(binary_collection, query)
        assert abs(res[0]._distances[0] - distance) <= epsilon

    # TODO:
    # NOTE(review): this case uses the float `collection` fixture and the float
    # `entities`, although it lives in the binary test class.
    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search after compact
        method: after compact operation, search vector
        expected: status ok
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()
        query = ip_query()
        query_vectors = [entity[-1]["values"][0], entities[-1]["values"][0],
                         entities[-1]["values"][-1]]
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] < 1 - epsilon
        assert res[1]._distances[0] > 1 - epsilon
        assert res[2]._distances[0] > 1 - epsilon
||
726 |