Passed
Push — master ( 4645fd...ef24a8 )
by
unknown
10:35 queued 08:28
created

TestCompactBase.test_search_after_compact()   A

Complexity

Conditions 1

Size

Total Lines 22
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 17
nop 3
dl 0
loc 22
rs 9.55
c 0
b 0
f 0
1
import time
2
import pdb
3
import threading
4
import logging
5
from multiprocessing import Pool, Process
6
import pytest
7
from utils import *
8
9
# Dimension passed to gen_vectors() for the default query vector below.
dim = 128
index_file_size = 10
# Value handed to pytest.mark.timeout on the compact tests.
COMPACT_TIMEOUT = 180
nprobe = 1
top_k = 1
# Partition tag used by the partition-based compact test.
tag = "1970-01-01"
# Size of the bulk entity fixtures generated below.
nb = 6000
segment_row_count = 5000

# Module-level data shared by every test in this file (generated once at import).
entity = gen_entities(1)
entities = gen_entities(nb)
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)

# Collection schemas for float-vector and binary-vector collections.
default_fields = gen_default_fields()
default_binary_fields = gen_binary_default_fields()

# Name of the float vector field that queries are issued against.
field_name = default_float_vec_field_name

# Template query: one vector clause on `field_name`; tests deep-copy it before editing.
default_single_query = {
    "bool": {
        "must": [
            {
                "vector": {
                    field_name: {
                        "topk": 10,
                        "query": gen_vectors(1, dim),
                        "params": {"nprobe": 10},
                    },
                },
            },
        ],
    },
}
32
33
def ip_query():
    '''
    Build a copy of `default_single_query` whose vector clause uses the IP metric.

    The template is deep-copied, so the module-level `default_single_query`
    is left untouched.

    return: a new query dict with "metric_type": "IP" set on the vector clause
    '''
    # Imported locally: this file has no top-level `import copy`, so the name
    # was only available if `from utils import *` happened to re-export it.
    import copy

    query = copy.deepcopy(default_single_query)
    query["bool"]["must"][0]["vector"][field_name].update({"metric_type": "IP"})
    return query
37
38
39
class TestCompactBase:
40
    """
41
    ******************************************************************
42
      The following cases are used to test `compact` function
43
    ******************************************************************
44
    """
45
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact must reject a collection name of None
        method: call compact and pass None as the collection name
        expected: an exception is raised
        '''
        with pytest.raises(Exception):
            connect.compact(None)
55
56
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact must reject a collection that does not exist
        method: call compact with a freshly generated, never-created collection name
        expected: an exception is raised
        '''
        missing_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(missing_name)
66
    
67
    @pytest.fixture(scope="function", params=gen_invalid_strs())
    def get_collection_name(self, request):
        '''
        Parametrized fixture: yields each value returned by gen_invalid_strs(),
        one per test invocation.
        '''
        yield request.param
73
74
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact must reject an invalid collection name
        method: call compact with each value supplied by the get_collection_name fixture
        expected: an exception is raised
        '''
        invalid_name = get_collection_name
        with pytest.raises(Exception):
            connect.compact(invalid_name)
85
    
86 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: compact a collection holding a single entity
        method: insert one entity, flush, compact, and compare data_size around the compact
        expected: compact status is OK and the first segment's data_size is unchanged
        '''
        inserted = connect.insert(collection, entity)
        assert len(inserted) == 1
        connect.flush([collection])

        # data_size of the first segment before compact
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        before = stats["partitions"][0]["segments"][0]["data_size"]

        outcome = connect.compact(collection)
        assert outcome.OK()

        # data_size of the first segment after compact
        stats = connect.get_collection_stats(collection)
        after = stats["partitions"][0]["segments"][0]["data_size"]
        assert before == after
108
109
    # TODO
110 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, collection):
        '''
        target: compact a collection holding the bulk entity set
        method: insert `entities`, flush, compact, and compare data_size around the compact
        expected: compact status is OK and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entities)
        connect.flush([collection])

        before = first_segment_size()
        outcome = connect.compact(collection)
        assert outcome.OK()
        after = first_segment_size()
        assert before == after
132
133 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: compact after deleting a few of the inserted entities
        method: insert `entities`, delete the first and last id, flush, then compact
        expected: statuses are OK and data_size does not grow across the compact
        '''
        log = logging.getLogger()

        inserted = connect.insert(collection, entities)
        assert len(inserted) == nb
        connect.flush([collection])

        doomed = [inserted[0], inserted[-1]]
        outcome = connect.delete_entity_by_id(collection, doomed)
        assert outcome.OK()
        connect.flush([collection])

        # stats before compact
        stats = connect.get_collection_stats(collection)
        log.info(stats["partitions"])
        before = stats["partitions"][0]["segments"][0]["data_size"]
        log.info(before)

        outcome = connect.compact(collection)
        assert outcome.OK()

        # stats after compact
        stats = connect.get_collection_stats(collection)
        log.info(stats["partitions"])
        after = stats["partitions"][0]["segments"][0]["data_size"]
        log.info(after)
        assert before >= after
161
    
162 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: compact after every inserted entity has been deleted
        method: insert `entities`, delete all returned ids, flush, then compact
        expected: statuses are OK and partition 0 reports no segments afterwards
        '''
        inserted = connect.insert(collection, entities)
        assert len(inserted) == nb
        connect.flush([collection])

        outcome = connect.delete_entity_by_id(collection, inserted)
        assert outcome.OK()
        connect.flush([collection])

        # Stats are fetched before compact as in the original flow; the result is not inspected.
        connect.get_collection_stats(collection)
        outcome = connect.compact(collection)
        assert outcome.OK()

        # stats after compact
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats["partitions"])
        assert not stats["partitions"][0]["segments"]
184
185
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: compact after deleting half of the entities stored in a partition
        method: create partition `tag`, insert `entities` into it, delete the first
                half of the returned ids, flush, then compact the collection
        expected: statuses are OK and data_size of the partition's first segment
                  is smaller after compact than before
        '''
        log = logging.getLogger()

        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])

        # Derive "half" from what was actually inserted rather than a literal
        # 3000, so the test keeps its meaning if `nb` changes.
        delete_ids = ids[:len(ids) // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])

        # stats before compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()

        # stats after compact; index 1 is the partition read by the original assertion
        info_after = connect.get_collection_stats(collection)
        log.info(info_after["partitions"])
        size_before = info["partitions"][1]["segments"][0]["data_size"]
        size_after = info_after["partitions"][1]["segments"][0]["data_size"]
        assert size_before > size_after
213
214 View Code Duplication
    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Parametrized fixture: returns each index configuration from
        gen_simple_index(), skipping those the server mode cannot run.

        - GPU mode: skip when index_type is NOT one of ivf()
        - CPU mode: skip when index_type is in index_cpu_not_support()
        '''
        # Ask the server for its mode once instead of once per branch.
        mode = str(connect._cmd("mode"))
        if mode == "GPU":
            # The original read `not ... not in ivf()`, a double negation that
            # skipped exactly the supported types; the skip message shows the
            # intent is to skip everything outside ivf().
            if request.param["index_type"] not in ivf():
                pytest.skip("Only support index_type: idmap/ivf")
        elif mode == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param
226
227
    @pytest.mark.skip(reason="create_index not support yet")
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: compact a collection after an index has been created on it
        method: insert `entities`, create the index, delete the first and last
                entity, flush, then compact
        expected: statuses are OK and data_size does not grow across the compact
        '''
        log = logging.getLogger()

        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])

        # stats before the delete + compact
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        log.info(info["partitions"])

        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()

        # stats after compact
        info = connect.get_collection_stats(collection)
        log.info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
254
255
    # TODO
256 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: compacting twice in a row leaves a single-entity collection unchanged
        method: insert one entity, flush, compact, flush, compact again
        expected: both compacts return OK and data_size is identical at every step
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entity)
        connect.flush([collection])
        initial = first_segment_size()

        # first compact
        outcome = connect.compact(collection)
        assert outcome.OK()
        connect.flush([collection])
        after_first = first_segment_size()
        assert initial == after_first

        # second compact
        outcome = connect.compact(collection)
        assert outcome.OK()
        after_second = first_segment_size()
        assert after_first == after_second
282
283 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: compact twice after deleting a few of the inserted entities
        method: insert `entities`, delete the first and last id, flush, compact twice
        expected: statuses are OK; data_size does not grow on the first compact
                  and is unchanged by the second
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(collection, entities)
        connect.flush([collection])
        doomed = [inserted[0], inserted[-1]]
        outcome = connect.delete_entity_by_id(collection, doomed)
        assert outcome.OK()
        connect.flush([collection])
        initial = first_segment_size()

        # first compact
        outcome = connect.compact(collection)
        assert outcome.OK()
        after_first = first_segment_size()
        assert initial >= after_first

        # second compact
        outcome = connect.compact(collection)
        assert outcome.OK()
        after_second = first_segment_size()
        assert after_first == after_second
312
313
    # TODO
314
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: compact works across many collections
        method: create 50 collections, then insert into and compact each one in turn
        expected: every compact returns an OK status
        '''
        nq = 100
        num_collections = 50
        batch = gen_entities(nq)

        names = []
        for idx in range(num_collections):
            name = gen_unique_str("test_compact_multi_collection_%d" % idx)
            names.append(name)
            connect.create_collection(name, default_fields)

        # Fixed pause kept from the original flow between creation and first use.
        time.sleep(6)

        for name in names:
            connect.insert(name, batch)
            outcome = connect.compact(name)
            assert outcome.OK()
335
336 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: a collection still accepts inserts after it has been compacted
        method: insert `entities`, flush, compact, then insert one more entity and flush
        expected: compact is OK, data_size is unchanged by it, and the entity
                  count ends at nb + 1
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(collection, entities)
        assert len(inserted) == nb
        connect.flush([collection])

        before = first_segment_size()
        outcome = connect.compact(collection)
        assert outcome.OK()
        after = first_segment_size()
        assert before == after

        # insert one more entity on top of the compacted data
        connect.insert(collection, entity)
        connect.flush([collection])
        total = connect.count_entities(collection)
        assert total == nb + 1
360
361
    @pytest.mark.skip(reason="delete not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: an index can be created after the collection has been compacted
        method: insert `entities`, delete the first ten ids, flush, compact, create index
        expected: delete, compact and create_index all return an OK status
        '''
        inserted = connect.insert(collection, entities)
        connect.flush([collection])

        outcome = connect.delete_entity_by_id(collection, inserted[:10])
        assert outcome.OK()
        connect.flush([collection])

        outcome = connect.compact(collection)
        assert outcome.OK()

        outcome = connect.create_index(collection, field_name, get_simple_index)
        assert outcome.OK()
        # TODO: verify the resulting index description, e.g. via connect.get_index_info(collection)
379
380
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: entities can be deleted after the collection has been compacted
        method: insert `entities`, flush, compact, flush, delete all ids, flush
        expected: statuses are OK and the collection ends with zero entities
        '''
        inserted = connect.insert(collection, entities)
        assert len(inserted) == nb
        connect.flush([collection])

        outcome = connect.compact(collection)
        assert outcome.OK()
        connect.flush([collection])

        outcome = connect.delete_entity_by_id(collection, inserted)
        assert outcome.OK()
        connect.flush([collection])

        assert connect.count_entities(collection) == 0
398
399
    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, collection):
        '''
        target: search still works after the collection has been compacted
        method: insert `entities`, flush, compact, then search with three query
                vectors: one taken from `entity` (never inserted by this test)
                and two taken from the last inserted entity
        expected: one result per query vector; top distance is above epsilon for
                  the first vector and below epsilon for the two inserted ones
        '''
        # Imported locally: this file has no top-level `import copy`, so the name
        # was only available if `from utils import *` happened to re-export it.
        import copy

        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()

        query = copy.deepcopy(default_single_query)
        vector_clause = query["bool"]["must"][0]["vector"][field_name]
        vector_clause["query"] = [
            entity[-1]["values"][0],
            entities[-1]["values"][0],
            entities[-1]["values"][-1],
        ]
        res = connect.search(collection, query)
        logging.getLogger().debug(res)
        assert len(res) == len(vector_clause["query"])
        # NOTE(review): `epsilon` is not defined in this file; presumably it is
        # supplied by `from utils import *` - confirm.
        assert res[0]._distances[0] > epsilon
        assert res[1]._distances[0] < epsilon
        assert res[2]._distances[0] < epsilon
421
422
    # TODO: enable
423
    @pytest.mark.skip(reason="delete not support yet")
    def _test_compact_server_crashed_recovery(self, connect, collection):
        '''
        target: compact recovers when the server crashes and restarts mid-compact
        method: insert a large entity set, delete part of it, then compact; the
                server is meant to be stopped and restarted by hand while the
                compact is running
        expected: statuses are OK and the entity count equals inserted minus deleted

        The leading underscore keeps pytest from collecting this test.
        '''
        total = nb * 100
        deleted = 1000

        # Uses the same client API shape as every other test in this file:
        # insert() returns the ids, flush() is not checked for a status, and
        # the final size is read through count_entities(). The previous body
        # unpacked (status, result) tuples and inserted raw gen_vectors() output.
        bulk = gen_entities(total)
        ids = connect.insert(collection, bulk)
        assert len(ids) == total
        connect.flush([collection])

        status = connect.delete_entity_by_id(collection, ids[:deleted])
        assert status.OK()
        connect.flush([collection])

        # start to compact; kill and restart the server manually at this point
        logging.getLogger().info("compact starting...")
        status = connect.compact(collection)
        assert status.OK()

        assert connect.count_entities(collection) == total - deleted
449
450
451
class TestCompactBinary:
452
    """
453
    ******************************************************************
454
      The following cases are used to test `compact` function
455
    ******************************************************************
456
    """
457 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    # TODO
    @pytest.mark.level(2)
    def test_add_entity_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection holding a single entity
        method: insert one binary entity, flush, compact, compare data_size around the compact
        expected: compact status is OK and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(binary_collection, binary_entity)
        assert len(inserted) == 1
        connect.flush([binary_collection])

        before = first_segment_size()
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after = first_segment_size()
        assert before == after
478
479
    # TODO
480 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection holding the bulk entity set
        method: insert `binary_entities`, flush, compact, compare data_size around the compact
        expected: compact status is OK and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(binary_collection, binary_entities)
        assert len(inserted) == nb
        connect.flush([binary_collection])

        before = first_segment_size()
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after = first_segment_size()
        assert before == after
500
501 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection after deleting a few entities
        method: insert `binary_entities`, delete the first and last id, flush, then compact
        expected: statuses are OK and data_size does not grow across the compact
        '''
        log = logging.getLogger()

        inserted = connect.insert(binary_collection, binary_entities)
        assert len(inserted) == nb
        connect.flush([binary_collection])

        doomed = [inserted[0], inserted[-1]]
        outcome = connect.delete_entity_by_id(binary_collection, doomed)
        assert outcome.OK()
        connect.flush([binary_collection])

        # stats before compact
        stats = connect.get_collection_stats(binary_collection)
        log.info(stats["partitions"])
        before = stats["partitions"][0]["segments"][0]["data_size"]
        log.info(before)

        outcome = connect.compact(binary_collection)
        assert outcome.OK()

        # stats after compact
        stats = connect.get_collection_stats(binary_collection)
        log.info(stats["partitions"])
        after = stats["partitions"][0]["segments"][0]["data_size"]
        log.info(after)
        assert before >= after
529
    
530 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, binary_collection):
        '''
        target: compact a binary collection after every entity has been deleted
        method: insert `binary_entities`, delete all returned ids, flush, then compact
        expected: statuses are OK and partition 0 reports no segments afterwards
        '''
        inserted = connect.insert(binary_collection, binary_entities)
        assert len(inserted) == nb
        connect.flush([binary_collection])

        outcome = connect.delete_entity_by_id(binary_collection, inserted)
        assert outcome.OK()
        connect.flush([binary_collection])

        # Stats are fetched before compact as in the original flow; the result is not inspected.
        connect.get_collection_stats(binary_collection)
        outcome = connect.compact(binary_collection)
        assert outcome.OK()

        # stats after compact
        stats = connect.get_collection_stats(binary_collection)
        # Re-checks the compact status, exactly as the original flow does.
        assert outcome.OK()
        logging.getLogger().info(stats["partitions"])
        assert not stats["partitions"][0]["segments"]
553
554
    # TODO
555 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, binary_collection):
        '''
        target: compacting twice leaves a single-entity binary collection unchanged
        method: insert one binary entity, flush, then compact twice
        expected: both compacts return OK and data_size is identical at every step
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(binary_collection, binary_entity)
        assert len(inserted) == 1
        connect.flush([binary_collection])
        initial = first_segment_size()

        # first compact
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after_first = first_segment_size()
        assert initial == after_first

        # second compact
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after_second = first_segment_size()
        assert after_first == after_second
581
582 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, binary_collection):
        '''
        target: compact a binary collection twice after deleting a few entities
        method: insert `binary_entities`, delete the first and last id, flush, compact twice
        expected: statuses are OK; data_size does not grow on the first compact
                  and is unchanged by the second
        '''
        def first_segment_size():
            # Read data_size of partition 0 / segment 0 from fresh collection stats.
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted = connect.insert(binary_collection, binary_entities)
        assert len(inserted) == nb
        connect.flush([binary_collection])

        doomed = [inserted[0], inserted[-1]]
        outcome = connect.delete_entity_by_id(binary_collection, doomed)
        assert outcome.OK()
        connect.flush([binary_collection])
        initial = first_segment_size()

        # first compact
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after_first = first_segment_size()
        assert initial >= after_first

        # second compact
        outcome = connect.compact(binary_collection)
        assert outcome.OK()
        after_second = first_segment_size()
        assert after_first == after_second
612
613
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_compact_multi_collections(self, connect):
        '''
        target: compact works across multiple binary collections
        method: create 10 binary collections; for each one insert binary
                entities, delete the first and last id, flush, compact, and drop it
        expected: every delete, compact and drop returns an OK status
        '''
        nq = 100
        num_collections = 10
        _, binary_batch = gen_binary_entities(nq)

        collection_list = []
        for i in range(num_collections):
            collection_name = gen_unique_str("test_compact_multi_collection_%d" % i)
            collection_list.append(collection_name)
            # Binary entities need the binary schema; the original created
            # these collections with the float-vector `default_fields`.
            connect.create_collection(collection_name, default_binary_fields)

        for collection_name in collection_list:
            ids = connect.insert(collection_name, binary_batch)
            assert len(ids) == nq
            status = connect.delete_entity_by_id(collection_name, [ids[0], ids[-1]])
            assert status.OK()
            connect.flush([collection_name])
            status = connect.compact(collection_name)
            assert status.OK()
            status = connect.drop_collection(collection_name)
            assert status.OK()
639
640 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, binary_collection):
        '''
        target: test add entity after compact
        method: insert binary entities, flush, compact, then insert one more entity
        expected: status ok, data size unchanged by compact, entity added
        '''
        def first_segment_size():
            # data_size of the first segment of the first partition
            stats = connect.get_collection_stats(binary_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])

        # compare the segment size on both sides of the compact call
        size_before = first_segment_size()
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        size_after = first_segment_size()
        assert size_before == size_after

        # one extra entity must show up in the total count
        connect.insert(binary_collection, binary_entity)
        connect.flush([binary_collection])
        assert connect.count_entities(binary_collection) == nb + 1
663
664
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_delete_entities_after_compact(self, connect, binary_collection):
        '''
        target: test delete entities after compact
        method: insert binary entities, flush, compact, flush again, then delete
                every id returned by the insert
        expected: status ok, entity count is 0 after the final flush
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        connect.flush([binary_collection])

        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()
        connect.flush([binary_collection])

        delete_status = connect.delete_entity_by_id(binary_collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([binary_collection])

        remaining = connect.count_entities(binary_collection)
        assert remaining == 0
682
683
    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, binary_collection):
        '''
        target: test search after compact
        method: insert binary entities, flush, compact, then search with the first
                and last entries of binary_entities[-1]["values"]
        expected: status ok, top-1 distance of the first query matches the jaccard
                  distance of raw_vectors[0] to itself within epsilon
        '''
        inserted_ids = connect.insert(binary_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([binary_collection])
        compact_status = connect.compact(binary_collection)
        assert compact_status.OK()

        # reference distance: the first raw vector compared with itself
        first_raw = raw_vectors[0]
        expected_distance = jaccard(first_raw, first_raw)

        # NOTE(review): the query is keyed by the float `field_name` although the
        # collection is binary -- confirm before enabling this test.
        query = copy.deepcopy(default_single_query)
        stored_values = binary_entities[-1]["values"]
        vector_clause = query["bool"]["must"][0]["vector"][field_name]
        vector_clause["query"] = [stored_values[0], stored_values[-1]]

        res = connect.search(binary_collection, query)
        assert abs(res[0]._distances[0] - expected_distance) <= epsilon
703
704
    # TODO:
705
    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search after compact with an IP-metric query
        method: insert entities, flush, compact, then search with three vectors:
                entity[-1]["values"][0] plus the first and last entries of
                entities[-1]["values"]
        expected: compact status ok, one result per query vector; top-1 distance
                  is below 1 - epsilon for the first query and above it for the
                  other two
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        # The sibling compact tests all verify the returned status; without this
        # check a failed compact would go unnoticed here.
        assert status.OK()
        query = ip_query()
        query_vectors = [entity[-1]["values"][0],
                         entities[-1]["values"][0],
                         entities[-1]["values"][-1]]
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors
        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query_vectors)
        # first query vector comes from `entity`, not from the inserted `entities`
        assert res[0]._distances[0] < 1 - epsilon
        assert res[1]._distances[0] > 1 - epsilon
        assert res[2]._distances[0] > 1 - epsilon
726