Passed
Push — master ( 7861da...b12a19 )
by
unknown
05:56 queued 04:03
created

TestCompactBase._test_compact_server_crashed_recovery()   A

Complexity

Conditions 1

Size

Total Lines 26
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 18
nop 3
dl 0
loc 26
rs 9.5
c 0
b 0
f 0
1
import time
2
import pdb
3
import threading
4
import logging
5
from multiprocessing import Pool, Process
6
import pytest
7
from utils import *
8
9
# Vector dimension; passed to gen_vectors() when building the default query below.
dim = 128
# NOTE(review): not referenced in the visible part of this module - confirm before removing.
index_file_size = 10
# Limit handed to @pytest.mark.timeout on the tests below
# (pytest-timeout interprets the value as seconds).
COMPACT_TIMEOUT = 180
# NOTE(review): nprobe / top_k are not referenced in the visible part of this module.
nprobe = 1
top_k = 1
# Partition tag used by the partition-based compact test.
tag = "1970-01-01"
# Size of the bulk fixtures; tests assert len(ids) == nb after inserting them.
nb = 6000
# NOTE(review): not referenced in the visible part of this module.
segment_row_count = 5000
# Shared fixtures, built once at import time by helpers that are presumably
# provided by `from utils import *`: a single entity and a batch of nb entities.
entity = gen_entities(1)
entities = gen_entities(nb)
# Binary counterparts used by the TestCompactJAC tests; the helper returns a
# pair, of which the second element is what gets inserted.
raw_vector, binary_entity = gen_binary_entities(1)
raw_vectors, binary_entities = gen_binary_entities(nb)
# Field definitions used when tests create their own collections.
default_fields = gen_default_fields()
# Name of the vector field that queries and index creation refer to.
field_name = default_float_vec_field_name
# Template query with a single vector clause on field_name. Tests deep-copy it
# before changing anything, so treat it as read-only.
default_single_query = {
    "bool": {
        "must": [
            {"vector": {field_name: {"topk": 10, "query": gen_vectors(1, dim),
                                     "params": {"nprobe": 10}}}}
        ]
    }
}
31
32
def ip_query():
    """Return a copy of ``default_single_query`` that uses the IP metric.

    The module-level template is deep-copied, so setting ``metric_type`` here
    never leaks into other tests that share ``default_single_query``.

    Returns:
        dict: the query, with ``"metric_type": "IP"`` added to the vector
        clause stored under ``field_name``.
    """
    # Imported locally: this module never imports ``copy`` itself and would
    # otherwise rely on ``from utils import *`` happening to re-export it.
    import copy

    query = copy.deepcopy(default_single_query)
    query["bool"]["must"][0]["vector"][field_name].update({"metric_type": "IP"})
    return query
36
37
38
class TestCompactBase:
39
    """
40
    ******************************************************************
41
      The following cases are used to test `compact` function
42
    ******************************************************************
43
    """
44
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_None(self, connect, collection):
        '''
        target: compact must reject a missing collection name
        method: call compact with None in place of the collection name
        expected: an exception is raised
        '''
        with pytest.raises(Exception):
            connect.compact(None)
54
55
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_not_existed(self, connect, collection):
        '''
        target: compact must reject a collection that was never created
        method: call compact with a freshly generated, unused collection name
        expected: an exception is raised
        '''
        # Generate the name outside the raises-block so that only the compact
        # call itself is allowed to raise.
        missing_name = gen_unique_str("not_existed")
        with pytest.raises(Exception):
            connect.compact(missing_name)
65
    
66
    @pytest.fixture(scope="function", params=gen_invalid_strs())
    def get_collection_name(self, request):
        '''Parametrized fixture: yields each value from gen_invalid_strs() in turn.'''
        invalid_name = request.param
        yield invalid_name
72
73
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_collection_name_invalid(self, connect, get_collection_name):
        '''
        target: compact must reject invalid collection names
        method: call compact with each name supplied by the get_collection_name fixture
        expected: an exception is raised
        '''
        with pytest.raises(Exception):
            connect.compact(get_collection_name)
84
    
85
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, collection):
        '''
        target: compact a collection that holds a single entity
        method: insert one entity, flush, then compact the collection
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        inserted_ids = connect.insert(collection, entity)
        assert len(inserted_ids) == 1
        connect.flush([collection])

        # Snapshot of the first segment before compacting.
        stats_before = connect.get_collection_stats(collection)
        logging.getLogger().info(stats_before)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        # Nothing was deleted, so the segment must keep its size.
        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
107
    
108 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, collection):
        '''
        target: compact a collection that holds a batch of entities
        method: insert the shared entities, flush, then compact the collection
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entities)
        connect.flush([collection])

        size_before = first_segment_size()
        assert connect.compact(collection).OK()
        size_after = first_segment_size()

        assert size_before == size_after
129
130 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, collection):
        '''
        target: compact after part of the entities were deleted
        method: insert entities, delete the first and the last one, flush, compact
        expected: status ok, the first segment's data_size does not grow
        '''
        log = logging.getLogger()

        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        doomed_ids = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, doomed_ids)
        assert delete_status.OK()
        connect.flush([collection])

        # State before compact.
        stats_before = connect.get_collection_stats(collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        # State after compact.
        stats_after = connect.get_collection_stats(collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)

        assert size_before >= size_after
158
    
159 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, collection):
        '''
        target: compact after every entity was deleted
        method: insert entities, delete all of them, flush, compact
        expected: status ok, the first partition reports no segments afterwards
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        delete_status = connect.delete_entity_by_id(collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([collection])

        # Stats are requested before compact as well; the result is not inspected.
        connect.get_collection_stats(collection)
        compact_status = connect.compact(collection)
        assert compact_status.OK()

        stats_after = connect.get_collection_stats(collection)
        logging.getLogger().info(stats_after["partitions"])
        assert not stats_after["partitions"][0]["segments"]
181
182
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_partition_delete_half_and_compact(self, connect, collection):
        '''
        target: test add entities into partition, delete half of them and compact
        method: add entities into a new partition, delete the first half, compact collection
        expected: status ok, data_size of the partition's first segment shrinks
        '''
        connect.create_partition(collection, tag)
        assert connect.has_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])

        # Derive "half" from what was actually inserted instead of a literal
        # 3000, which was only correct while nb == 6000.
        delete_ids = ids[:len(ids) // 2]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])

        # get collection info before compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        status = connect.compact(collection)
        assert status.OK()

        # get collection info after compact
        info_after = connect.get_collection_stats(collection)
        logging.getLogger().info(info_after["partitions"])

        # NOTE(review): index 1 is presumably the partition created above
        # (index 0 being the default partition) - confirm against the stats layout.
        size_before = info["partitions"][1]["segments"][0]["data_size"]
        size_after = info_after["partitions"][1]["segments"][0]["data_size"]
        assert size_before > size_after
210
211 View Code Duplication
    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        '''
        Parametrized fixture returning one index configuration per test run.

        Configurations the connected server cannot handle are skipped:
        in GPU mode only index types listed by ivf() are kept, in CPU mode
        the types listed by index_cpu_not_support() are dropped.

        Returns the unmodified request.param.
        '''
        # Query the server mode once instead of once per branch.
        mode = str(connect._cmd("mode"))
        if mode == "GPU":
            # The previous condition read `not ... not in ivf()`; the double
            # negation skipped exactly the index types the message below
            # declares as supported.
            if request.param["index_type"] not in ivf():
                pytest.skip("Only support index_type: idmap/ivf")
        elif mode == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param
223
224
    @pytest.mark.skip(reason="create_index not support yet")
    def test_compact_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test compact collection after index created
        method: add entities, create index, delete part of entities and compact
        expected: status ok, data size of the first segment does not grow after compact
        '''
        # NOTE(review): the original docstring also promised "index description
        # no change", but nothing here inspects the index description.
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, get_simple_index)
        connect.flush([collection])

        # get collection info before compact (and before the delete)
        info = connect.get_collection_stats(collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        logging.getLogger().info(info["partitions"])

        delete_ids = [ids[0], ids[-1]]
        status = connect.delete_entity_by_id(collection, delete_ids)
        assert status.OK()
        connect.flush([collection])

        status = connect.compact(collection)
        assert status.OK()

        # get collection info after compact
        info = connect.get_collection_stats(collection)
        logging.getLogger().info(info["partitions"])
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        assert size_before >= size_after
251
    
252 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, collection):
        '''
        target: compacting twice in a row is harmless
        method: insert one entity, flush, compact, flush, compact again
        expected: both compacts succeed and the first segment's data_size never changes
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        connect.insert(collection, entity)
        connect.flush([collection])
        size_before = first_segment_size()

        # First compact, followed by a flush.
        assert connect.compact(collection).OK()
        connect.flush([collection])
        size_after = first_segment_size()
        assert size_before == size_after

        # Second compact.
        assert connect.compact(collection).OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice
277
278 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, collection):
        '''
        target: compact twice after a partial delete
        method: insert entities, delete the first and the last one, flush, compact twice
        expected: status ok; data_size does not grow on the first compact
                  and stays the same on the second
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])

        doomed_ids = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(collection, doomed_ids)
        assert delete_status.OK()
        connect.flush([collection])

        size_before = first_segment_size()

        # First compact.
        assert connect.compact(collection).OK()
        size_after = first_segment_size()
        assert size_before >= size_after

        # Second compact.
        assert connect.compact(collection).OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice
307
308
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_compact_multi_collections(self, connect):
        '''
        target: compact keeps working across many collections
        method: create 50 collections, then insert into and compact each one in turn
        expected: every compact returns an ok status
        '''
        entity_count = 100
        collection_count = 50
        batch = gen_entities(entity_count)

        names = []
        for idx in range(collection_count):
            name = gen_unique_str("test_compact_multi_collection_%d" % idx)
            names.append(name)
            connect.create_collection(name, default_fields)

        # Fixed pause between creating the collections and using them.
        time.sleep(6)

        for name in names:
            connect.insert(name, batch)
            assert connect.compact(name).OK()
328
329 View Code Duplication
    @pytest.mark.level(2)
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_after_compact(self, connect, collection):
        '''
        target: inserting still works after a compact
        method: insert a batch, flush, compact, then insert one more entity
        expected: status ok, data_size unchanged by compact, entity count is nb + 1
        '''
        batch_ids = connect.insert(collection, entities)
        assert len(batch_ids) == nb
        connect.flush([collection])

        stats_before = connect.get_collection_stats(collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        stats_after = connect.get_collection_stats(collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after

        # One more entity on top of the compacted collection.
        connect.insert(collection, entity)
        connect.flush([collection])
        total = connect.count_entities(collection)
        assert total == nb + 1
353
354
    @pytest.mark.skip(reason="delete not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_index_creation_after_compact(self, connect, collection, get_simple_index):
        '''
        target: an index can be created after a compact
        method: insert entities, delete the first ten, flush, compact, create index
        expected: delete, compact and create_index all return an ok status
        '''
        # NOTE(review): the index description itself is not checked here.
        inserted_ids = connect.insert(collection, entities)
        connect.flush([collection])

        delete_status = connect.delete_entity_by_id(collection, inserted_ids[:10])
        assert delete_status.OK()
        connect.flush([collection])

        compact_status = connect.compact(collection)
        assert compact_status.OK()

        index_status = connect.create_index(collection, field_name, get_simple_index)
        assert index_status.OK()
372
373
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_delete_entities_after_compact(self, connect, collection):
        '''
        target: deleting still works after a compact
        method: insert entities, flush, compact, flush, then delete every entity
        expected: status ok, the collection ends up with zero entities
        '''
        inserted_ids = connect.insert(collection, entities)
        assert len(inserted_ids) == nb
        connect.flush([collection])

        compact_status = connect.compact(collection)
        assert compact_status.OK()
        connect.flush([collection])

        delete_status = connect.delete_entity_by_id(collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([collection])

        remaining = connect.count_entities(collection)
        assert remaining == 0
391
392
    @pytest.mark.skip(reason="search not support yet")
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_search_after_compact(self, connect, collection):
        '''
        target: test search after compact
        method: after compact operation, search with three query vectors
        expected: status ok, one result per query vector; the first distance is
                  above epsilon, the second and third are below it
        '''
        # Imported locally: this module never imports ``copy`` itself and would
        # otherwise rely on ``from utils import *`` happening to re-export it.
        import copy

        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        assert status.OK()

        # Work on a copy so the shared query template stays untouched.
        query = copy.deepcopy(default_single_query)
        # First vector comes from `entity`, which this test never inserts;
        # the other two come from the inserted `entities`.
        query_vectors = [
            entity[-1]["values"][0],
            entities[-1]["values"][0],
            entities[-1]["values"][-1],
        ]
        query["bool"]["must"][0]["vector"][field_name]["query"] = query_vectors

        res = connect.search(collection, query)
        logging.getLogger().debug(res)
        assert len(res) == len(query_vectors)
        assert res[0]._distances[0] > epsilon
        assert res[1]._distances[0] < epsilon
        assert res[2]._distances[0] < epsilon
414
415
    # TODO: enable
416
    @pytest.mark.skip(reason="delete not support yet")
    def _test_compact_server_crashed_recovery(self, connect, collection):
        '''
        target: test compact when server crashed unexpectedly and restarted
        method: add entities, delete and compact collection; server stopped and restarted during compact
        expected: status ok, request recovered
        '''
        # Ported to the call conventions the rest of this module uses:
        # insert() returns the id list, flush() has no status to check, and the
        # remaining entities are counted with count_entities(). The previous
        # body unpacked (status, ids) / (status, info) tuples and read `.count`
        # from the stats dict, which does not match any other test in the file.
        total = nb * 100
        deleted = 1000

        ids = connect.insert(collection, gen_entities(total))
        assert len(ids) == total
        connect.flush([collection])

        status = connect.delete_entity_by_id(collection, ids[:deleted])
        assert status.OK()
        connect.flush([collection])

        # start to compact, kill and restart server
        # NOTE(review): nothing here stops or restarts the server; that step
        # is still manual, which is why the test name keeps its `_` prefix.
        logging.getLogger().info("compact starting...")
        status = connect.compact(collection)
        assert status.OK()

        # every entity that was not deleted must still be there after compact
        assert connect.count_entities(collection) == total - deleted
442
443
444
class TestCompactJAC:
445
    """
446
    ******************************************************************
447
      The following cases are used to test `compact` function
448
    ******************************************************************
449
    """
450 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact(self, connect, jac_collection):
        '''
        target: compact a binary-vector collection that holds a single entity
        method: insert one binary entity, flush, then compact the collection
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        inserted_ids = connect.insert(jac_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([jac_collection])

        stats_before = connect.get_collection_stats(jac_collection)
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]

        compact_status = connect.compact(jac_collection)
        assert compact_status.OK()

        stats_after = connect.get_collection_stats(jac_collection)
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        assert size_before == size_after
469
    
470 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_insert_and_compact(self, connect, jac_collection):
        '''
        target: compact a binary-vector collection that holds a batch of entities
        method: insert the shared binary entities, flush, then compact the collection
        expected: compact succeeds and the first segment's data_size is unchanged
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(jac_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(jac_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([jac_collection])

        size_before = first_segment_size()
        assert connect.compact(jac_collection).OK()
        size_after = first_segment_size()

        assert size_before == size_after
489
490 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact(self, connect, jac_collection):
        '''
        target: compact a binary-vector collection after a partial delete
        method: insert binary entities, delete the first and the last one, flush, compact
        expected: status ok, the first segment's data_size does not grow
        '''
        log = logging.getLogger()

        inserted_ids = connect.insert(jac_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([jac_collection])

        doomed_ids = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(jac_collection, doomed_ids)
        assert delete_status.OK()
        connect.flush([jac_collection])

        # State before compact.
        stats_before = connect.get_collection_stats(jac_collection)
        log.info(stats_before["partitions"])
        size_before = stats_before["partitions"][0]["segments"][0]["data_size"]
        log.info(size_before)

        compact_status = connect.compact(jac_collection)
        assert compact_status.OK()

        # State after compact.
        stats_after = connect.get_collection_stats(jac_collection)
        log.info(stats_after["partitions"])
        size_after = stats_after["partitions"][0]["segments"][0]["data_size"]
        log.info(size_after)

        assert size_before >= size_after
518
    
519 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_all_and_compact(self, connect, jac_collection):
        '''
        target: test add entities, delete them and compact
        method: add binary entities, delete all and compact collection
        expected: status ok, the first partition reports no segments because the collection is empty
        '''
        ids = connect.insert(jac_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([jac_collection])
        status = connect.delete_entity_by_id(jac_collection, ids)
        assert status.OK()
        connect.flush([jac_collection])

        # get collection info before compact (result is not inspected)
        connect.get_collection_stats(jac_collection)
        status = connect.compact(jac_collection)
        assert status.OK()

        # get collection info after compact
        # The stats call returns the info dict only, so there is no new status
        # to assert here; the old second `assert status.OK()` merely re-checked
        # the compact status and has been dropped.
        info = connect.get_collection_stats(jac_collection)
        logging.getLogger().info(info["partitions"])
        assert not info["partitions"][0]["segments"]
542
    
543 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    def test_add_entity_and_compact_twice(self, connect, jac_collection):
        '''
        target: compacting a binary-vector collection twice in a row is harmless
        method: insert one binary entity, flush, compact, compact again
        expected: both compacts succeed and the first segment's data_size never changes
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(jac_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(jac_collection, binary_entity)
        assert len(inserted_ids) == 1
        connect.flush([jac_collection])
        size_before = first_segment_size()

        # First compact.
        assert connect.compact(jac_collection).OK()
        size_after = first_segment_size()
        assert size_before == size_after

        # Second compact.
        assert connect.compact(jac_collection).OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice
568
569 View Code Duplication
    @pytest.mark.timeout(COMPACT_TIMEOUT)
    @pytest.mark.skip(reason="delete not support yet")
    def test_insert_delete_part_and_compact_twice(self, connect, jac_collection):
        '''
        target: compact a binary-vector collection twice after a partial delete
        method: insert binary entities, delete the first and the last one, flush, compact twice
        expected: status ok; data_size does not grow on the first compact
                  and stays the same on the second
        '''
        def first_segment_size():
            # data_size of the first segment in the first partition, read fresh.
            stats = connect.get_collection_stats(jac_collection)
            return stats["partitions"][0]["segments"][0]["data_size"]

        inserted_ids = connect.insert(jac_collection, binary_entities)
        assert len(inserted_ids) == nb
        connect.flush([jac_collection])

        doomed_ids = [inserted_ids[0], inserted_ids[-1]]
        delete_status = connect.delete_entity_by_id(jac_collection, doomed_ids)
        assert delete_status.OK()
        connect.flush([jac_collection])

        size_before = first_segment_size()

        # First compact.
        assert connect.compact(jac_collection).OK()
        size_after = first_segment_size()
        assert size_before >= size_after

        # Second compact.
        assert connect.compact(jac_collection).OK()
        size_after_twice = first_segment_size()
        assert size_after == size_after_twice
599
600
    @pytest.mark.timeout(COMPACT_TIMEOUT)
601
    @pytest.mark.skip(reason="delete not support yet")
602
    def test_compact_multi_collections(self, connect):
        '''
        target: verify compact works across several collections
        method: create 10 JACCARD collections, then for each one insert entities,
                delete the first and last id, flush, compact and drop it
        expected: every status is ok
        '''
        entity_count = 100
        collection_total = 10
        _, binary_rows = gen_binary_entities(entity_count)

        # phase 1: create all collections up front
        collection_names = []
        for index in range(collection_total):
            name = gen_unique_str("test_compact_multi_collection_%d" % index)
            collection_names.append(name)
            jaccard_fields = update_fields_metric_type(default_fields, "JACCARD")
            connect.create_collection(name, jaccard_fields)

        # phase 2: fill, trim, compact and drop each collection in creation order
        for name in collection_names:
            row_ids = connect.insert(name, binary_rows)
            assert len(row_ids) == entity_count
            delete_status = connect.delete_entity_by_id(name, [row_ids[0], row_ids[-1]])
            assert delete_status.OK()
            connect.flush([name])
            compact_status = connect.compact(name)
            assert compact_status.OK()
            drop_status = connect.drop_collection(name)
            assert drop_status.OK()
627
628 View Code Duplication
    @pytest.mark.level(2)
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
629
    @pytest.mark.timeout(COMPACT_TIMEOUT)
630
    def test_add_entity_after_compact(self, connect, jac_collection):
        '''
        target: test add entity after compact
        method: insert nb binary entities, flush, compact, then insert one more entity
        expected: status ok, data size unchanged by compact, entity count is nb + 1
        '''
        ids = connect.insert(jac_collection, binary_entities)
        # check the returned ids instead of silently discarding them
        assert len(ids) == nb
        connect.flush([jac_collection])
        # get collection info before compact
        info = connect.get_collection_stats(jac_collection)
        size_before = info["partitions"][0]["segments"][0]["data_size"]
        status = connect.compact(jac_collection)
        assert status.OK()
        # get collection info after compact
        info = connect.get_collection_stats(jac_collection)
        size_after = info["partitions"][0]["segments"][0]["data_size"]
        # nothing was deleted, so compact must leave the data size untouched
        assert size_before == size_after
        # binary_entity holds a single entity, so exactly one id is expected back
        ids = connect.insert(jac_collection, binary_entity)
        assert len(ids) == 1
        connect.flush([jac_collection])
        res = connect.count_entities(jac_collection)
        assert res == nb + 1
651
652
    @pytest.mark.timeout(COMPACT_TIMEOUT)
653
    @pytest.mark.skip(reason="delete not support yet")
654
    def test_delete_entities_after_compact(self, connect, jac_collection):
        '''
        target: verify entities can be deleted once the collection has been compacted
        method: insert binary entities, flush, compact, flush, then delete every inserted id
        expected: every status is ok and the collection ends up with zero entities
        '''
        inserted_ids = connect.insert(jac_collection, binary_entities)
        connect.flush([jac_collection])

        compact_status = connect.compact(jac_collection)
        assert compact_status.OK()
        connect.flush([jac_collection])

        # delete everything that was inserted
        delete_status = connect.delete_entity_by_id(jac_collection, inserted_ids)
        assert delete_status.OK()
        connect.flush([jac_collection])

        remaining = connect.count_entities(jac_collection)
        assert remaining == 0
670
671
    @pytest.mark.skip(reason="search not support yet")
672
    @pytest.mark.timeout(COMPACT_TIMEOUT)
673
    def test_search_after_compact(self, connect, jac_collection):
        '''
        target: test search after compact
        method: insert binary entities, flush, compact, then search with two vectors
                taken from the inserted entities
        expected: status ok, top-1 distance of the first result is within epsilon of
                  the jaccard distance between raw_vectors[0] and itself
        '''
        # imported locally so this test does not rely on `copy` leaking in
        # through `from utils import *`
        import copy

        ids = connect.insert(jac_collection, binary_entities)
        assert len(ids) == nb
        connect.flush([jac_collection])
        status = connect.compact(jac_collection)
        assert status.OK()
        query_vecs = [raw_vectors[0]]
        # reference distance: the first raw vector compared against itself
        distance = jaccard(query_vecs[0], raw_vectors[0])
        # deep copy so the shared module-level query template is never mutated
        query = copy.deepcopy(default_single_query)
        query["bool"]["must"][0]["vector"][field_name]["query"] = [binary_entities[-1]["values"][0],
                                                                   binary_entities[-1]["values"][-1]]
        res = connect.search(jac_collection, query)
        assert abs(res[0]._distances[0] - distance) <= epsilon
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable epsilon does not seem to be defined.
Loading history...
691
692
    # TODO:
693
    @pytest.mark.skip(reason="search not support yet")
694
    @pytest.mark.timeout(COMPACT_TIMEOUT)
695
    def test_search_after_compact_ip(self, connect, collection):
        '''
        target: test search after compact
        method: insert entities, flush, compact, then search with one vector that was
                not inserted followed by two vectors taken from the inserted entities
        expected: status ok, one result per query vector; the top-1 distance is below
                  1 - epsilon for the first query and above 1 - epsilon for the other two
        '''
        ids = connect.insert(collection, entities)
        assert len(ids) == nb
        connect.flush([collection])
        status = connect.compact(collection)
        # the compact result was previously ignored; fail fast if compaction did not succeed
        assert status.OK()
        query = ip_query()
        # first query vector comes from `entity` (never inserted), the other two from `entities`
        query["bool"]["must"][0]["vector"][field_name]["query"] = [entity[-1]["values"][0], entities[-1]["values"][0],
                                                                   entities[-1]["values"][-1]]
        res = connect.search(collection, query)
        logging.getLogger().info(res)
        assert len(res) == len(query["bool"]["must"][0]["vector"][field_name]["query"])
        assert res[0]._distances[0] < 1 - epsilon
        assert res[1]._distances[0] > 1 - epsilon
        assert res[2]._distances[0] > 1 - epsilon
714