Passed: Push to master (fd4969...54df52) by unknown, created 01:50

TestStatsBase.test_get_collection_stats_partitions(): rated A

Complexity: Conditions 5
Size: Total Lines 23, Code Lines 17
Duplication: Lines 0, Ratio 0 %
Importance: Changes 0

Metric   Value
cc       5
eloc     17
nop      3
dl       0
loc      23
rs       9.0833
c        0
b        0
f        0
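The table above is the report's method-level summary: cc 5 matches the "Conditions 5" complexity panel, loc 23 and eloc 17 match the Total/Code line counts, and dl 0 matches the duplication lines. The exact tool behind these numbers is not stated in the report; as a rough local cross-check only, the sketch below uses the third-party radon package (an assumption, not the report's tooling) to recompute the cyclomatic complexity and letter rank of the highlighted method. The file name is also an assumption.

from radon.complexity import cc_rank, cc_visit

# Hypothetical local path for the reviewed module; adjust to wherever it lives.
with open("test_collection_stats.py") as f:
    source = f.read()

# cc_visit returns top-level functions and classes; methods hang off the class blocks.
for block in cc_visit(source):
    for method in getattr(block, "methods", []):
        if method.name == "test_get_collection_stats_partitions":
            # Should roughly reproduce the report: complexity 5, rank A.
            print(method.name, method.complexity, cc_rank(method.complexity))

The reviewed file is reproduced in full below.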
import time
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from utils import *

dim = 128
segment_size = 10
nprobe = 1
top_k = 1
epsilon = 0.0001
tag = "1970-01-01"
nb = 6000
nlist = 1024
collection_id = "collection_stats"
field_name = "float_vector"
default_index_name = "stats_index"
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()

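# NOTE: `connect`, `collection`, `ip_collection`, and `jac_collection` used by the
# tests below are pytest fixtures assumed to come from the suite's conftest.py
# (not shown here); they are expected to provide a live client connection and
# pre-created collections for the corresponding metric types. The gen_* helpers
# are pulled in by the wildcard import from `utils` above.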
class TestStatsBase:
    """
    ******************************************************************
      The following cases are used to test `collection_stats` function
    ******************************************************************
    """

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        yield request.param

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        if str(connect._cmd("mode")) == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("CPU mode does not support index_type ivf_sq8h")
        return request.param

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        logging.getLogger().info(request.param)
        if request.param["index_type"] in binary_support():
            return request.param
        else:
            pytest.skip("Skip non-binary index types temporarily")

    def test_get_collection_stats_name_not_existed(self, connect, collection):
        '''
        target: get collection stats where the collection name does not exist
        method: call collection_stats with a random collection_name that is not in the db
        expected: status not ok
        '''
        collection_name = gen_unique_str(collection_id)
        with pytest.raises(Exception) as e:
            stats = connect.get_collection_stats(collection_name)

    @pytest.mark.level(2)
    def test_get_collection_stats_name_invalid(self, connect, get_collection_name):
        '''
        target: get collection stats where the collection name is invalid
        method: call collection_stats with an invalid collection_name
        expected: status not ok
        '''
        collection_name = get_collection_name
        with pytest.raises(Exception) as e:
            stats = connect.get_collection_stats(collection_name)

    def test_get_collection_stats_empty(self, connect, collection):
        '''
        target: get collection stats when there is no entity in the collection
        method: call collection_stats on an empty collection
        expected: row_count is 0 and only the empty _default partition is reported
        '''
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == 0
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == 0

    def test_get_collection_stats_batch(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add entities in a batch, check the count in the collection info
        expected: count as expected
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_batch_ip(self, connect, ip_collection):
        '''
        target: get row count with collection_stats
        method: add entities in a batch, check the count in the collection info
        expected: count as expected
        '''
        ids = connect.insert(ip_collection, entities)
        connect.flush([ip_collection])
        stats = connect.get_collection_stats(ip_collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_single(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add entities one by one, check the count in the collection info
        expected: count as expected
        '''
        nb = 10
        for i in range(nb):
            ids = connect.insert(collection, entity)
            connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_after_delete(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add and delete entities, check the count in the collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = [ids[0], ids[-1]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb - 2
        assert stats["partitions"][0]["segments"][0]["data_size"] > 0
        assert stats["partitions"][0]["segments"][0]["index_name"] == "FLAT"

    def test_get_collection_stats_after_compact_parts(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add and delete entities, compact the collection, check the count in the collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = ids[:3000]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        assert stats["row_count"] == nb - 3000
        compact_before = stats["partitions"][0]["segments"][0]["data_size"]
        connect.compact(collection)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_after = stats["partitions"][0]["segments"][0]["data_size"]
        # pdb.set_trace()
        assert compact_before > compact_after

    def test_get_collection_stats_after_compact_delete_one(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add entities, delete one entity, compact the collection, check the count in the collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = ids[:1]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_before = stats["partitions"][0]["segments"][0]["data_size"]
        connect.compact(collection)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_after = stats["partitions"][0]["segments"][0]["data_size"]
        # pdb.set_trace()
        assert compact_before == compact_after

    def test_get_collection_stats_partition(self, connect, collection):
        '''
        target: get partition info in a collection
        method: call collection_stats after a partition is created and check partition_stats
        expected: status ok, vectors added to the partition
        '''
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert len(stats["partitions"]) == 2
        assert stats["partitions"][1]["tag"] == tag
        assert stats["partitions"][1]["row_count"] == nb

    def test_get_collection_stats_partitions(self, connect, collection):
        '''
        target: get partition info in a collection
        method: create two partitions, add vectors to one of them, call collection_stats and check the result
        expected: status ok, vectors added to one partition but not the other
        '''
        new_tag = "new_tag"
        connect.create_partition(collection, tag)
        connect.create_partition(collection, new_tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        for partition in stats["partitions"]:
            if partition["tag"] == tag:
                assert partition["row_count"] == nb
            else:
                assert partition["row_count"] == 0
        ids = connect.insert(collection, entities, partition_tag=new_tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        for partition in stats["partitions"]:
            if partition["tag"] in [tag, new_tag]:
                assert partition["row_count"] == nb

    def test_get_collection_stats_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test collection info after an index is created
        method: create a collection, add vectors, create an index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]

    def test_get_collection_stats_after_index_created_ip(self, connect, ip_collection, get_simple_index):
        '''
        target: test collection info after an index is created
        method: create a collection, add vectors, create an index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(ip_collection, entities)
        connect.flush([ip_collection])
        connect.create_index(ip_collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(ip_collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]

    def test_get_collection_stats_after_index_created_jac(self, connect, jac_collection, get_jaccard_index):
        '''
        target: test collection info after an index is created
        method: create a collection, add binary entities, create an index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(jac_collection, binary_entities)
        connect.flush([jac_collection])
        connect.create_index(jac_collection, "binary_vector", default_index_name, get_jaccard_index)
        stats = connect.get_collection_stats(jac_collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_jaccard_index["index_type"]

    def test_get_collection_stats_after_create_different_index(self, connect, collection):
        '''
        target: test collection info after indexes are created repeatedly
        method: create a collection, add vectors, create different indexes and call collection_stats each time
        expected: status ok, index info shown in segments
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        for index_type in ["IVF_FLAT", "IVF_SQ8"]:
            connect.create_index(collection, field_name, default_index_name, {"index_type": index_type, "nlist": 1024})
            stats = connect.get_collection_stats(collection)
            logging.getLogger().info(stats)
            assert stats["partitions"][0]["segments"][0]["index_name"] == index_type
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb

    def test_collection_count_multi_collections(self, connect):
        '''
        target: test that the row count is correct across multiple L2 collections
        method: create several collections, add entities to each, and check that the count reported by collection_stats equals the number of inserted entities
        expected: row count in segments matches nb
        '''
        collection_list = []
        collection_num = 10
        for i in range(collection_num):
            collection_name = gen_unique_str(collection_id)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
            res = connect.insert(collection_name, entities)
        connect.flush(collection_list)
        for i in range(collection_num):
            stats = connect.get_collection_stats(collection_list[i])
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb
            connect.drop_collection(collection_list[i])

    def test_collection_count_multi_collections_indexed(self, connect):
        '''
        target: test that the row count is correct across multiple indexed L2 collections
        method: create several collections, add entities to each, index them with different index types, and check that the count reported by collection_stats equals the number of inserted entities
        expected: row count in segments matches nb
        '''
        collection_list = []
        collection_num = 10
        for i in range(collection_num):
            collection_name = gen_unique_str(collection_id)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
            res = connect.insert(collection_name, entities)
            connect.flush(collection_list)
            if i % 2:
                connect.create_index(collection_name, field_name, default_index_name, {"index_type": "IVF_SQ8", "nlist": 1024})
            else:
                connect.create_index(collection_name, field_name, default_index_name, {"index_type": "IVF_FLAT", "nlist": 1024})
        for i in range(collection_num):
            stats = connect.get_collection_stats(collection_list[i])
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb
            if i % 2:
                assert stats["partitions"][0]["segments"][0]["index_name"] == "IVF_SQ8"
            else:
                assert stats["partitions"][0]["segments"][0]["index_name"] == "IVF_FLAT"
            connect.drop_collection(collection_list[i])
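
To exercise just the rated test locally, one option is to select its node id with pytest. A minimal sketch, assuming the module is saved as test_collection_stats.py and that the surrounding conftest.py fixtures (connect, collection, ...) can reach a running Milvus instance; the suite is normally driven by the project's own pytest configuration:

import pytest

# Run a single test node, <file>::<class>::<test>, with verbose output.
pytest.main([
    "test_collection_stats.py::TestStatsBase::test_get_collection_stats_partitions",
    "-v",
])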