import time
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from utils import *

dim = 128
segment_size = 10
nprobe = 1
top_k = 1
epsilon = 0.0001
tag = "1970-01-01"
nb = 6000
nlist = 1024
collection_id = "collection_stats"
field_name = "float_vector"
default_index_name = "stats_index"
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()

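# Assumed shape of the dict returned by connect.get_collection_stats(), inferred
# from the assertions in this module (not from a formal schema):
#
#     {
#         "row_count": <int>,
#         "partitions": [
#             {
#                 "tag": "_default",
#                 "row_count": <int>,
#                 "segments": [
#                     {"row_count": <int>, "data_size": <int>, "index_name": "FLAT"},
#                 ],
#             },
#         ],
#     }
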
class TestStatsBase:
    """
    ******************************************************************
      The following cases are used to test the `collection_stats` function
    ******************************************************************
    """

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        yield request.param

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        if str(connect._cmd("mode")) == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("CPU mode does not support index_type: ivf_sq8h")
        return request.param
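    # Note: gen_simple_index() (from utils) is assumed to yield index parameter
    # dicts shaped like {"index_type": ..., "nlist": ...}; the literal
    # {"index_type": "IVF_FLAT", "nlist": 1024} dicts passed to create_index()
    # further down follow the same shape.
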
    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        logging.getLogger().info(request.param)
        if request.param["index_type"] in binary_support():
            return request.param
        else:
            pytest.skip("Skip index temporarily: not a binary index type")

    def test_get_collection_stats_name_not_existed(self, connect, collection):
        '''
        target: get collection stats where collection name does not exist
        method: call collection_stats with a random collection_name, which is not in db
        expected: status not ok
        '''
        collection_name = gen_unique_str(collection_id)
        with pytest.raises(Exception) as e:
            stats = connect.get_collection_stats(collection_name)

    @pytest.mark.level(2)
    def test_get_collection_stats_name_invalid(self, connect, get_collection_name):
        '''
        target: get collection stats where collection name is invalid
        method: call collection_stats with an invalid collection_name
        expected: status not ok
        '''
        collection_name = get_collection_name
        with pytest.raises(Exception) as e:
            stats = connect.get_collection_stats(collection_name)

    def test_get_collection_stats_empty(self, connect, collection):
        '''
        target: get collection stats where no entity in collection
        method: call collection_stats on an empty collection
        expected: row_count is 0 and only the empty "_default" partition is reported
        '''
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == 0
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == 0

    def test_get_collection_stats_batch(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add entities, check count in collection info
        expected: count as expected
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_batch_ip(self, connect, ip_collection):
        '''
        target: get row count with collection_stats
        method: add entities, check count in collection info
        expected: count as expected
        '''
        ids = connect.insert(ip_collection, entities)
        connect.flush([ip_collection])
        stats = connect.get_collection_stats(ip_collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_single(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add entities one by one, check count in collection info
        expected: count as expected
        '''
        nb = 10
        for i in range(nb):
            ids = connect.insert(collection, entity)
            connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb
        assert len(stats["partitions"]) == 1
        assert stats["partitions"][0]["tag"] == "_default"
        assert stats["partitions"][0]["row_count"] == nb

    def test_get_collection_stats_after_delete(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add and delete entities, check count in collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = [ids[0], ids[-1]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert stats["row_count"] == nb - 2
        assert stats["partitions"][0]["segments"][0]["data_size"] > 0
        assert stats["partitions"][0]["segments"][0]["index_name"] == "FLAT"
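        # The "FLAT" index name checked above is assumed to be what the stats
        # report for segments without an explicitly created index (raw data);
        # the index tests further down expect the created index type instead.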

    def test_get_collection_stats_after_compact_parts(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add and delete entities, then compact the collection, check count in collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = ids[:3000]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        assert stats["row_count"] == nb - 3000
        compact_before = stats["partitions"][0]["segments"][0]["data_size"]
        connect.compact(collection)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_after = stats["partitions"][0]["segments"][0]["data_size"]
        # pdb.set_trace()
        assert compact_before > compact_after

    def test_get_collection_stats_after_compact_delete_one(self, connect, collection):
        '''
        target: get row count with collection_stats
        method: add and delete one entity, then compact the collection, check count in collection info
        expected: status ok, count as expected
        '''
        ids = connect.insert(collection, entities)
        status = connect.flush([collection])
        delete_ids = ids[:1]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_before = stats["partitions"][0]["segments"][0]["data_size"]
        connect.compact(collection)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        compact_after = stats["partitions"][0]["segments"][0]["data_size"]
        # pdb.set_trace()
        assert compact_before == compact_after

    def test_get_collection_stats_partition(self, connect, collection):
        '''
        target: get partition info in a collection
        method: call collection_stats after partition created and check partition_stats
        expected: status ok, vectors added to partition
        '''
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        assert len(stats["partitions"]) == 2
        assert stats["partitions"][1]["tag"] == tag
        assert stats["partitions"][1]["row_count"] == nb

    def test_get_collection_stats_partitions(self, connect, collection):
        '''
        target: get partition info in a collection
        method: create two partitions, add vectors to one of them, call collection_stats and check
        expected: status ok, vectors added to one partition but not the other
        '''
        new_tag = "new_tag"
        connect.create_partition(collection, tag)
        connect.create_partition(collection, new_tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        for partition in stats["partitions"]:
            if partition["tag"] == tag:
                assert partition["row_count"] == nb
            else:
                assert partition["row_count"] == 0
        ids = connect.insert(collection, entities, partition_tag=new_tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        for partition in stats["partitions"]:
            if partition["tag"] in [tag, new_tag]:
                assert partition["row_count"] == nb
    def test_get_collection_stats_after_index_created(self, connect, collection, get_simple_index):
        '''
        target: test collection info after index created
        method: create collection, add vectors, create index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        connect.create_index(collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]

    def test_get_collection_stats_after_index_created_ip(self, connect, ip_collection, get_simple_index):
        '''
        target: test collection info after index created
        method: create collection, add vectors, create index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(ip_collection, entities)
        connect.flush([ip_collection])
        connect.create_index(ip_collection, field_name, default_index_name, get_simple_index)
        stats = connect.get_collection_stats(ip_collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_simple_index["index_type"]

    def test_get_collection_stats_after_index_created_jac(self, connect, jac_collection, get_jaccard_index):
        '''
        target: test collection info after index created
        method: create collection, add binary entities, create index and call collection_stats
        expected: status ok, index created and shown in segments
        '''
        ids = connect.insert(jac_collection, binary_entities)
        connect.flush([jac_collection])
        connect.create_index(jac_collection, "binary_vector", default_index_name, get_jaccard_index)
        stats = connect.get_collection_stats(jac_collection)
        logging.getLogger().info(stats)
        assert stats["partitions"][0]["segments"][0]["row_count"] == nb
        assert stats["partitions"][0]["segments"][0]["index_name"] == get_jaccard_index["index_type"]

    def test_get_collection_stats_after_create_different_index(self, connect, collection):
        '''
        target: test collection info after different indexes are created repeatedly
        method: create collection, add vectors, create an index and call collection_stats for each index type
        expected: status ok, index info shown in segments
        '''
        ids = connect.insert(collection, entities)
        connect.flush([collection])
        for index_type in ["IVF_FLAT", "IVF_SQ8"]:
            connect.create_index(collection, field_name, default_index_name, {"index_type": index_type, "nlist": 1024})
            stats = connect.get_collection_stats(collection)
            logging.getLogger().info(stats)
            assert stats["partitions"][0]["segments"][0]["index_name"] == index_type
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb

    def test_collection_count_multi_collections(self, connect):
        '''
        target: test that the row count is correct with multiple collections using the L2 metric
        method: create several collections, add entities to each,
            and check that the row count reported by collection_stats equals the number of inserted entities
        expected: row count in segments
        '''
        collection_list = []
        collection_num = 10
        for i in range(collection_num):
            collection_name = gen_unique_str(collection_id)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
            res = connect.insert(collection_name, entities)
        connect.flush(collection_list)
        for i in range(collection_num):
            stats = connect.get_collection_stats(collection_list[i])
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb
            connect.drop_collection(collection_list[i])

    def test_collection_count_multi_collections_indexed(self, connect):
        '''
        target: test that the row count is correct with multiple indexed collections using the L2 metric
        method: create several collections, add entities to each and create an index,
            then check the row count and index name reported by collection_stats
        expected: row count in segments
        '''
        collection_list = []
        collection_num = 10
        for i in range(collection_num):
            collection_name = gen_unique_str(collection_id)
            collection_list.append(collection_name)
            connect.create_collection(collection_name, default_fields)
            res = connect.insert(collection_name, entities)
            connect.flush(collection_list)
            if i % 2:
                connect.create_index(collection_name, field_name, default_index_name, {"index_type": "IVF_SQ8", "nlist": 1024})
            else:
                connect.create_index(collection_name, field_name, default_index_name, {"index_type": "IVF_FLAT", "nlist": 1024})
        for i in range(collection_num):
            stats = connect.get_collection_stats(collection_list[i])
            assert stats["partitions"][0]["segments"][0]["row_count"] == nb
            if i % 2:
                assert stats["partitions"][0]["segments"][0]["index_name"] == "IVF_SQ8"
            else:
                assert stats["partitions"][0]["segments"][0]["index_name"] == "IVF_FLAT"
            connect.drop_collection(collection_list[i])