1
|
|
|
import time |
2
|
|
|
import random |
3
|
|
|
import pdb |
4
|
|
|
import threading |
5
|
|
|
import logging |
6
|
|
|
from multiprocessing import Pool, Process |
7
|
|
|
import pytest |
8
|
|
|
from utils import * |
9
|
|
|
|
10
|
|
|
# Sizing constants for this module; `nb` is the default batch size handed to
# the entity generators below and to several tests.
dim = 128
segment_row_count = 100000
nb = 6000

# Partition tag and collection-name prefix used by the test cases.
tag = "1970-01-01"
collection_id = "list_id_in_segment"

# Field on which get_segment_id() creates an index.
field_name = default_float_vec_field_name

# Payloads generated once at import time and shared by the tests.
# The generator calls are kept in their original order.
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()
|
|
|
|
21
|
|
|
|
22
|
|
|
|
23
|
|
|
def get_segment_id(connect, collection, nb=1, vec_type='float', index_params=None):
    """Insert ``nb`` generated entities and return their ids plus a segment id.

    The entities are inserted into ``collection`` and the collection is
    flushed. When ``index_params`` is truthy, an index is then created on the
    module-level ``field_name`` with those parameters.

    Returns a ``(ids, segment_id)`` pair: ``ids`` is the value returned by
    ``connect.insert`` and ``segment_id`` is the ``"id"`` of the first segment
    of the first partition reported by ``connect.get_collection_stats``.
    """
    if vec_type == "float":
        payload = gen_entities(nb)
    else:
        # gen_binary_entities returns a pair; only its second item is inserted.
        _, payload = gen_binary_entities(nb)

    inserted_ids = connect.insert(collection, payload)
    connect.flush([collection])

    if index_params:
        # NOTE(review): field_name is the float-vector field name even when
        # vec_type is not "float" -- confirm this is right for binary collections.
        connect.create_index(collection, field_name, index_params)

    first_partition = connect.get_collection_stats(collection)["partitions"][0]
    return inserted_ids, first_partition["segments"][0]["id"]
34
|
|
|
|
35
|
|
|
|
36
|
|
|
class TestListIdInSegmentBase:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
    ******************************************************************
    """

    def test_list_id_in_segment_collection_name_None(self, connect, collection):
        """
        target: get vector ids where collection name is None
        method: call list_id_in_segment with the collection_name: None
        expected: exception raised
        """
        collection_name = None
        _, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    def test_list_id_in_segment_collection_name_not_existed(self, connect, collection):
        """
        target: get vector ids where collection name does not exist
        method: call list_id_in_segment with a random collection_name, which is not in db
        expected: exception raised
        """
        collection_name = gen_unique_str(collection_id)
        _, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        """Yield each value produced by gen_invalid_strs() as a collection name."""
        yield request.param

    def test_list_id_in_segment_collection_name_invalid(self, connect, collection, get_collection_name):
        """
        target: get vector ids where collection name is invalid
        method: call list_id_in_segment with invalid collection_name
        expected: exception raised
        """
        collection_name = get_collection_name
        _, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    def test_list_id_in_segment_name_None(self, connect, collection):
        """
        target: get vector ids where segment name is None
        method: call list_id_in_segment with the name: None
        expected: exception raised
        """
        # The insert/flush is still performed so the collection holds a segment.
        get_segment_id(connect, collection)
        segment = None
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, segment)

    def test_list_id_in_segment_name_not_existed(self, connect, collection):
        """
        target: get vector ids where segment name does not exist
        method: call list_id_in_segment with a segment id offset from the real one
        expected: exception raised
        """
        _, seg_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            # seg_id + 10000 is used as an id that should not belong to any segment.
            connect.list_id_in_segment(collection, seg_id + 10000)

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_A(self, connect, collection):
        """
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: the listed ids match the inserted ids
        """
        nb = 1
        ids, seg_id = get_segment_id(connect, collection, nb=nb)
        vector_ids = connect.list_id_in_segment(collection, seg_id)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        assert vector_ids[0] == ids[0]

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_B(self, connect, collection):
        """
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: the listed ids match the inserted ids
        """
        nb = 10
        entities = gen_entities(nb)
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        # Index 1 is expected to be the partition created above.
        assert stats["partitions"][1]["tag"] == tag
        segment_id = stats["partitions"][1]["segments"][0]["id"]
        vector_ids = connect.list_id_in_segment(collection, segment_id)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        """Return each index configuration, skipping those unsupported in CPU mode."""
        if str(connect._cmd("mode")) == "CPU" \
                and request.param["index_type"] in index_cpu_not_support():
            # Report the index type that was actually skipped.
            pytest.skip("CPU not support index_type: %s" % request.param["index_type"])
        return request.param

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_A(self, connect, collection, get_simple_index):
        """
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: no exception raised
        """
        # nb here is the module-level default batch size.
        _, seg_id = get_segment_id(connect, collection, nb=nb, index_params=get_simple_index)
        try:
            connect.list_id_in_segment(collection, seg_id)
        except Exception as e:
            pytest.fail(str(e))
        # TODO: verify the listed ids against the inserted ids

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_B(self, connect, collection, get_simple_index):
        """
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it, build the index and call list_id_in_segment
        expected: no exception raised
        """
        connect.create_partition(collection, tag)
        connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        # Build the parametrized index so the case really runs "with index";
        # same insert -> flush -> create_index order as get_segment_id().
        connect.create_index(collection, field_name, get_simple_index)
        stats = connect.get_collection_stats(collection)
        assert stats["partitions"][1]["tag"] == tag
        segment_id = stats["partitions"][1]["segments"][0]["id"]
        try:
            connect.list_id_in_segment(collection, segment_id)
        except Exception as e:
            pytest.fail(str(e))
        # TODO: vector_ids should match ids

    @pytest.mark.level(2)
    def test_list_id_in_segment_after_delete_vectors(self, connect, collection):
        """
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: only the id that was not deleted is listed
        """
        nb = 2
        ids, _ = get_segment_id(connect, collection, nb=nb)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        # Re-read the stats after the delete instead of reusing the earlier segment id.
        stats = connect.get_collection_stats(collection)
        segment_id = stats["partitions"][0]["segments"][0]["id"]
        vector_ids = connect.list_id_in_segment(collection, segment_id)
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_ip(self, connect, collection, get_simple_index):
        """
        target: get vector ids when there is an index using the IP metric
        method: call list_id_in_segment and check if the segment contains vectors
        expected: no exception raised
        """
        # Work on a copy: the fixture value is the shared parametrization object,
        # so setting the metric in place would leak into other tests.
        index_params = dict(get_simple_index)
        index_params["metric_type"] = "IP"
        _, seg_id = get_segment_id(connect, collection, nb=nb, index_params=index_params)
        connect.list_id_in_segment(collection, seg_id)
        # TODO: verify the listed ids against the inserted ids
210
|
|
|
|
211
|
|
|
|
212
|
|
|
class TestListIdInSegmentBinary:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
      against binary collections
    ******************************************************************
    """

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_A(self, connect, binary_collection):
        """
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: the listed ids match the inserted ids
        """
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(binary_collection, entities)
        connect.flush([binary_collection])
        stats = connect.get_collection_stats(binary_collection)
        segment_id = stats["partitions"][0]["segments"][0]["id"]
        vector_ids = connect.list_id_in_segment(binary_collection, segment_id)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_B(self, connect, binary_collection):
        """
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: the listed ids match the inserted ids
        """
        connect.create_partition(binary_collection, tag)
        nb = 10
        _, entities = gen_binary_entities(nb)
        ids = connect.insert(binary_collection, entities, partition_tag=tag)
        connect.flush([binary_collection])
        stats = connect.get_collection_stats(binary_collection)
        # Index 1 is expected to be the partition created above.
        segment_id = stats["partitions"][1]["segments"][0]["id"]
        vector_ids = connect.list_id_in_segment(binary_collection, segment_id)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        """Return a JACCARD copy of each binary-capable index config; skip the rest."""
        logging.getLogger().info(request.param)
        if request.param["index_type"] not in binary_support():
            pytest.skip("not support")
        # Copy before setting the metric so the shared parametrization object
        # is not modified for other tests.
        index_params = dict(request.param)
        index_params["metric_type"] = "JACCARD"
        return index_params

    def test_list_id_in_segment_with_index_A(self, connect, binary_collection, get_jaccard_index):
        """
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: no exception raised
        """
        # Use the binary_collection fixture; the name previously used here was
        # never defined and raised NameError.
        # nb is the module-level default batch size.
        _, seg_id = get_segment_id(connect, binary_collection, nb=nb,
                                   index_params=get_jaccard_index, vec_type='binary')
        connect.list_id_in_segment(binary_collection, seg_id)
        # TODO: verify the listed ids against the inserted ids

    def test_list_id_in_segment_with_index_B(self, connect, binary_collection, get_jaccard_index):
        """
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: no exception raised
        """
        connect.create_partition(binary_collection, tag)
        # Insert the module-level binary payload; the float `entities` do not
        # belong in a binary collection.
        connect.insert(binary_collection, binary_entities, partition_tag=tag)
        connect.flush([binary_collection])
        # NOTE(review): get_jaccard_index is requested but no index is created
        # here; the binary vector field name is not visible in this module --
        # confirm and add a create_index call.
        stats = connect.get_collection_stats(binary_collection)
        assert stats["partitions"][1]["tag"] == tag
        segment_id = stats["partitions"][1]["segments"][0]["id"]
        connect.list_id_in_segment(binary_collection, segment_id)
        # TODO: vector_ids should match ids

    def test_list_id_in_segment_after_delete_vectors(self, connect, binary_collection, get_jaccard_index):
        """
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: only the id that was not deleted is listed
        """
        nb = 2
        ids, _ = get_segment_id(connect, binary_collection, nb=nb, vec_type='binary',
                                index_params=get_jaccard_index)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(binary_collection, delete_ids)
        connect.flush([binary_collection])
        # Re-read the stats after the delete instead of reusing the earlier segment id.
        stats = connect.get_collection_stats(binary_collection)
        segment_id = stats["partitions"][0]["segments"][0]["id"]
        vector_ids = connect.list_id_in_segment(binary_collection, segment_id)
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]
307
|
|
|
|