1
|
|
|
import pdb |
2
|
|
|
import copy |
3
|
|
|
import pytest |
4
|
|
|
import threading |
5
|
|
|
import datetime |
6
|
|
|
import logging |
7
|
|
|
from time import sleep |
8
|
|
|
from multiprocessing import Process |
9
|
|
|
import sklearn.preprocessing |
10
|
|
|
from milvus import IndexType, MetricType |
11
|
|
|
from utils import * |
12
|
|
|
|
13
|
|
|
# --- collection settings -------------------------------------------------
dim = 128
index_file_size = 10
collection_id = "test_mix"
add_interval_time = 5

# --- shared test data ----------------------------------------------------
# 10000 vectors of width `dim` from the utils helper, scaled so that every
# row has unit L2 norm, then converted with .tolist() before being handed
# to the client.
vectors = sklearn.preprocessing.normalize(
    gen_vectors(10000, dim), axis=1, norm='l2'
).tolist()

# --- search / index parameters -------------------------------------------
top_k = 1
nprobe = 1
epsilon = 0.001
nlist = 128
# NOTE(review): this dict carries its own nlist (16384) instead of reusing
# the `nlist` constant above (128) -- confirm that is intentional.
index_params = {'index_type': IndexType.IVFLAT, 'nlist': 16384}
25
|
|
|
|
26
|
|
|
|
27
|
|
|
class TestMixBase:
    """Tests that mix insert, index and search operations on collections.

    Both tests carry a leading underscore (the first is also marked
    ``# disable``), which keeps them out of pytest's default ``test*``
    collection pattern.
    """

    # disable
    def _test_search_during_createIndex(self, args):
        '''
        target: search a collection from one process while another process
                writes to the same collection
        method: create an L2 collection, insert `vectors` ten times, then run
                a search worker and an insert worker in separate processes
        expected: both workers run to completion

        args: mapping that provides the "handler" entry passed to get_milvus.
        '''
        loops = 10000
        collection = gen_unique_str()
        query_vecs = [vectors[0], vectors[1]]
        id_0 = 0
        id_1 = 0

        milvus_instance = get_milvus(args["handler"])
        milvus_instance.create_collection({'collection_name': collection,
                                           'dimension': dim,
                                           'index_file_size': index_file_size,
                                           'metric_type': MetricType.L2})
        for i in range(10):
            status, ids = milvus_instance.insert(collection, vectors)
            if i == 0:
                # Ids of the two query vectors, taken from the first batch.
                id_0, id_1 = ids[0], ids[1]

        # NOTE(review): despite the test name, the second worker below runs
        # insert(); create_index() is defined but never started. Confirm
        # which of the two was intended before enabling this test.
        def create_index(milvus_instance):
            logging.getLogger().info("In create index")
            status = milvus_instance.create_index(collection, index_params)
            logging.getLogger().info(status)
            status, result = milvus_instance.get_index_info(collection)
            logging.getLogger().info(result)

        def insert(milvus_instance):
            logging.getLogger().info("In add vectors")
            status, ids = milvus_instance.insert(collection, vectors)
            logging.getLogger().info(status)

        def search(milvus_instance):
            logging.getLogger().info("In search vectors")
            for _ in range(loops):
                status, result = milvus_instance.search(collection, top_k, nprobe, query_vecs)
                logging.getLogger().info(status)
                assert result[0][0].id == id_0
                assert result[1][0].id == id_1

        # Each worker gets its own client instance.
        p_search = Process(target=search, args=(get_milvus(args["handler"]), ))
        p_search.start()
        p_create = Process(target=insert, args=(get_milvus(args["handler"]), ))
        p_create.start()

        p_create.join()
        # Wait for the search worker as well; previously it was started and
        # then abandoned, so the test returned while it was still running.
        p_search.join()
        # NOTE(review): the asserts in search() execute in the child process;
        # a failure there only surfaces as a non-zero Process.exitcode, which
        # is not checked here.

    @pytest.mark.level(2)
    def _test_mix_multi_collections(self, connect):
        '''
        target: test functions with multiple collections of different metric_types and index_types
        method: create 60 collections which 30 are L2 and the other are IP, add vectors into them
                and test describe index and search
        expected: status ok
        '''
        index_types = (IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8)
        collections_per_index = 10
        # 30 collections per metric type: 10 for each of the 3 index types.
        collections_per_metric = collections_per_index * len(index_types)
        # Positions in `vectors` that are later used as query vectors.
        sample_offsets = (0, 10, 20)

        collection_list = []
        idx = []
        index_param = {'nlist': nlist}

        def index_type_of(position):
            # Within each block of 30 collections, positions 0-9 are FLAT,
            # 10-19 IVFLAT and 20-29 IVF_SQ8.
            return index_types[(position % collections_per_metric) // collections_per_index]

        # create collections and add vectors: first the L2 block, then IP
        for metric_type in (MetricType.L2, MetricType.IP):
            for _ in range(collections_per_metric):
                collection_name = gen_unique_str('test_mix_multi_collections')
                collection_list.append(collection_name)
                param = {'collection_name': collection_name,
                         'dimension': dim,
                         'index_file_size': index_file_size,
                         'metric_type': metric_type}
                connect.create_collection(param)
                status, ids = connect.insert(collection_name=collection_name, records=vectors)
                # Check the status before touching `ids`.
                assert status.OK()
                # Flush every collection; previously only the IP ones were.
                status = connect.flush([collection_name])
                assert status.OK()
                idx.extend(ids[offset] for offset in sample_offsets)

        # create index
        for position, collection_name in enumerate(collection_list):
            status = connect.create_index(collection_name, index_type_of(position), index_param)
            assert status.OK()

        # describe index
        for position, collection_name in enumerate(collection_list):
            status, result = connect.get_index_info(collection_name)
            assert result._index_type == index_type_of(position)

        # search
        query_vecs = [vectors[offset] for offset in sample_offsets]
        ids_per_collection = len(sample_offsets)
        for i, collection in enumerate(collection_list):
            status, result = connect.search(collection, top_k, query_records=query_vecs, params={"nprobe": 1})
            assert status.OK()
            assert len(result) == len(query_vecs)
            logging.getLogger().info(i)
            for j in range(len(query_vecs)):
                assert len(result[j]) == top_k
            for j in range(len(query_vecs)):
                expected_id = idx[ids_per_collection * i + j]
                found = check_result(result[j], expected_id)
                if not found:
                    # Log what came back and what was expected before failing.
                    logging.getLogger().info(result[j]._id_list)
                    logging.getLogger().info(expected_id)
                assert found
161
|
|
|
|
162
|
|
|
def check_result(result, id):
    """Return True when `id` is the `.id` of one of the first five hits.

    Results with fewer than five hits are checked in full; an empty
    result yields False.
    """
    # Only the leading five entries are ever compared.
    window = min(len(result), 5)
    leading_ids = [result[pos].id for pos in range(window)]
    return id in leading_ids
|
|
|
|
167
|
|
|
|