Passed
Push — master ( 7861da...b12a19 )
by
unknown
05:56 queued 04:03
created

test_list_id_in_segment   A

Complexity

Total Complexity 35

Size/Duplication

Total Lines 307
Duplicated Lines 14.98 %

Importance

Changes 0
Metric Value
eloc 176
dl 46
loc 307
rs 9.6
c 0
b 0
f 0
wmc 35

19 Methods

Rating   Name   Duplication   Size   Complexity  
A TestListIdInSegmentBase.get_collection_name() 0 6 1
A TestListIdInSegmentBase.test_list_id_in_segment_collection_name_not_existed() 0 10 2
A TestListIdInSegmentBase.test_list_id_in_segment_without_index_A() 0 13 1
A TestListIdInSegmentBase.test_list_id_in_segment_collection_name_invalid() 0 10 2
A TestListIdInSegmentBase.test_list_id_in_segment_with_index_A() 0 12 2
A TestListIdInSegmentBase.test_list_id_in_segment_name_not_existed() 0 10 2
A TestListIdInSegmentBase.test_list_id_in_segment_collection_name_None() 0 10 2
A TestListIdInSegmentBase.test_list_id_in_segment_name_None() 0 10 2
A TestListIdInSegmentBase.get_simple_index() 0 9 3
A TestListIdInSegmentBinary.test_list_id_in_segment_without_index_B() 18 18 2
A TestListIdInSegmentBase.test_list_id_in_segment_with_index_B() 0 16 2
A TestListIdInSegmentBase.test_list_id_in_segment_without_index_B() 0 19 2
A TestListIdInSegmentBinary.test_list_id_in_segment_without_index_A() 17 17 2
A TestListIdInSegmentBinary.test_list_id_in_segment_with_index_A() 0 8 1
A TestListIdInSegmentBinary.test_list_id_in_segment_after_delete_vectors() 0 15 1
A TestListIdInSegmentBase.test_list_id_in_segment_after_delete_vectors() 0 16 1
A TestListIdInSegmentBinary.test_list_id_in_segment_with_index_B() 0 12 1
A TestListIdInSegmentBase.test_list_id_in_segment_with_index_ip() 0 10 1
A TestListIdInSegmentBinary.get_jaccard_index() 11 11 2

1 Function

Rating   Name   Duplication   Size   Complexity  
A get_segment_id() 0 11 3

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
import time
2
import random
3
import pdb
4
import threading
5
import logging
6
from multiprocessing import Pool, Process
7
import pytest
8
from utils import *
9
10
# Shared configuration for the `list_id_in_segment` test cases.
dim = 128
segment_row_count = 100000
nb = 6000  # default entity count used by the index tests below
tag = "1970-01-01"  # partition tag used by the partition tests
# The `default_*` / `gen_*` helpers are not defined in this file; presumably
# they are exported by `from utils import *` -- confirm.
field_name = default_float_vec_field_name
collection_id = "list_id_in_segment"  # prefix passed to gen_unique_str()

# Pre-generated data shared across tests (single-entity and `nb`-entity sets).
entity = gen_entities(1)
raw_vector, binary_entity = gen_binary_entities(1)
entities = gen_entities(nb)
raw_vectors, binary_entities = gen_binary_entities(nb)
default_fields = gen_default_fields()
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable gen_default_fields does not seem to be defined.
Loading history...
21
22
23
def get_segment_id(connect, collection, nb=1, vec_type='float', index_params=None):
    """Insert generated entities and return their ids plus a segment id.

    Generates ``nb`` entities (float entities when ``vec_type`` is
    ``"float"``, binary entities otherwise), inserts them into
    ``collection``, flushes, and optionally builds an index.

    :param connect: client object exposing ``insert``, ``flush``,
        ``create_index`` and ``get_collection_stats``.
    :param collection: name of the collection to insert into.
    :param nb: number of entities to generate and insert.
    :param vec_type: ``"float"`` for float entities; any other value
        selects binary entities.
    :param index_params: when truthy, an index is created on
        ``field_name`` with these parameters after the flush.
    :return: tuple ``(ids, segment_id)`` where ``ids`` is the result of
        ``connect.insert`` and ``segment_id`` is the id of the first
        segment of the first partition reported by the collection stats.
    """
    if vec_type != "float":
        # Only the entity payload is needed here; the raw vectors are unused.
        _, entities = gen_binary_entities(nb)
    else:
        entities = gen_entities(nb)
    ids = connect.insert(collection, entities)
    connect.flush([collection])
    if index_params:
        # NOTE(review): `field_name` is the module-level float vector field
        # name and is used even when vec_type is binary -- confirm intended.
        connect.create_index(collection, field_name, index_params)
    stats = connect.get_collection_stats(collection)
    return ids, stats["partitions"][0]["segments"][0]["id"]
34
35
36
class TestListIdInSegmentBase:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
    ******************************************************************
    """

    def test_list_id_in_segment_collection_name_None(self, connect, collection):
        '''
        target: get vector ids where collection name is None
        method: call list_id_in_segment with the collection_name: None
        expected: exception raised
        '''
        collection_name = None
        ids, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    def test_list_id_in_segment_collection_name_not_existed(self, connect, collection):
        '''
        target: get vector ids where collection name does not exist
        method: call list_id_in_segment with a random collection_name, which is not in db
        expected: exception raised
        '''
        collection_name = gen_unique_str(collection_id)
        ids, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    @pytest.fixture(
        scope="function",
        params=gen_invalid_strs()
    )
    def get_collection_name(self, request):
        # Parametrized fixture: one invalid collection name per test run.
        yield request.param

    def test_list_id_in_segment_collection_name_invalid(self, connect, collection, get_collection_name):
        '''
        target: get vector ids where collection name is invalid
        method: call list_id_in_segment with invalid collection_name
        expected: exception raised
        '''
        collection_name = get_collection_name
        ids, segment_id = get_segment_id(connect, collection)
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection_name, segment_id)

    def test_list_id_in_segment_name_None(self, connect, collection):
        '''
        target: get vector ids where segment name is None
        method: call list_id_in_segment with the name: None
        expected: exception raised
        '''
        # Data is still inserted so that only the None segment is at fault.
        get_segment_id(connect, collection)
        segment = None
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, segment)

    def test_list_id_in_segment_name_not_existed(self, connect, collection):
        '''
        target: get vector ids where segment name does not exist
        method: call list_id_in_segment with a segment id that is not in the collection
        expected: exception raised
        '''
        ids, seg_id = get_segment_id(connect, collection)
        # Offsetting the real id is assumed to yield an unused segment id.
        with pytest.raises(Exception):
            connect.list_id_in_segment(collection, seg_id + 10000)

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_A(self, connect, collection):
        '''
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        '''
        nb = 1
        ids, seg_id = get_segment_id(connect, collection, nb=nb)
        vector_ids = connect.list_id_in_segment(collection, seg_id)
        # vector_ids should match ids
        assert len(vector_ids) == nb
        assert vector_ids[0] == ids[0]

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_B(self, connect, collection):
        '''
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        '''
        nb = 10
        entities = gen_entities(nb)
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        stats = connect.get_collection_stats(collection)
        # The test relies on the new partition being reported at index 1.
        assert stats["partitions"][1]["tag"] == tag
        vector_ids = connect.list_id_in_segment(collection, stats["partitions"][1]["segments"][0]["id"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_simple_index(self, request, connect):
        # Skip index types that the server cannot build when it runs in CPU mode.
        if str(connect._cmd("mode")) == "CPU":
            if request.param["index_type"] in index_cpu_not_support():
                pytest.skip("CPU not support index_type: ivf_sq8h")
        return request.param

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_A(self, connect, collection, get_simple_index):
        '''
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        '''
        ids, seg_id = get_segment_id(connect, collection, nb=nb, index_params=get_simple_index)
        # Any exception raised here fails the test with its full traceback.
        connect.list_id_in_segment(collection, seg_id)
        # TODO: verify the returned ids against `ids`

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_B(self, connect, collection, get_simple_index):
        '''
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it, build the index and call list_id_in_segment
        expected: status ok
        '''
        connect.create_partition(collection, tag)
        ids = connect.insert(collection, entities, partition_tag=tag)
        connect.flush([collection])
        # Build the index so the test really covers the "with index" case;
        # same call shape as get_segment_id().
        connect.create_index(collection, field_name, get_simple_index)
        stats = connect.get_collection_stats(collection)
        assert stats["partitions"][1]["tag"] == tag
        # Any exception raised here fails the test with its full traceback.
        connect.list_id_in_segment(collection, stats["partitions"][1]["segments"][0]["id"])
        # TODO: vector_ids should match ids

    @pytest.mark.level(2)
    def test_list_id_in_segment_after_delete_vectors(self, connect, collection):
        '''
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: status ok, vector_ids decreased after vectors deleted
        '''
        nb = 2
        ids, seg_id = get_segment_id(connect, collection, nb=nb)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(collection, delete_ids)
        connect.flush([collection])
        # Re-read the stats after the delete instead of reusing seg_id.
        stats = connect.get_collection_stats(collection)
        vector_ids = connect.list_id_in_segment(collection, stats["partitions"][0]["segments"][0]["id"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]

    @pytest.mark.level(2)
    def test_list_id_in_segment_with_index_ip(self, connect, collection, get_simple_index):
        '''
        target: get vector ids when there is an index using the IP metric
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        '''
        # Work on a copy: the fixture value is the shared parametrization
        # object, so mutating it would leak "IP" into other tests.
        index_params = dict(get_simple_index, metric_type="IP")
        ids, seg_id = get_segment_id(connect, collection, nb=nb, index_params=index_params)
        connect.list_id_in_segment(collection, seg_id)
        # TODO: verify the returned ids against `ids`
210
211
212
class TestListIdInSegmentBinary:
    """
    ******************************************************************
      The following cases are used to test `list_id_in_segment` function
      against binary-vector collections
    ******************************************************************
    """

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_A(self, connect, binary_collection):
        '''
        target: get vector ids when there is no index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        '''
        nb = 10
        vectors, entities = gen_binary_entities(nb)
        ids = connect.insert(binary_collection, entities)
        connect.flush([binary_collection])
        stats = connect.get_collection_stats(binary_collection)
        vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][0]["segments"][0]["id"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.mark.level(2)
    def test_list_id_in_segment_without_index_B(self, connect, binary_collection):
        '''
        target: get vector ids when there is no index but with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        '''
        connect.create_partition(binary_collection, tag)
        nb = 10
        vectors, entities = gen_binary_entities(nb)
        ids = connect.insert(binary_collection, entities, partition_tag=tag)
        connect.flush([binary_collection])
        stats = connect.get_collection_stats(binary_collection)
        # The test relies on the new partition being reported at index 1.
        vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][1]["segments"][0]["id"])
        # vector_ids should match ids
        assert len(vector_ids) == nb
        for i in range(nb):
            assert vector_ids[i] == ids[i]

    @pytest.fixture(
        scope="function",
        params=gen_simple_index()
    )
    def get_jaccard_index(self, request, connect):
        # Yield index params with the JACCARD metric for index types listed
        # by binary_support(); skip every other index type.
        logging.getLogger().info(request.param)
        if request.param["index_type"] not in binary_support():
            pytest.skip("not support")
        # Return a copy so the shared parametrization object is not mutated.
        return dict(request.param, metric_type="JACCARD")

    def test_list_id_in_segment_with_index_A(self, connect, binary_collection, get_jaccard_index):
        '''
        target: get vector ids when there is index
        method: call list_id_in_segment and check if the segment contains vectors
        expected: status ok
        '''
        # Use the `binary_collection` fixture; the previous `jac_collection`
        # name was undefined and raised NameError.
        ids, seg_id = get_segment_id(connect, binary_collection, nb=nb, index_params=get_jaccard_index, vec_type='binary')
        connect.list_id_in_segment(binary_collection, seg_id)
        # TODO: verify the returned ids against `ids`

    def test_list_id_in_segment_with_index_B(self, connect, binary_collection, get_jaccard_index):
        '''
        target: get vector ids when there is index and with partition
        method: create partition, add vectors to it and call list_id_in_segment, check if the segment contains vectors
        expected: status ok
        '''
        connect.create_partition(binary_collection, tag)
        # Insert the binary entity set; the float `entities` do not belong
        # in a binary collection.
        ids = connect.insert(binary_collection, binary_entities, partition_tag=tag)
        connect.flush([binary_collection])
        # NOTE(review): `get_jaccard_index` is requested but no index is built
        # here; the binary vector field name is not visible in this file, so
        # the create_index call is left for a follow-up -- confirm.
        stats = connect.get_collection_stats(binary_collection)
        assert stats["partitions"][1]["tag"] == tag
        connect.list_id_in_segment(binary_collection, stats["partitions"][1]["segments"][0]["id"])
        # TODO: vector_ids should match ids

    def test_list_id_in_segment_after_delete_vectors(self, connect, binary_collection, get_jaccard_index):
        '''
        target: get vector ids after vectors are deleted
        method: add vectors and delete a few, call list_id_in_segment
        expected: status ok, vector_ids decreased after vectors deleted
        '''
        nb = 2
        ids, seg_id = get_segment_id(connect, binary_collection, nb=nb, vec_type='binary', index_params=get_jaccard_index)
        delete_ids = [ids[0]]
        connect.delete_entity_by_id(binary_collection, delete_ids)
        connect.flush([binary_collection])
        # Re-read the stats after the delete instead of reusing seg_id.
        stats = connect.get_collection_stats(binary_collection)
        vector_ids = connect.list_id_in_segment(binary_collection, stats["partitions"][0]["segments"][0]["id"])
        assert len(vector_ids) == 1
        assert vector_ids[0] == ids[1]
307