utils.gen_binary_sub_vectors()   A
last analyzed

Complexity

Conditions 4

Size

Total Lines 13
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 13
nop 2
dl 0
loc 13
rs 9.75
c 0
b 0
f 0
1
import os
2
import sys
3
import random
4
import string
5
import struct
6
import logging
7
import time, datetime
8
import copy
9
import numpy as np
10
from milvus import Milvus, IndexType, MetricType
11
12
# Port used when building connections in the tests
# (presumably the Milvus server's default port -- confirm against deployment).
port = 19530
# Absolute tolerance used by assert_equal_vector() for element comparison.
epsilon = 1e-06
# Value enable_flush() writes to storage.auto_flush_interval.
default_flush_interval = 1
# Value disable_flush() writes to storage.auto_flush_interval.
big_flush_interval = 1000
16
17
# Index types iterated by the generators in this module. The order matters:
# gen_simple_index() pairs each entry with a parameter dict by position.
all_index_types = [
    IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H,
    IndexType.IVF_PQ, IndexType.HNSW, IndexType.RNSG, IndexType.ANNOY,
]
27
28
29
def get_milvus(host, port, uri=None, handler=None, **kwargs):
    """Build and return a ``Milvus`` client.

    :param host: server host, used only when ``uri`` is None.
    :param port: server port, used only when ``uri`` is None.
    :param uri: when not None, passed to ``Milvus`` instead of host/port.
    :param handler: client handler name; ``"GRPC"`` is used when None.
    :param kwargs: only ``try_connect`` is read (defaults to True).
    :return: the constructed ``Milvus`` instance.
    """
    handler = "GRPC" if handler is None else handler
    try_connect = kwargs.get("try_connect", True)
    if uri is None:
        return Milvus(host=host, port=port, handler=handler, try_connect=try_connect)
    return Milvus(uri=uri, handler=handler, try_connect=try_connect)
38
39
40
def disable_flush(connect):
    """Set ``storage.auto_flush_interval`` to ``big_flush_interval``.

    Asserts that the status returned by ``connect.set_config`` is OK.
    """
    status, _reply = connect.set_config(
        "storage", "auto_flush_interval", big_flush_interval
    )
    assert status.OK()
43
44
45
def enable_flush(connect):
    """Reset ``storage.auto_flush_interval`` to ``default_flush_interval``.

    After writing the value, the config is read back and asserted to equal
    the string form of ``default_flush_interval``.
    """
    section, key = "storage", "auto_flush_interval"

    # Write the default interval back.
    status, _reply = connect.set_config(section, key, default_flush_interval)
    assert status.OK()

    # Read it back to confirm the change took effect.
    status, current = connect.get_config(section, key)
    assert status.OK()
    assert current == str(default_flush_interval)
52
53
54
def gen_inaccuracy(num):
    """Return ``num`` divided by 255.0.

    NOTE(review): presumably a tolerance for 8-bit quantized indexes --
    confirm against callers.
    """
    scaled = num / 255.0
    return scaled
56
57
58
def gen_vectors(num, dim):
    """Return ``num`` lists, each holding ``dim`` floats from ``random.random()``."""
    rows = []
    for _ in range(num):
        row = [random.random() for _ in range(dim)]
        rows.append(row)
    return rows
60
61
62
def gen_binary_vectors(num, dim):
    """Generate ``num`` random bit vectors of length ``dim``.

    :return: ``(raw_vectors, binary_vectors)`` where ``raw_vectors`` holds the
        0/1 lists and ``binary_vectors`` holds the same bits packed into
        ``bytes`` with ``np.packbits``.
    """
    bit_rows = []
    packed_rows = []
    for _ in range(num):
        bits = [random.randint(0, 1) for _ in range(dim)]
        packed = np.packbits(bits, axis=-1)
        bit_rows.append(bits)
        packed_rows.append(bytes(packed.tolist()))
    return bit_rows, packed_rows
70
71
72
def jaccard(x, y):
    """Return the Jaccard distance ``1 - |x AND y| / |x OR y|`` of two bit vectors.

    :param x: sequence of truthy/falsy values.
    :param y: sequence of the same length as ``x``.
    :return: the distance as a NumPy float (NaN when both inputs are all zero,
        because the division is 0/0).
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum())
76
77
78
def hamming(x, y):
    """Return the Hamming distance: the number of positions where ``x`` and ``y`` differ.

    :param x: sequence of truthy/falsy values.
    :param y: sequence of the same length as ``x``.
    :return: count of differing positions as a NumPy integer.
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return np.bitwise_xor(x, y).sum()
82
83
84
def tanimoto(x, y):
    """Return ``-log2(|x AND y| / |x OR y|)`` for two bit vectors.

    :param x: sequence of truthy/falsy values.
    :param y: sequence of the same length as ``x``.
    :return: the distance as a NumPy float.
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return -np.log2(np.double(np.bitwise_and(x, y).sum()) / np.double(np.bitwise_or(x, y).sum()))
88
89
90
def substructure(x, y):
    """Return ``1 - |x AND y| / |y|`` where ``|y|`` is the number of set bits in ``y``.

    The result is 0 when every set bit of ``y`` is also set in ``x``.

    :param x: sequence of truthy/falsy values.
    :param y: sequence of the same length as ``x``.
    :return: the distance as a NumPy float.
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(y)
94
95
96
def superstructure(x, y):
    """Return ``1 - |x AND y| / |x|`` where ``|x|`` is the number of set bits in ``x``.

    The result is 0 when every set bit of ``x`` is also set in ``y``.

    :param x: sequence of truthy/falsy values.
    :param y: sequence of the same length as ``x``.
    :return: the distance as a NumPy float.
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    x = np.asarray(x, bool)
    y = np.asarray(y, bool)
    return 1 - np.double(np.bitwise_and(x, y).sum()) / np.count_nonzero(x)
100
101
102
def gen_binary_sub_vectors(vectors, length):
    """Build bit vectors from the first ``length`` entries of ``vectors``.

    Each output row starts as zeros (sized from ``vectors[0]``) and gets a 1 at
    every position where the source entry equals 1, so for 0/1 inputs the row
    is a copy of the source vector.

    :return: ``(raw_vectors, binary_vectors)``; the second list holds each row
        packed into ``bytes`` with ``np.packbits``.
    """
    bit_rows = []
    packed_rows = []
    width = len(vectors[0])
    for row_no in range(length):
        row = [0] * width
        for pos, bit in enumerate(vectors[row_no]):
            if bit == 1:
                row[pos] = 1
        bit_rows.append(row)
        packed = np.packbits(row, axis=-1)
        packed_rows.append(bytes(packed.tolist()))
    return bit_rows, packed_rows
115
116
117
def gen_binary_super_vectors(vectors, length):
    """Return ``length`` all-ones bit vectors sized like ``vectors[0]``.

    An all-ones vector has every bit of any source vector set.

    :return: ``(raw_vectors, binary_vectors)``; the second list holds each row
        packed into ``bytes`` with ``np.packbits``.
    """
    bit_rows = []
    packed_rows = []
    width = len(vectors[0])
    for row_no in range(length):
        # The count itself is unused; the lookup is kept so that an
        # out-of-range ``length`` still raises as it did before.
        np.count_nonzero(vectors[row_no])
        ones = [1] * width
        bit_rows.append(ones)
        packed_rows.append(bytes(np.packbits(ones, axis=-1).tolist()))
    return bit_rows, packed_rows
127
    
128
129
def gen_single_vector(dim):
    """Return a one-element list holding a vector of ``dim`` random floats."""
    vector = [random.random() for _ in range(dim)]
    return [vector]
131
132
133
def gen_vector(nb, d, seed=np.random.RandomState(1234)):
    """Return ``nb`` float32 vectors of dimension ``d`` as nested lists.

    :param seed: a ``RandomState`` used to draw the values. The default
        instance is created once when the function is defined and is shared
        by every call that omits ``seed``, so successive calls continue the
        same random stream.
    """
    return seed.rand(nb, d).astype("float32").tolist()
136
137
138
def gen_unique_str(str_value=None):
    """Return a name ending in 8 random ASCII letters/digits.

    :param str_value: optional prefix; the result is ``str_value + "_" + suffix``.
        When None the result is ``"test_" + suffix``.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = "".join(random.choice(alphabet) for _ in range(8))
    if str_value is None:
        return "test_" + suffix
    return str_value + "_" + suffix
141
142
143
def gen_long_str(num):
    """Return a string of ``num`` characters drawn at random from ``'tomorrow'``.

    The previous version built the string but never returned it, so callers
    always received None; it also shadowed the ``string`` module with a local.

    :param num: number of characters to generate; 0 yields ``''``.
    :return: the generated string.
    """
    return "".join(random.choice('tomorrow') for _ in range(num))
148
149
150
def gen_invalid_ips():
    """Return a fresh list of strings intended as invalid IP addresses."""
    malformed = ["127.0.0", "12-s", " ", "12 s", "BB。A", " siede ", "(mn)", "中文"]
    # 511 characters: 256 "a"s joined by "a".
    overlong = "a" * 511
    return malformed + [overlong]
168
169
170
def gen_invalid_ports():
    """Return a fresh list of values intended as invalid ports.

    Covers a blank string, a negative number, a value above the port range,
    a port the tests treat as not the correct one (39540), and non-numeric strings.
    """
    numeric_like = [" ", -1, 100000, 39540]
    non_numeric = ["BB。A", " siede ", "(mn)", "中文"]
    return numeric_like + non_numeric
185
186
187
def gen_invalid_uris():
    """Return a fresh list of strings intended as invalid connection URIs.

    Only the blank/non-ASCII cases and the invalid-host cases are active.
    """
    not_a_uri = [" ", "中文"]
    bad_host = ["tcp:// :19530", "tcp://127.0.0:19530", "tcp://\n:19530"]
    return not_a_uri + bad_host
214
215
216
def gen_invalid_collection_names():
    """Return a fresh list of strings intended as invalid collection names."""
    bad_names = [
        "12-s", " ", "12 s", "BB。A", "c|c", " siede ",
        "(mn)", "pip+", "=c", "中文",
    ]
    # 511 characters: 256 "a"s joined by "a".
    bad_names.append("a" * 511)
    return bad_names
233
234
235
def gen_invalid_top_ks():
    """Return a fresh list of values intended as invalid ``top_k`` arguments."""
    non_strings = [0, -1, None, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    # Final entry is 511 characters: 256 "a"s joined by "a".
    return non_strings + strings + ["a" * 511]
256
257
258
def gen_invalid_dims():
    """Return a fresh list of values intended as invalid vector dimensions."""
    non_strings = [0, -1, 100001, 1000000000000001, None, False, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    # Final entry is 511 characters: 256 "a"s joined by "a".
    return non_strings + strings + ["a" * 511]
282
283
284
def gen_invalid_file_sizes():
    """Return a fresh list of values intended as invalid index file sizes."""
    non_strings = [0, -1, 1000000000000001, None, False, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    # Final entry is 511 characters: 256 "a"s joined by "a".
    return non_strings + strings + ["a" * 511]
307
308
309
def gen_invalid_index_types():
    """Return a fresh list of values intended as invalid index types."""
    non_strings = [0, -1, 100, 1000000000000001, False, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    # Final entry is 511 characters: 256 "a"s joined by "a".
    return non_strings + strings + ["a" * 511]
333
334
335
def gen_invalid_params():
    """Return a fresh list of values intended as invalid index/search parameter values."""
    non_strings = [9999999999, -1, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    return non_strings + strings
355
356
357
def gen_invalid_nprobes():
    """Return a fresh list of values intended as invalid ``nprobe`` arguments."""
    non_strings = [0, -1, 1000000000000001, None, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    return non_strings + strings
378
379
380
def gen_invalid_metric_types():
    """Return a fresh list of values intended as invalid metric types."""
    non_strings = [0, -1, 1000000000000001, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    return non_strings + strings
401
402
403
def gen_invalid_vectors():
    """Return a fresh list of values intended as invalid vector arguments."""
    malformed = ["1*2", [], [1], [1, 2], [" "], ['a'], [None], None, (1, 2), {"a": 1}]
    strings = [" ", "", "String", "12-s", "BB。A", " siede ", "(mn)", "pip+", "=c", "中文"]
    # Final entry is 511 characters: 256 "a"s joined by "a".
    return malformed + strings + ["a" * 511]
428
429
430
def gen_invalid_vector_ids():
    """Return a fresh list of values intended as invalid vector ids.

    Includes floats, None, an integer far larger than 64 bits, and strings.
    """
    # Integer well beyond the int64 range.
    oversized_id = 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
    non_strings = [1.0, -1.0, None, oversized_id]
    strings = [" ", "", "String", "BB。A", " siede ", "(mn)", "=c", "中文"]
    return non_strings + strings
447
448
449
def gen_invalid_cache_config():
    """Return a fresh list of values intended as invalid cache configuration values.

    9223372036854775808 is 2**63, one past the largest signed 64-bit integer.
    """
    non_strings = [0, -1, 9223372036854775808, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ", "", "String", "12-s", "BB。A", " siede ", "(mn)",
        "pip+", "=c", "中文", "'123'", "さようなら",
    ]
    return non_strings + strings
471
472
473
def gen_invalid_gpu_config():
    """Return a fresh list of values intended as invalid GPU configuration values."""
    non_strings = [-1, [1, 2, 3], (1, 2), {"a": 1}]
    strings = [
        " ", "", "String", "12-s", "BB。A", " siede ", "(mn)",
        "pip+", "=c", "中文", "'123'",
    ]
    return non_strings + strings
492
493
494
def gen_invaild_search_params():
    """Build invalid search-parameter cases for every non-FLAT index type.

    Each case is ``{"index_type": ..., "search_param": {...}}``. Every index
    type first gets an unknown-key case, then one case per value from
    ``gen_invalid_params()`` under that type's search key. RNSG gets the
    unknown-key case a second time at the end of its group, and ANNOY skips
    integer values.
    """
    bad_key_value = 100
    ivf_family = [IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H, IndexType.IVF_PQ]
    cases = []

    def add(kind, search_param):
        cases.append({"index_type": kind, "search_param": search_param})

    for kind in all_index_types:
        if kind == IndexType.FLAT:
            continue
        add(kind, {"invalid_key": bad_key_value})
        if kind in ivf_family:
            for bad in gen_invalid_params():
                add(kind, {"nprobe": bad})
        elif kind == IndexType.HNSW:
            for bad in gen_invalid_params():
                add(kind, {"ef": bad})
        elif kind == IndexType.RNSG:
            for bad in gen_invalid_params():
                add(kind, {"search_length": bad})
            add(kind, {"invalid_key": 100})
        elif kind == IndexType.ANNOY:
            for bad in gen_invalid_params():
                if not isinstance(bad, int):
                    add(kind, {"search_k": bad})
    return cases
521
522
523
def gen_invalid_index():
    """Build invalid index-creation cases.

    Each case is ``{"index_type": ..., "index_param": {...}}``. The list
    covers, in order: invalid index types, then one group per index parameter
    where a single parameter takes each value from ``gen_invalid_params()``
    while the others stay fixed, then three unknown-key cases, then invalid
    ANNOY ``n_trees`` values.
    """
    cases = []

    def add(kind, index_param):
        cases.append({"index_type": kind, "index_param": index_param})

    for bad_type in gen_invalid_index_types():
        add(bad_type, {"nlist": 1024})

    for bad in gen_invalid_params():
        add(IndexType.IVFLAT, {"nlist": bad})
    for bad in gen_invalid_params():
        add(IndexType.IVF_PQ, {"nlist": 1024, "m": 16, "nbits": bad})

    for bad in gen_invalid_params():
        add(IndexType.HNSW, {"M": bad, "efConstruction": 100})
    for bad in gen_invalid_params():
        add(IndexType.HNSW, {"M": 16, "efConstruction": bad})

    for bad in gen_invalid_params():
        add(IndexType.RNSG,
            {"search_length": bad, "out_degree": 40, "candidate_pool_size": 50, "knng": 100})
    for bad in gen_invalid_params():
        add(IndexType.RNSG,
            {"search_length": 100, "out_degree": bad, "candidate_pool_size": 50, "knng": 100})
    for bad in gen_invalid_params():
        add(IndexType.RNSG,
            {"search_length": 100, "out_degree": 40, "candidate_pool_size": bad, "knng": 100})

    # Unknown parameter keys.
    # NOTE(review): IVF_FLAT is spelled differently from the IVFLAT used
    # everywhere else in this file -- confirm it is an alias in IndexType.
    add(IndexType.IVF_FLAT, {"invalid_key": 1024})
    add(IndexType.HNSW, {"invalid_key": 16, "efConstruction": 100})
    add(IndexType.RNSG,
        {"invalid_key": 100, "out_degree": 40, "candidate_pool_size": 300, "knng": 100})

    for bad in gen_invalid_params():
        add(IndexType.ANNOY, {"n_trees": bad})

    return cases
564
565
566
def gen_index():
    """Build index-creation parameter combinations.

    Each entry is ``{"index_type": ..., "index_param": {...}}``. FLAT gets a
    single entry; the IVF, IVF_PQ, HNSW and RNSG types get the full cross
    product of their candidate parameter values. Index types without a branch
    here (ANNOY) contribute no entries.
    """
    nlists = [1, 1024, 16384]
    pq_ms = [128, 64, 32, 16, 8, 4]
    pq_nbits = [1, 2, 4, 8, 9]
    Ms = [5, 24, 48]
    efConstructions = [100, 300, 500]
    search_lengths = [10, 100, 300]
    out_degrees = [5, 40, 300]
    candidate_pool_sizes = [50, 100, 300]
    knngs = [5, 100, 300]

    combos = []

    def add(kind, index_param):
        combos.append({"index_type": kind, "index_param": index_param})

    for kind in all_index_types:
        if kind == IndexType.FLAT:
            add(kind, {"nlist": 1024})
        elif kind in [IndexType.IVFLAT, IndexType.IVF_SQ8, IndexType.IVF_SQ8H]:
            for nlist in nlists:
                add(kind, {"nlist": nlist})
        elif kind == IndexType.IVF_PQ:
            for nlist in nlists:
                for m in pq_ms:
                    for nbits in pq_nbits:
                        add(kind, {"nlist": nlist, "m": m, "nbits": nbits})
        elif kind == IndexType.HNSW:
            for M in Ms:
                for efConstruction in efConstructions:
                    add(kind, {"M": M, "efConstruction": efConstruction})
        elif kind == IndexType.RNSG:
            for search_length in search_lengths:
                for out_degree in out_degrees:
                    for candidate_pool_size in candidate_pool_sizes:
                        for knng in knngs:
                            add(kind, {"search_length": search_length,
                                       "out_degree": out_degree,
                                       "candidate_pool_size": candidate_pool_size,
                                       "knng": knng})

    return combos
607
608
609
def gen_simple_index():
    """Return one index-creation case per entry of ``all_index_types``.

    The parameter dicts are matched to index types by position, so the list
    below must stay aligned with ``all_index_types``.
    """
    per_type_params = [
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024},
        {"nlist": 1024, "m": 16},
        {"M": 48, "efConstruction": 500},
        {"search_length": 50, "out_degree": 40, "candidate_pool_size": 100, "knng": 50},
        {"n_trees": 4},
    ]
    return [
        {"index_type": kind, "index_param": per_type_params[position]}
        for position, kind in enumerate(all_index_types)
    ]
624
625
626
def get_search_param(index_type):
    """Return the search-parameter dict used for ``index_type``.

    :return: ``{"nprobe": 32}`` for FLAT and the IVF types, ``{"ef": 64}`` for
        HNSW, ``{"search_length": 100}`` for RNSG, ``{"search_k": 100}`` for
        ANNOY. For any other value an info message is logged and None is
        returned.
    """
    nprobe_types = [
        IndexType.FLAT, IndexType.IVFLAT, IndexType.IVF_SQ8,
        IndexType.IVF_SQ8H, IndexType.IVF_PQ,
    ]
    if index_type in nprobe_types:
        return {"nprobe": 32}
    if index_type == IndexType.HNSW:
        return {"ef": 64}
    if index_type == IndexType.RNSG:
        return {"search_length": 100}
    if index_type == IndexType.ANNOY:
        return {"search_k": 100}

    logging.getLogger().info("Invalid index_type.")
    return None
638
639
640
def assert_has_collection(conn, collection_name):
    """Report whether ``conn`` has ``collection_name``.

    Despite the name, nothing is asserted: the function returns the result of
    ``status.OK() and ok`` from ``conn.has_collection``.
    """
    status, exists = conn.has_collection(collection_name)
    healthy = status.OK()
    return exists if healthy else healthy
643
644
645
def assert_equal_vector(v1, v2):
    """Assert that ``v1`` and ``v2`` have equal length and match element-wise.

    Two elements match when their absolute difference is below ``epsilon``.
    """
    assert len(v1) == len(v2)
    for left, right in zip(v1, v2):
        assert abs(left - right) < epsilon
650
651
652
def restart_server(helm_release_name):
    """Restart a server pod by deleting it, then wait for its pods to run.

    Looks in the ``milvus`` namespace for the first pod whose name contains
    ``helm_release_name`` but not ``"mysql"``, deletes it, sleeps 5 seconds,
    then polls every pod whose name contains ``helm_release_name`` until its
    phase is ``"Running"``.

    The previous wait loop used ``elapsed > timeout`` as its condition, so it
    never executed and the function returned True without checking anything.
    The loop now runs while the elapsed time is within the timeout.

    :param helm_release_name: substring identifying the release's pods.
    :return: True when every matching pod reached ``Running`` within 120
        seconds; False when no pod was found, the delete call raised, or a
        pod timed out.
    """
    timeout = 120
    from kubernetes import client, config
    client.rest.logger.setLevel(logging.WARNING)

    namespace = "milvus"
    config.load_kube_config()
    v1 = client.CoreV1Api()

    # Pick the first pod of this release that is not the MySQL pod.
    pod_name = None
    for pod in v1.list_namespaced_pod(namespace).items:
        name = pod.metadata.name
        if name.find(helm_release_name) != -1 and name.find("mysql") == -1:
            pod_name = name
            break
    if pod_name is None:
        logging.error("Pod: %s not found" % helm_release_name)
        return False

    try:
        v1.delete_namespaced_pod(pod_name, namespace)
    except Exception as e:
        logging.error(str(e))
        logging.error("Exception when calling CoreV1Api->delete_namespaced_pod")
        return False

    time.sleep(5)
    # Check that the release's pods come back up.
    for pod in v1.list_namespaced_pod(namespace).items:
        pod_name_tmp = pod.metadata.name
        if pod_name_tmp.find(helm_release_name) == -1:
            continue
        logging.debug(pod_name_tmp)
        start_time = time.time()
        while time.time() - start_time <= timeout:
            status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
            if status_res.status.phase == "Running":
                break
            time.sleep(1)
        else:
            # The loop ended without a break: the pod never reached Running.
            logging.error("Restart pod: %s timeout" % pod_name_tmp)
            return False
    return True
702