Issues (718)

tests/milvus_benchmark/parser.py (1 issue)

Severity
1
import pdb
2
import logging
3
4
# Module-level logger used by the parser functions in this file.
logger = logging.getLogger("milvus_benchmark.parser")
5
6
7
def operations_parser(operations):
    """
    Pick the operation to run from the operations mapping of a suite.

    Only the first (run_type, run_params) pair, in the mapping's iteration
    order, is returned; any further entries are ignored.

    Return a tuple (run_type, run_params).
    Raise Exception if operations is empty or None.
    """
    if not operations:
        raise Exception("No operations in suite defined")
    # Only the first entry is ever returned, so fetch it directly rather
    # than through a loop that returns on its first pass.
    run_type, run_params = next(iter(operations.items()))
    logger.debug(run_type)
    return (run_type, run_params)
13
14
15
def collection_parser(collection_name):
    """
    Parse the collection name defined in the suites.

    The name is split on "_" and its first five fields are read as
    <data_type>_<size>_<index_file_size>_<dimension>_<metric_type>,
    e.g. "sift_1m_1024_128_l2". <size> is either a plain integer or an
    integer followed by a unit suffix: "m" (x 1,000,000) or
    "b" (x 1,000,000,000). Fields after the fifth are ignored.

    Return (data_type, collection_size, index_file_size, dimension,
    metric_type), where the three numeric fields are ints.
    Raise ValueError if the name has fewer than five fields, if the size
    suffix is not recognised, or if a numeric field is not an integer.
    """
    fields = collection_name.split("_")
    if len(fields) < 5:
        raise ValueError(
            "Invalid collection name %r: expected at least 5 '_'-separated "
            "fields" % (collection_name,)
        )
    data_type, size_field = fields[0], fields[1]

    unit_multipliers = {"m": 1000000, "b": 1000000000}
    if size_field.isdigit():
        # No unit suffix: the field already is the row count. Slicing off
        # the last character here would silently drop a digit.
        collection_size = int(size_field)
    elif size_field[-1:] in unit_multipliers:
        collection_size = int(size_field[:-1]) * unit_multipliers[size_field[-1]]
    else:
        raise ValueError(
            "Invalid collection size %r in collection name %r"
            % (size_field, collection_name)
        )

    index_file_size = int(fields[2])
    dimension = int(fields[3])
    metric_type = str(fields[4])
    return (data_type, collection_size, index_file_size, dimension, metric_type)
35
36
37
def parse_ann_collection_name(collection_name):
    """
    Parse an ANN collection name of the form
    <data_type>_<dimension>_<metric>, e.g. "sift_128_euclidean".

    The metric field is mapped to a metric type:
    "euclidean" -> "l2", "angular" -> "ip", "jaccard" -> "jaccard",
    "hamming" -> "hamming". Fields after the third are ignored.

    Return ("ann_" + data_type, dimension, metric_type) with dimension
    as an int.
    Raise ValueError if the name has fewer than three fields, the
    dimension is not an integer, or the metric is not one of the above.
    """
    metric_types = {
        "euclidean": "l2",
        "angular": "ip",
        "jaccard": "jaccard",
        "hamming": "hamming",
    }
    fields = collection_name.split("_")
    if len(fields) < 3:
        raise ValueError(
            "Invalid ann collection name %r: expected at least 3 "
            "'_'-separated fields" % (collection_name,)
        )
    data_type = fields[0]
    dimension = int(fields[1])
    metric = fields[2]
    # Fail with a clear message instead of leaving metric_type unbound
    # when the metric is not a known one.
    if metric not in metric_types:
        raise ValueError(
            "Unsupported metric %r in ann collection name %r"
            % (metric, collection_name)
        )
    return ("ann_" + data_type, dimension, metric_types[metric])
0 ignored issues
show
The variable metric_type does not seem to be defined for all execution paths.
Loading history...
52
53
54
def _as_param_list(param, key, default, label):
    """
    Read param[key] (or default when the key is absent) as a list.

    An int becomes a one-element list and a list is shallow-copied. Any
    other value is logged as invalid under the given label and returned
    unchanged.
    """
    value = param[key] if key in param else default
    if isinstance(value, int):
        return [value]
    if isinstance(value, list):
        return list(value)
    logger.warning("Invalid format %s: %s", label, value)
    return value


def search_params_parser(param):
    """
    Parse the params on search field defined in suites.

    Reads the "top_ks", "nqs" and "nprobes" entries of param. Each may
    be a single int or a list; a missing entry defaults to [10], [10]
    and [1] respectively. A value of any other type only triggers a
    warning and is returned as given.

    Return search params as a tuple: (top_ks, nqs, nprobes)
    """
    top_ks = _as_param_list(param, "top_ks", [10], "top-ks")
    nqs = _as_param_list(param, "nqs", [10], "nqs")
    nprobes = _as_param_list(param, "nprobes", [1], "nprobes")
    return top_ks, nqs, nprobes
97