1 | import pdb |
||
2 | import logging |
||
3 | |||
4 | logger = logging.getLogger("milvus_benchmark.parser") |
||
5 | |||
6 | |||
def operations_parser(operations):
    """
    Select the operation to run from the operations defined in a suite.

    Only the first (run_type, run_params) pair produced by
    operations.items() is used; its run type is logged at debug level
    and the pair is returned as a tuple.

    Raises Exception when operations is empty (or otherwise falsy).
    Returns None when operations is truthy but yields no items.
    """
    if not operations:
        raise Exception("No operations in suite defined")

    first_entry = next(iter(operations.items()), None)
    if first_entry is None:
        # Truthy container that iterates over nothing: nothing to select.
        return None

    kind, settings = first_entry
    logger.debug(kind)
    return (kind, settings)
||
13 | |||
14 | |||
def collection_parser(collection_name):
    """
    Parse the collection name defined in the suites.

    The name is split on "_" and read positionally as
    <data_type>_<size>_<index_file_size>_<dimension>_<metric_type>,
    for example "sift_1m_1024_128_l2". <size> is an integer optionally
    followed by a unit suffix: "m" (x 1,000,000) or "b" (x 1,000,000,000).
    A size without a suffix is taken as the literal row count. Segments
    after the fifth are ignored.

    Return data info with the given collection_name as the tuple
    (data_type, collection_size, index_file_size, dimension, metric_type),
    where collection_size, index_file_size and dimension are ints.

    Raises IndexError when a required segment is missing or the size
    segment is empty, and ValueError when a numeric segment is not an
    integer or the size suffix is not recognised.
    """
    unit_multipliers = {"m": 1000000, "b": 1000000000}

    parts = collection_name.split("_")
    data_type = parts[0]
    size_token = parts[1]

    if size_token.isdecimal():
        # No unit suffix: the token is already the row count. Stripping the
        # last character here would silently drop a digit.
        collection_size = int(size_token)
    else:
        unit = size_token[-1]
        if unit not in unit_multipliers:
            # Fail loudly instead of handing back a half-parsed string.
            raise ValueError(
                "Unknown collection size unit %r in collection name %r"
                % (unit, collection_name)
            )
        collection_size = int(size_token[:-1]) * unit_multipliers[unit]

    index_file_size = int(parts[2])
    dimension = int(parts[3])
    metric_type = str(parts[4])
    return (data_type, collection_size, index_file_size, dimension, metric_type)
||
35 | |||
36 | |||
def parse_ann_collection_name(collection_name):
    """
    Parse an ANN collection name of the form <data_type>_<dimension>_<metric>.

    The metric segment is translated to the metric type name used by the
    rest of the benchmark: "euclidean" -> "l2", "angular" -> "ip",
    "jaccard" -> "jaccard", "hamming" -> "hamming".

    Returns the tuple ("ann_" + data_type, dimension, metric_type) with
    dimension converted to int.

    Raises IndexError when the name has fewer than three segments and
    ValueError when the dimension is not an integer or the metric is not
    one of the supported names.
    """
    metric_aliases = {
        "euclidean": "l2",
        "angular": "ip",
        "jaccard": "jaccard",
        "hamming": "hamming",
    }

    parts = collection_name.split("_")
    data_type = parts[0]
    dimension = int(parts[1])
    metric = parts[2]

    if metric not in metric_aliases:
        # Without this check an unknown metric would surface as an
        # UnboundLocalError at the return statement.
        raise ValueError(
            "Unsupported metric %r in collection name %r"
            % (metric, collection_name)
        )
    return ("ann_" + data_type, dimension, metric_aliases[metric])
||
0 ignored issues
–
show
introduced
by
![]() |
|||
52 | |||
53 | |||
def _search_values(param, key, default, label):
    """Return param[key] normalised to a list, or default when key is absent."""
    if key not in param:
        return default
    values = param[key]
    if isinstance(values, int):
        return [values]
    if isinstance(values, list):
        # Copy so the caller's list is never shared with the result.
        return list(values)
    # Best effort: report the unexpected type but hand the value back as-is.
    logger.warning("Invalid format %s: %s", label, values)
    return values


def search_params_parser(param):
    """
    Parse the params on search field defined in suites.

    Reads the optional keys "top_ks", "nqs" and "nprobes" from param.
    A missing key falls back to its default ([10], [10] and [1]
    respectively), a single int is wrapped in a one-element list, and a
    list is copied. Any other type is logged as a warning and returned
    unchanged.

    Return search params: nq/top-k/nprobe, as the tuple
    (top_ks, nqs, nprobes).
    """
    top_ks = _search_values(param, "top_ks", [10], "top-ks")
    nqs = _search_values(param, "nqs", [10], "nqs")
    nprobes = _search_values(param, "nprobes", [1], "nprobes")
    return top_ks, nqs, nprobes
||
97 |