| Conditions | 23 |
| Total Lines | 108 |
| Code Lines | 81 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments inside a method's body, this is usually a good sign that the commented part should be extracted into a new method; the comment itself is a good starting point when coming up with a name for that new method.
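Commonly applied refactorings include Extract Method.
If many parameters/temporary variables are present, consider Replace Temp with Query, Introduce Parameter Object, or Preserve Whole Object.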
Complex classes like parse_bench_all_ivf.plot_tradeoffs() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | # Copyright (c) Facebook, Inc. and its affiliates. |
||
def _linestyle_for(name):
    """Return the matplotlib line style used for the method called *name*."""
    if 'PQ' in name:
        return ':'
    if 'SQ4' in name:
        return '-.'
    if 'SQ8' in name:
        return '--'
    return '-'


def _format_add_time(add_time):
    """Format *add_time* (seconds) as ', HhMM' above two hours, else ', MmSS'."""
    if add_time > 7200:
        add_min = int(add_time) // 60
        return ', %dh%02d' % (add_min // 60, add_min % 60)
    add_sec = int(add_time)
    return ', %dm%02d' % (add_sec // 60, add_sec % 60)


def plot_tradeoffs(allres, code_size, recall_rank):
    """Plot search time against a result column for every method of one code size.

    Methods that own at least one operating point on the lower envelope
    (no other point has a larger-or-equal value in the selected column with
    a smaller time) are drawn in colour with a legend entry; the others are
    drawn in thin light gray and listed under the x-axis label.  The figure
    is saved to 'figs/tradeoffs_<db>_cs<code_size>_r<recall_rank>.png'.

    Relies on the module-level names ``db``, ``allstats``, ``n_threads``,
    ``unitsize``, ``dbsize_from_name`` and ``pyplot``.

    Args:
        allres: dict mapping method name to a result array; only
            2-dimensional arrays are considered.  Column 3 is used as the
            search time and column ``log10(recall_rank)`` as the x value.
        code_size: only methods whose ``unitsize(name)`` equals this value
            are plotted.
        recall_rank: rank (1, 10, 100, ...) selecting the result column.

    Returns:
        ``(selected_methods, not_selected)``, two lists of method names.
        Both are empty, and nothing is plotted, when no method matches
        ``code_size``.
    """
    dbsize = dbsize_from_name(db)
    recall_idx = int(np.log10(recall_rank))

    # Gather every operating point as a column (method index, perf, time).
    names = []
    columns = []
    for name, res in sorted(allres.items()):
        if res.ndim != 2 or unitsize(name) != code_size:
            continue
        perf = res[:, recall_idx]
        times = res[:, 3]
        method_ids = np.full(times.size, len(names), dtype=int)
        columns.append(np.vstack((method_ids, perf, times)))
        names.append(name)

    if not columns:
        # np.hstack raises ValueError on an empty list; nothing to plot.
        return [], []

    bigtab = np.hstack(columns)
    bigtab = bigtab[:, np.argsort(bigtab[1, :])]

    # A point is kept when its time equals the minimum time over all
    # points at or after it in the perf-sorted order.
    best_times = np.minimum.accumulate(bigtab[2, ::-1])[::-1]
    selection = np.where(bigtab[2, :] == best_times)

    selected_methods = [
        names[i] for i in np.unique(bigtab[0, selection].astype(int))
    ]
    # Sorted so the "omitted" label and return value are reproducible.
    not_selected = sorted(set(names) - set(selected_methods))

    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("methods without an optimal OP:  %s" % (not_selected,))

    pyplot.title('database ' + db + ' code_size=%d' % code_size)

    # grayed out lines (names already passed the ndim / code_size filter)
    for name in not_selected:
        res = allres[name]
        pyplot.semilogy(res[:, recall_idx], res[:, 3], label=None,
                        linestyle=_linestyle_for(name),
                        marker='o' if 'HNSW' in name else '+',
                        color='#cccccc', linewidth=0.2)

    # important methods
    id_size = 8  # 64 bit
    for name in selected_methods:
        res = allres[name]
        stats = allstats[name]
        tot_size = stats['index_size'] + stats['tables_size']

        addt = ''
        if 'add_time' in stats:
            addt = _format_add_time(stats['add_time'])

        # Size overhead in percent relative to (code + id) bytes per vector.
        label = name + ' (size+%.1f%%%s)' % (
            tot_size / float((code_size + id_size) * dbsize) * 100 - 100,
            addt)

        pyplot.semilogy(res[:, recall_idx], res[:, 3], label=label,
                        linestyle=_linestyle_for(name),
                        marker='o' if 'HNSW' in name else '+')

    # List the omitted methods under the x label, wrapping once the
    # current line has grown past 80 characters.
    om = ''
    if not_selected:
        om = '\nomitted:'
        nc = len(om)
        for m in not_selected:
            if nc > 80:
                om += '\n'
                nc = 0
            om += ' ' + m
            nc += len(m) + 1

    pyplot.xlabel('1-recall at %d %s' % (recall_rank, om))
    pyplot.ylabel('search time per query (ms, %d threads)' % n_threads)
    pyplot.legend()
    pyplot.grid()
    pyplot.savefig('figs/tradeoffs_%s_cs%d_r%d.png' % (
        db, code_size, recall_rank))
    return selected_methods, not_selected
||
| 264 | |||
| 269 |