Conditions | 25 |
Total Lines | 188 |
Code Lines | 133 |
Lines | 0 |
Ratio | 0 % |
Tests | 116 |
CRAP Score | 25 |
Changes | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include Extract Method.
If many parameters or temporary variables are present, Introduce Parameter Object and Replace Temp with Query can also help.
Complex methods like crowdtruth.models.metrics.Metrics.run() often do a lot of different things. To break such a method down, we need to identify a cohesive component within it. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | |||
@staticmethod
def run(results, config, max_delta=0.001):
    """Iteratively compute CrowdTruth quality scores until convergence.

    Fixed-point iteration over unit quality (sqs/uqs), worker quality
    (wqs = wwa * wsa) and -- for closed tasks -- annotation quality
    (rqs/aqs). Iterates until the largest score change in one pass drops
    below 0.001.

    Args:
        results: dict of pandas DataFrames with keys 'judgments', 'units',
            'workers' (and 'annotations' for closed tasks); quality-score
            columns are added to these frames.
        config: task configuration; `config.output` supplies the annotation
            column name and `config.open_ended_task` selects how annotation
            quality is initialized and reported.
        max_delta: initial delta value, used only to enter the first
            iteration; the convergence threshold itself is the constant
            0.001.

    Returns:
        The same `results` dict, with quality-score columns added.
    """
    judgments = results['judgments'].copy()
    units = results['units'].copy()

    # TODO: change to use all vectors in one unit
    col = list(config.output.values())[0]
    sent_rel_dict = dict(units.copy()[col])

    def expanded_vector(worker, unit):
        # Align a worker's annotation vector with the unit's full key set,
        # filling annotations the worker did not select with 0.
        vector = Counter()
        for rel in unit:
            vector[rel] = worker[rel] if rel in worker else 0
        return vector

    # fill judgment vectors with unit keys
    for index, row in judgments.iterrows():
        judgments.at[index, col] = expanded_vector(row[col], units.at[row['unit'], col])

    # unit -> {worker -> annotation vector}
    sent_work_rel_dict = {
        name: group.set_index('worker')[col].to_dict()
        for name, group in judgments[['unit', 'worker', col]].copy().groupby('unit')
    }
    # worker -> {unit -> annotation vector}
    work_sent_rel_dict = {
        name: group.set_index('unit')[col].to_dict()
        for name, group in judgments[['worker', 'unit', col]].copy().groupby('worker')
    }

    # per-iteration history of each score family; index 0 holds the
    # uniform initial values, index 1 the first computed iteration
    sqs_list = []
    wqs_list = []
    wwa_list = []
    wsa_list = []
    rqs_list = []

    # all scores start at 1.0
    sqs = {sentence_id: 1.0 for sentence_id in sent_work_rel_dict}
    wqs = {worker_id: 1.0 for worker_id in work_sent_rel_dict}
    wwa = {worker_id: 1.0 for worker_id in work_sent_rel_dict}
    wsa = {worker_id: 1.0 for worker_id in work_sent_rel_dict}

    sqs_list.append(sqs.copy())
    wqs_list.append(wqs.copy())
    wwa_list.append(wwa.copy())
    wsa_list.append(wsa.copy())

    # initialize RQS depending on whether or not it is an open ended task
    rqs = {}
    if not config.open_ended_task:
        # closed task: every unit shares the same annotation vocabulary,
        # so take the key set of the first unit
        rqs_keys = list(sent_rel_dict[list(sent_rel_dict.keys())[0]].keys())
        for relation in rqs_keys:
            rqs[relation] = 1.0
    else:
        # open-ended task: collect the union of annotations over all units
        for sentence_id in sent_rel_dict:
            for relation in sent_rel_dict[sentence_id]:
                rqs[relation] = 1.0
    rqs_list.append(rqs.copy())

    sqs_len = len(sqs) * 1.0
    wqs_len = len(wqs) * 1.0
    rqs_len = len(rqs) * 1.0

    # compute metrics until stable values (largest change below 0.001)
    iterations = 0
    while max_delta >= 0.001:
        sqs_new = {}
        wqs_new = {}
        wwa_new = {}
        wsa_new = {}

        avg_sqs_delta = 0.0
        avg_wqs_delta = 0.0
        avg_rqs_delta = 0.0
        max_delta = 0.0

        if not config.open_ended_task:
            # compute annotation (relation) quality score (RQS)
            rqs_new = Metrics.relation_quality_score(list(rqs.keys()),
                                                     work_sent_rel_dict,
                                                     sqs_list[-1],
                                                     wqs_list[-1])
            for relation in rqs_new:
                delta = abs(rqs_new[relation] - rqs_list[-1][relation])
                max_delta = max(max_delta, delta)
                avg_rqs_delta += delta
            avg_rqs_delta /= rqs_len

        # compute sentence quality score (SQS)
        for sentence_id in sent_work_rel_dict:
            sqs_new[sentence_id] = Metrics.sentence_quality_score(
                sentence_id, sent_work_rel_dict,
                wqs_list[-1], rqs_list[-1])
            delta = abs(sqs_new[sentence_id] - sqs_list[-1][sentence_id])
            max_delta = max(max_delta, delta)
            avg_sqs_delta += delta
        avg_sqs_delta /= sqs_len

        # compute worker quality score (WQS = WWA * WSA)
        for worker_id in work_sent_rel_dict:
            wwa_new[worker_id] = Metrics.worker_worker_agreement(
                worker_id, work_sent_rel_dict,
                sent_work_rel_dict,
                wqs_list[-1], sqs_list[-1], rqs_list[-1])
            # BUG FIX: the worker's current quality must come from the
            # latest wqs entry. The original indexed wqs_list with
            # len(rqs_list) - 1; for open-ended tasks rqs_list never grows,
            # pinning the index at 0 so worker_sentence_agreement always
            # received the initial (1.0) worker quality.
            wsa_new[worker_id] = Metrics.worker_sentence_agreement(
                worker_id, sent_rel_dict,
                work_sent_rel_dict,
                sqs_list[-1], rqs_list[-1],
                wqs_list[-1][worker_id])
            wqs_new[worker_id] = wwa_new[worker_id] * wsa_new[worker_id]
            delta = abs(wqs_new[worker_id] - wqs_list[-1][worker_id])
            max_delta = max(max_delta, delta)
            avg_wqs_delta += delta
        avg_wqs_delta /= wqs_len

        # save results for current iteration
        sqs_list.append(sqs_new.copy())
        wqs_list.append(wqs_new.copy())
        wwa_list.append(wwa_new.copy())
        wsa_list.append(wsa_new.copy())
        if not config.open_ended_task:
            rqs_list.append(rqs_new.copy())

        iterations += 1

        # reconstruct sent_rel_dict with the new worker scores so the next
        # iteration's agreement metrics use quality-weighted annotation sums
        new_sent_rel_dict = {}
        for sent_id, rel_dict in sent_rel_dict.items():
            new_sent_rel_dict[sent_id] = {relation: 0.0 for relation in rel_dict}
        for work_id, srd in work_sent_rel_dict.items():
            wqs_work_id = wqs_new[work_id]
            for sent_id, rel_dict in srd.items():
                for relation, score in rel_dict.items():
                    new_sent_rel_dict[sent_id][relation] += score * wqs_work_id
        sent_rel_dict = new_sent_rel_dict

        logging.info(str(iterations) + " iterations; max d= " + str(max_delta) + " ; wqs d= " + str(avg_wqs_delta) + "; sqs d= " + str(avg_sqs_delta) + "; rqs d= " + str(avg_rqs_delta))

    # unit-annotation scores using the converged worker qualities
    srs = Counter()
    for sentence_id in sent_rel_dict:
        srs[sentence_id] = Counter()
        for relation in sent_rel_dict[sentence_id]:
            srs[sentence_id][relation] = Metrics.sentence_relation_score(
                sentence_id, relation, sent_work_rel_dict, wqs_list[-1])

    # unit-annotation scores using the initial (uniform) worker qualities
    srs_initial = Counter()
    for sentence_id in sent_rel_dict:
        srs_initial[sentence_id] = Counter()
        for relation in sent_rel_dict[sentence_id]:
            srs_initial[sentence_id][relation] = Metrics.sentence_relation_score(
                sentence_id, relation, sent_work_rel_dict, wqs_list[0])

    # converged scores
    results['units']['uqs'] = pd.Series(sqs_list[-1])
    results['units']['unit_annotation_score'] = pd.Series(srs)
    results['workers']['wqs'] = pd.Series(wqs_list[-1])
    results['workers']['wwa'] = pd.Series(wwa_list[-1])
    results['workers']['wsa'] = pd.Series(wsa_list[-1])
    if not config.open_ended_task:
        results['annotations']['aqs'] = pd.Series(rqs_list[-1])

    # first-iteration scores, kept for comparison with the converged values
    results['units']['uqs_initial'] = pd.Series(sqs_list[1])
    results['units']['unit_annotation_score_initial'] = pd.Series(srs_initial)
    results['workers']['wqs_initial'] = pd.Series(wqs_list[1])
    results['workers']['wwa_initial'] = pd.Series(wwa_list[1])
    results['workers']['wsa_initial'] = pd.Series(wsa_list[1])
    if not config.open_ended_task:
        results['annotations']['aqs_initial'] = pd.Series(rqs_list[1])
    return results