Total Complexity | 3 |
Total Lines | 47 |
Duplicated Lines | 0 % |
Coverage | 100% |
Changes | 0 |
1 | """ |
||
2 | Job-level summary statistics for crowdsourcing jobs. |
||
3 | """ |
||
4 | |||
class Job():
    """
    Performs general statistics over the crowdsourcing jobs.
    """

    @staticmethod
    def aggregate(units, judgments, config):
        """
        Aggregates information about the total number of units, total number of judgments,
        total number of workers that provided annotations and the total duration of the job.

        Args:
            units: Units contained in the job. Must provide one
                '<col>.unique_annotations' and one '<col>.annotations' column for
                every value in config.output.
            judgments: Judgments contained in the job. Must provide the columns
                'job', 'unit', 'judgment', 'worker', 'duration', 'started' and
                'submitted'.
            config: Job configuration as provided as input for the metrics. Only
                config.output (a mapping) is read here.

        Returns:
            A dataframe indexed by 'job' (one row per distinct job value) that
            stores general stats on the crowdsourcing jobs, with its columns
            sorted alphabetically.
        """
        agg = {
            'unit': 'nunique',
            'judgment': 'nunique',
            'worker': 'nunique',
            'duration': 'mean'
        }
        job = judgments.groupby('job').agg(agg)

        # Compute the job runtime as latest submission minus earliest start.
        # Series.max()/min() are used instead of the builtins so that missing
        # timestamps are skipped rather than deciding the result by their
        # position in the column.
        # NOTE: the runtime spans all judgments passed in, not each job group,
        # so every row receives the same value.
        runtime = judgments['submitted'].max() - judgments['started'].min()
        job['runtime'] = runtime
        job['runtime.per_unit'] = job['runtime'] / job['unit']
        job['judgments.per.worker'] = job['judgment'] / job['worker']

        metrics = ['unique_annotations', 'annotations']
        for metric in metrics:
            for col in config.output.values():
                # aggregate unit metrics: mean over all units of each output column
                name = col + '.' + metric
                job[name] = units[name].mean()

        job = job.reindex(sorted(job.columns), axis=1)

        return job
||
47 |