"""
Unit initialization.
"""
class Unit():
    """
    Performs general statistics over the units in the jobs.
    """

    @staticmethod
    def aggregate(judgments, config):
        """
        Aggregates information for each unit in the job.

        The judgments are grouped by their 'unit' column. For each unit we keep
        the data that was used as input (in the crowdsourcing template), the job
        in which it appeared, the number of worker entries recorded for the unit
        and the mean duration of its judgments. Every output column is summed
        over the unit's judgments, and the annotation metrics computed by
        `get_metrics` are appended as extra columns.

        Args:
            judgments: Judgments contained in the job. Must provide the columns
                'unit', 'job', 'worker' and 'duration', plus every column named
                in `config.input` and `config.output`.
            config: Job configuration as provided as input for the metrics.
                `config.input` and `config.output` are mappings whose values
                are column names of `judgments`.

        Returns:
            A dataframe containing all units that appear in the jobs (one row
            per unit) and the statistics relevant for them, with the columns
            sorted alphabetically.
        """
        # For each input column the first value is taken: all rows of a unit
        # hold the same value. Later entries deliberately override earlier
        # ones if a column name appears more than once.
        agg = dict.fromkeys(config.input.values(), 'first')
        # Each output column holds one dict of counts per judgment; summing
        # merges them into a single dict per unit.
        agg.update(dict.fromkeys(config.output.values(), 'sum'))
        agg.update(job='first', worker='count', duration='mean')

        units = judgments.groupby('unit').agg(agg)

        # For each aggregated vector in the unit, attach the unit metrics.
        units = units.apply(lambda row: Unit.get_metrics(row, config), axis=1)

        # Sort columns.
        return units.reindex(sorted(units.columns), axis=1)

    @staticmethod
    def get_metrics(row, config):
        """
        Counts the number of annotations and the number of unique annotations for each unit.

        For every output column `col` named in `config.output`, two entries
        are written into `row`:

        * `col + '.unique_annotations'`: the number of keys in `row[col]`;
        * `col + '.annotations'`: the sum of the values in `row[col]`.

        Args:
            row: Aggregated data of a single unit. Each output column must hold
                a dict-like object mapping annotations to counts
                (presumably a Counter; confirm against the loader).
            config: Job configuration; only `config.output` is read.

        Returns:
            The same `row` object, modified in place with the added entries.
        """
        for col in config.output.values():
            annotations = row[col]
            row[col + '.unique_annotations'] = len(annotations)
            row[col + '.annotations'] = sum(annotations.values())
        return row