GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( f0fb7b...c83738 )
by Oana
19:32
created

Metrics.sentence_quality_score()   B

Complexity

Conditions 5

Size

Total Lines 39
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 23
CRAP Score 5

Importance

Changes 0
Metric Value
eloc 25
dl 0
loc 39
rs 8.8133
c 0
b 0
f 0
ccs 23
cts 23
cp 1
cc 5
nop 4
crap 5
1
"""
2
Initialization of CrowdTruth metrics
3
"""
4 1
import logging
5 1
import math
6
7 1
from collections import Counter
8
9 1
import numpy as np
10 1
import pandas as pd
11
12 1
# Floor substituted for near-zero denominators and scores so that the metric
# computations in this module never divide by zero.
SMALL_NUMBER_CONST = 0.00000001
13
14 1
class Metrics():
15
    """
16
    Computes and applies the CrowdTruth metrics for evaluating units, workers and annotations.
17
    """
18
19
    # Unit Quality Score
20 1
    @staticmethod
    def unit_quality_score(unit_id, unit_work_ann_dict, wqs, aqs):
        """
        Computes the unit quality score.

        The unit quality score (UQS) is computed as the average cosine similarity between
        all worker vectors for a given unit, weighted by the worker quality (WQS) and the
        annotation quality (AQS). The goal is to capture the degree of agreement in annotating
        the media unit.

        Through the weighted average, workers and annotations with lower quality will have
        less of an impact on the final score.

        To weigh the metrics with the annotation quality, we compute weighted_cosine, the weighted
        version of the cosine similarity.

        Args:
            unit_id: Unit id.
            unit_work_ann_dict: Dict of unit_id -> worker_id -> annotation vector, holding all
                the worker judgments per unit.
            wqs: Dict of worker_id that contains the worker quality score (float).
            aqs: Dict of annotation_id that contains the annotation quality score (float).

        Returns:
            The quality score (UQS) of the given unit. A unit with fewer than two workers
            has no worker pairs and scores 0.0.
        """
        vectors = unit_work_ann_dict[unit_id]
        worker_ids = list(vectors.keys())

        uqs_numerator = 0.0
        uqs_denominator = 0.0

        # visit every unordered pair of workers exactly once
        for position, worker_i in enumerate(worker_ids):
            vector_i = vectors[worker_i]
            for worker_j in worker_ids[position + 1:]:
                vector_j = vectors[worker_j]

                numerator = 0.0
                denominator_i = 0.0
                denominator_j = 0.0
                for ann in vector_i:
                    value_i = vector_i[ann]
                    value_j = vector_j[ann]
                    numerator += aqs[ann] * (value_i * value_j)
                    denominator_i += aqs[ann] * (value_i * value_i)
                    denominator_j += aqs[ann] * (value_j * value_j)

                # A worker whose weighted vector is all zeros makes the norm zero;
                # fall back to the small constant instead of dividing by zero,
                # as worker_unit_agreement does for the same expression.
                norm = math.sqrt(denominator_i * denominator_j)
                if norm < SMALL_NUMBER_CONST:
                    weighted_cosine = SMALL_NUMBER_CONST
                else:
                    weighted_cosine = numerator / norm

                uqs_numerator += weighted_cosine * wqs[worker_i] * wqs[worker_j]
                uqs_denominator += wqs[worker_i] * wqs[worker_j]

        if uqs_denominator < SMALL_NUMBER_CONST:
            uqs_denominator = SMALL_NUMBER_CONST
        return uqs_numerator / uqs_denominator
76
77
78
    # Worker - Unit Agreement
79 1
    @staticmethod
    def worker_unit_agreement(worker_id, unit_ann_dict, work_unit_ann_dict, uqs, aqs, wqs):
        """
        Computes the worker agreement on a unit.

        For every unit the worker annotated, the worker's annotation vector (scaled by the
        worker's own quality score) is compared, through an annotation-quality-weighted
        cosine, with the aggregated unit vector from which the worker's scaled contribution
        has been subtracted. The worker unit agreement (WUA) is the average of these cosines,
        weighted by the unit quality.

        Through the weighted average, units and annotations with lower quality will have less
        of an impact on the final score.

        Args:
            worker_id: Worker id.
            unit_ann_dict: Dictionary of units and their aggregated annotations.
            work_unit_ann_dict: Dict of worker_id -> unit_id -> annotation vector.
            uqs: Dict unit_id that contains the unit quality scores (float).
            aqs: Dict of annotation_id that contains the annotation quality scores (float).
            wqs: Quality score (float) of the given worker; every value of the worker's
                vector is multiplied by it.

        Returns:
            The worker unit agreement score for the given worker.
        """
        wsa_numerator = 0.0
        wsa_denominator = 0.0

        for unit_id, worker_vector in work_unit_ann_dict[worker_id].items():
            unit_vector = unit_ann_dict[unit_id]

            numerator = 0.0
            denominator_w = 0.0
            denominator_s = 0.0
            for ann in worker_vector:
                worker_value = worker_vector[ann] * wqs
                # what the rest of the crowd contributed for this annotation
                others_value = unit_vector[ann] - worker_value

                numerator += aqs[ann] * worker_value * others_value
                denominator_w += aqs[ann] * (worker_value * worker_value)
                denominator_s += aqs[ann] * (others_value * others_value)

            norm = math.sqrt(denominator_w * denominator_s)
            if norm < SMALL_NUMBER_CONST:
                weighted_cosine = SMALL_NUMBER_CONST
            else:
                weighted_cosine = numerator / norm

            wsa_numerator += weighted_cosine * uqs[unit_id]
            wsa_denominator += uqs[unit_id]

        if wsa_denominator < SMALL_NUMBER_CONST:
            wsa_denominator = SMALL_NUMBER_CONST
        return wsa_numerator / wsa_denominator
137
138
    # Worker - Worker Agreement
139 1
    @staticmethod
    def worker_worker_agreement(worker_id, work_unit_ann_dict, unit_work_ann_dict, wqs, uqs, aqs):
        """
        Computes the agreement between a worker and every other worker on shared units.

        The worker-worker agreement (WWA) is the average weighted cosine between the
        annotations of the given worker and those of all other workers that have worked on
        the same media units, weighted by the worker, unit and annotation qualities.

        The metric gives an indication as to whether there are consistently like-minded workers.
        This is useful for identifying communities of thought.

        Through the weighted average, workers and annotations with lower quality will have less
        of an impact on the final score of the given worker.

        Args:
            worker_id: Worker id.
            work_unit_ann_dict: Dict of worker_id -> unit_id -> annotation vector.
            unit_work_ann_dict: Dict of unit_id -> worker_id -> annotation vector.
            wqs: Dict of worker_id that contains the worker quality scores (float).
            uqs: Dict unit_id that contains the unit quality scores (float).
            aqs: Dict of annotation_id that contains the annotation quality scores (float).

        Returns:
            The worker worker agreement score for the given worker.
        """
        wwa_numerator = 0.0
        wwa_denominator = 0.0

        for unit_id, own_vector in work_unit_ann_dict[worker_id].items():
            for other_workid, other_vector in unit_work_ann_dict[unit_id].items():
                if other_workid == worker_id:
                    continue

                numerator = 0.0
                denominator_w = 0.0
                denominator_ow = 0.0
                for ann in own_vector:
                    own_value = own_vector[ann]
                    other_value = other_vector[ann]

                    numerator += aqs[ann] * (own_value * other_value)
                    denominator_w += aqs[ann] * (own_value * own_value)
                    denominator_ow += aqs[ann] * (other_value * other_value)

                # An all-zero weighted vector on either side makes the norm zero;
                # fall back to the small constant instead of dividing by zero,
                # as worker_unit_agreement does for the same expression.
                norm = math.sqrt(denominator_w * denominator_ow)
                if norm < SMALL_NUMBER_CONST:
                    weighted_cosine = SMALL_NUMBER_CONST
                else:
                    weighted_cosine = numerator / norm

                wwa_numerator += weighted_cosine * wqs[other_workid] * uqs[unit_id]
                wwa_denominator += wqs[other_workid] * uqs[unit_id]

        if wwa_denominator < SMALL_NUMBER_CONST:
            wwa_denominator = SMALL_NUMBER_CONST
        return wwa_numerator / wwa_denominator
202
203
204
205
    # Unit - Annotation Score (UAS)
206 1
    @staticmethod
    def unit_annotation_score(unit_id, annotation, unit_work_annotation_dict, wqs):
        """
        Computes the unit annotation score.

        The unit - annotation score (UAS) estimates how likely it is that the annotation
        is expressed in the unit: the values the workers of the unit gave to the annotation
        are summed, each weighted by the worker quality, and divided by the summed quality
        of all workers that annotated the unit.

        Args:
            unit_id: Unit id.
            annotation: Annotation.
            unit_work_annotation_dict: Dict of unit_id -> worker_id -> annotation vector.
            wqs: Dict of worker_id that contains the worker quality scores (float).

        Returns:
            The unit annotation score for the given unit and annotation.
        """
        vectors_by_worker = unit_work_annotation_dict[unit_id]

        weighted_votes = 0.0
        total_weight = 0.0
        for worker_id, vector in vectors_by_worker.items():
            weighted_votes += vector[annotation] * wqs[worker_id]
            total_weight += wqs[worker_id]

        # never divide by (almost) zero
        return weighted_votes / max(total_weight, SMALL_NUMBER_CONST)
235
236
237
    # Annotation Quality Score (AQS)
238 1
    @staticmethod
    def annotation_quality_score(annotations, work_unit_ann_dict, uqs, wqs):
        """
        Computes the annotation quality score.

        The annotation quality score AQS calculates the agreement of selecting an annotation a,
        over all the units it appears in. Therefore, it is only applicable to closed tasks, where
        the same annotation set is used for all units. It is based on the probability that if a
        worker j annotates annotation a in a unit, worker i will also annotate it.

        The annotation quality score is the weighted average of these probabilities for all
        ordered pairs of distinct workers that share at least one unit. Through the weighted
        average, units and workers with lower quality will have less of an impact on the final
        score of the annotation.

        Args:
            annotations: Possible annotations.
            work_unit_ann_dict: Dict of worker_id -> unit_id -> annotation vector.
            uqs: Dict unit_id that contains the unit quality scores (float).
            wqs: Dict of worker_id that contains the worker quality scores (float).

        Returns:
            Dict that maps every annotation to its quality score (float). Scores are never
            smaller than SMALL_NUMBER_CONST, so they can safely be used as weights.
        """
        aqs_numerator = {ann: 0.0 for ann in annotations}
        aqs_denominator = {ann: 0.0 for ann in annotations}

        for worker_i, units_i in work_unit_ann_dict.items():
            for worker_j, units_j in work_unit_ann_dict.items():
                if worker_i == worker_j:
                    continue

                # units judged by both workers, in worker_i's iteration order
                shared_units = [unit_id for unit_id in units_i if unit_id in units_j]
                if not shared_units:
                    continue

                pair_weight = wqs[worker_i] * wqs[worker_j]
                for ann in annotations:
                    numerator = 0.0
                    denominator = 0.0
                    for unit_id in shared_units:
                        value_j = units_j[unit_id][ann]
                        numerator += uqs[unit_id] * units_i[unit_id][ann] * value_j
                        denominator += uqs[unit_id] * value_j

                    # worker_j never picked the annotation on a shared unit:
                    # the pair says nothing about this annotation
                    if denominator > 0:
                        aqs_numerator[ann] += pair_weight * numerator / denominator
                        aqs_denominator[ann] += pair_weight

        aqs = dict()
        for ann in annotations:
            if aqs_denominator[ann] > SMALL_NUMBER_CONST:
                aqs[ann] = aqs_numerator[ann] / aqs_denominator[ann]
                # prevent division by zero by storing very small value instead
                if aqs[ann] < SMALL_NUMBER_CONST:
                    aqs[ann] = SMALL_NUMBER_CONST
            else:
                aqs[ann] = SMALL_NUMBER_CONST
        return aqs
328
329
330 1
    @staticmethod
    def run(results, config, max_delta=0.001):
        """
        Iteratively runs the CrowdTruth metrics until the scores stabilise.

        Starting from quality scores of 1.0 for every unit, worker and annotation, the
        annotation (closed tasks only), unit and worker quality scores are recomputed from
        the scores of the previous iteration. Iteration stops as soon as the largest
        absolute change of any score in one iteration drops below max_delta. At least one
        iteration is always performed.

        Args:
            results: Dict-like with the 'judgments', 'units', 'workers' and (for closed
                tasks) 'annotations' tables; 'judgments' is read through its 'unit' and
                'worker' columns and 'units' is indexed by unit id.
            config: Configuration object; its first `output` value names the column that
                holds the annotation vectors and `open_ended_task` tells whether
                annotation quality scores are computed.
            max_delta: Convergence threshold (positive float).

        Returns:
            The given results, with the final and first-iteration scores added as columns
            to the 'units', 'workers' and (for closed tasks) 'annotations' tables.

        Raises:
            ValueError: If max_delta is not positive (the iteration could never stop).
        """
        if max_delta <= 0:
            raise ValueError("max_delta must be a positive number")

        judgments = results['judgments'].copy()
        units = results['units'].copy()

        # unit_work_ann_dict, work_unit_ann_dict, unit_ann_dict
        # to be done: change to use all vectors in one unit
        col = list(config.output.values())[0]
        unit_ann_dict = dict(units[col])

        def expanded_vector(worker, unit):
            """ expand the vector of a worker to all the annotations of the unit """
            vector = Counter()
            for ann in unit:
                vector[ann] = worker[ann] if ann in worker else 0
            return vector

        def reweighted_unit_vectors(unit_ann_dict, work_unit_ann_dict, worker_scores):
            """ rebuild the unit vectors as the worker vectors weighted by worker score """
            new_unit_ann_dict = dict()
            for unit_id, ann_dict in unit_ann_dict.items():
                new_unit_ann_dict[unit_id] = {ann: 0.0 for ann in ann_dict}
            for work_id, unit_vectors in work_unit_ann_dict.items():
                work_score = worker_scores[work_id]
                for unit_id, ann_dict in unit_vectors.items():
                    for ann, score in ann_dict.items():
                        new_unit_ann_dict[unit_id][ann] += score * work_score
            return new_unit_ann_dict

        # fill judgment vectors with unit keys
        for index, row in judgments.iterrows():
            judgments.at[index, col] = expanded_vector(row[col], units.at[row['unit'], col])

        unit_work_ann_dict = {
            name: group.set_index('worker')[col].to_dict()
            for name, group in judgments[['unit', 'worker', col]].groupby('unit')
        }
        work_unit_ann_dict = {
            name: group.set_index('unit')[col].to_dict()
            for name, group in judgments[['worker', 'unit', col]].groupby('worker')
        }

        # every score starts at 1.0; index 0 of each history list is that initial state
        uqs_list = [{unit_id: 1.0 for unit_id in unit_work_ann_dict}]
        wqs_list = [{worker_id: 1.0 for worker_id in work_unit_ann_dict}]
        wwa_list = [{worker_id: 1.0 for worker_id in work_unit_ann_dict}]
        wsa_list = [{worker_id: 1.0 for worker_id in work_unit_ann_dict}]

        # closed tasks share one annotation set, taken from the first unit;
        # open ended tasks collect the annotations of all the units
        aqs = dict()
        if not config.open_ended_task:
            first_unit_id = list(unit_ann_dict.keys())[0]
            for ann in unit_ann_dict[first_unit_id]:
                aqs[ann] = 1.0
        else:
            for ann_dict in unit_ann_dict.values():
                for ann in ann_dict:
                    aqs[ann] = 1.0
        aqs_list = [aqs.copy()]
        annotations = list(aqs.keys())

        uqs_len = float(len(uqs_list[0]))
        wqs_len = float(len(wqs_list[0]))
        aqs_len = float(len(aqs))

        # compute metrics until stable values
        iterations = 0
        while True:
            # all the scores of this iteration are derived from the previous one
            prev_uqs = uqs_list[-1]
            prev_wqs = wqs_list[-1]
            prev_aqs = aqs_list[-1]

            delta = 0.0
            avg_uqs_delta = 0.0
            avg_wqs_delta = 0.0
            avg_aqs_delta = 0.0

            # compute annotation quality score (aqs)
            aqs_new = None
            if not config.open_ended_task:
                aqs_new = Metrics.annotation_quality_score(annotations, work_unit_ann_dict,
                                                           prev_uqs, prev_wqs)
                for ann in aqs_new:
                    change = abs(aqs_new[ann] - prev_aqs[ann])
                    delta = max(delta, change)
                    avg_aqs_delta += change
                avg_aqs_delta /= aqs_len

            # compute unit quality score (uqs)
            uqs_new = dict()
            for unit_id in unit_work_ann_dict:
                uqs_new[unit_id] = Metrics.unit_quality_score(unit_id, unit_work_ann_dict,
                                                              prev_wqs, prev_aqs)
                change = abs(uqs_new[unit_id] - prev_uqs[unit_id])
                delta = max(delta, change)
                avg_uqs_delta += change
            avg_uqs_delta /= uqs_len

            # compute worker quality score (WQS)
            wwa_new = dict()
            wsa_new = dict()
            wqs_new = dict()
            for worker_id in work_unit_ann_dict:
                wwa_new[worker_id] = Metrics.worker_worker_agreement(
                    worker_id, work_unit_ann_dict, unit_work_ann_dict,
                    prev_wqs, prev_uqs, prev_aqs)
                # the worker's own latest score; it must come from the worker score
                # history, whose length differs from aqs_list for open ended tasks
                wsa_new[worker_id] = Metrics.worker_unit_agreement(
                    worker_id, unit_ann_dict, work_unit_ann_dict,
                    prev_uqs, prev_aqs, prev_wqs[worker_id])
                wqs_new[worker_id] = wwa_new[worker_id] * wsa_new[worker_id]
                change = abs(wqs_new[worker_id] - prev_wqs[worker_id])
                delta = max(delta, change)
                avg_wqs_delta += change
            avg_wqs_delta /= wqs_len

            # save results for current iteration
            uqs_list.append(uqs_new)
            wqs_list.append(wqs_new)
            wwa_list.append(wwa_new)
            wsa_list.append(wsa_new)
            if aqs_new is not None:
                aqs_list.append(aqs_new)
            iterations += 1

            unit_ann_dict = reweighted_unit_vectors(unit_ann_dict, work_unit_ann_dict, wqs_new)

            logging.info("%s iterations; max d= %s ; wqs d= %s; uqs d= %s; aqs d= %s",
                         iterations, delta, avg_wqs_delta, avg_uqs_delta, avg_aqs_delta)

            if delta < max_delta:
                break

        def save_unit_ann_score(unit_ann_dict, unit_work_ann_dict, iteration_value):
            """ save the unit annotation score for print """
            srs = Counter()
            for unit_id in unit_ann_dict:
                srs[unit_id] = Counter()
                for ann in unit_ann_dict[unit_id]:
                    srs[unit_id][ann] = Metrics.unit_annotation_score(
                        unit_id, ann, unit_work_ann_dict, iteration_value)
            return srs

        srs = save_unit_ann_score(unit_ann_dict, unit_work_ann_dict, wqs_list[-1])
        srs_initial = save_unit_ann_score(unit_ann_dict, unit_work_ann_dict, wqs_list[0])

        results['units']['uqs'] = pd.Series(uqs_list[-1])
        results['units']['unit_annotation_score'] = pd.Series(srs)
        results['workers']['wqs'] = pd.Series(wqs_list[-1])
        results['workers']['wwa'] = pd.Series(wwa_list[-1])
        results['workers']['wsa'] = pd.Series(wsa_list[-1])
        if not config.open_ended_task:
            results['annotations']['aqs'] = pd.Series(aqs_list[-1])

        # the "initial" columns hold the scores after the first iteration
        results['units']['uqs_initial'] = pd.Series(uqs_list[1])
        results['units']['unit_annotation_score_initial'] = pd.Series(srs_initial)
        results['workers']['wqs_initial'] = pd.Series(wqs_list[1])
        results['workers']['wwa_initial'] = pd.Series(wwa_list[1])
        results['workers']['wsa_initial'] = pd.Series(wsa_list[1])
        if not config.open_ended_task:
            results['annotations']['aqs_initial'] = pd.Series(aqs_list[1])
        return results
545