GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( aeff40...59002d )
by Anca
17:59
created

test_metrics   A

Complexity

Total Complexity 28

Size/Duplication

Total Lines 199
Duplicated Lines 13.07 %

Importance

Changes 0
Metric Value
wmc 28
eloc 156
dl 26
loc 199
rs 10
c 0
b 0
f 0

8 Methods

Rating   Name   Duplication   Size   Complexity  
A TestConfigOpen.processJudgments() 0 2 1
A TestConfigClosed.processJudgments() 0 2 1
B TestAgreementClosed.test_incremental_worker_agreement() 0 63 5
A TestTutorial.test_metrics_correct_interval() 0 20 4
A TutorialConfig.processJudgments() 0 8 5
A TestAgreementClosed.test_outlier_worker() 0 23 4
A TestAgreementClosed.test_all_workers_disagree() 14 14 4
A TestAgreementClosed.test_all_workers_agree() 12 12 4

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A commonly used rule is to restructure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
2
import unittest
3
import string
4
# import logging
5
6
import crowdtruth
7
from crowdtruth.configuration import DefaultConfig
8
9
class TestConfigOpen(DefaultConfig):
    """Open-ended task configuration used by the agreement tests.

    Declares ``in_col`` as the input column and ``out_col`` as the output
    column, and leaves the judgments untouched.
    """

    # Task type and annotation vocabulary (the uppercase ASCII letters).
    open_ended_task = True
    annotation_vector = list(string.ascii_uppercase)

    # Column names used for input and output.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    def processJudgments(self, judgments):
        """Return ``judgments`` as-is; no pre-processing is applied."""
        return judgments
16
17
class TestConfigClosed(DefaultConfig):
    """Closed task configuration used by the agreement tests.

    Declares ``in_col`` as the input column and ``out_col`` as the output
    column, and leaves the judgments untouched.
    """

    # Task type, annotation separator and annotation vocabulary
    # (the uppercase ASCII letters).
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = list(string.ascii_uppercase)

    # Column names used for input and output.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    def processJudgments(self, judgments):
        """Return ``judgments`` as-is; no pre-processing is applied."""
        return judgments
25
26
class TutorialConfig(DefaultConfig):
    """Closed task configuration whose output column is ``relations``."""

    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]

    # processing of a closed task
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
        "location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
        "other", "none"]

    def processJudgments(self, judgments):
        """Normalise the annotations in every output column.

        Each value is converted to a string, stripped of square brackets and
        lower-cased. ``judgments`` is updated in place and also returned.
        """
        def _clean(value):
            # Drop both kinds of square bracket, then lower-case.
            return str(value).replace('[', '').replace(']', '').lower()

        for column in self.outputColumns:
            judgments[column] = judgments[column].apply(_clean)
        return judgments
46
47
# test_conf_const = TutorialConfig()
48
# test_config = test_conf_const.__class__
49
# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
50
# results = crowdtruth.run(data, config)
51
52
class TestAgreementClosed(unittest.TestCase):
    """Agreement checks on the ``test/*.csv`` files, run with the closed config.

    The configuration class is taken from ``test_conf_const``, so a subclass
    can override that attribute to repeat the same checks with another
    configuration.
    """

    test_conf_const = TestConfigClosed()

    def _load_and_run(self, file_name):
        """Load ``file_name`` with a fresh config instance and run the metrics.

        Returns a ``(results, config)`` tuple: the value returned by
        ``crowdtruth.run`` and the config returned by ``crowdtruth.load``.
        """
        config_class = self.test_conf_const.__class__
        data, config = crowdtruth.load(file=file_name, config=config_class())
        return crowdtruth.run(data, config), config

    def test_all_workers_agree(self):
        """Unit 1, workers W1..Ww and (closed tasks) annotation A score 1.0."""
        for w in range(2, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_agr.csv")
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for wid in range(w):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(wid + 1)], 1.0)
            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)

    def test_all_workers_disagree(self):
        """Unit 1 and workers W1..Ww score 0.0 when the workers disagree."""
        for w in range(2, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_disagr.csv")
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
            for wid in range(w):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(wid + 1)], 0.0)
                if not config.open_ended_task:
                    # NOTE(review): the index is `w`, not `wid`, so the same
                    # annotation is re-checked on every pass of this loop --
                    # confirm whether `wid` was intended.
                    self.assertAlmostEqual(
                        results["annotations"]["aqs"][list(string.ascii_uppercase)[w]],
                        0.0)

    def test_outlier_worker(self):
        """W1 scores 0.0 while unit 1 and workers W2..Ww score 1.0."""
        for w in range(3, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_outlier.csv")
            wqs = results["workers"]["wqs"]
            self.assertAlmostEqual(wqs.at["W1"], 0.0)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for x in range(1, w):
                self.assertAlmostEqual(wqs.at["W" + str(x + 1)], 1.0)

            if not config.open_ended_task:
                aqs = results["annotations"]["aqs"]
                self.assertAlmostEqual(aqs.at["A"], 0.0)
                self.assertAlmostEqual(aqs.at["B"], 1.0)

    def test_incremental_worker_agreement(self):
        """Quality scores grow with the number of workers that agree."""
        for w in range(4, 11):
            results, config = self._load_and_run(
                "test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv")
            wqs = results["workers"]["wqs"]
            uqs = results["units"]["uqs"]

            # check that workers that agree on the same unit have the same quality score
            for x in range(2, w):
                if x != (w - 1):
                    self.assertAlmostEqual(wqs.at["W1"], wqs.at["W" + str(x)])
                self.assertAlmostEqual(
                    wqs.at["W" + str(w)],
                    wqs.at["W" + str(w + x - 1)])

            # workers that agree have a greater WQS than the worker that disagrees
            self.assertGreater(wqs.at["W1"], wqs.at["W" + str(w - 1)])
            self.assertGreater(wqs.at["W" + str(w)], wqs.at["W" + str(2 * w - 1)])

            # the more workers agree on a unit, the higher the worker quality score
            self.assertGreater(wqs.at["W" + str(w)], wqs.at["W1"])

            # the more workers agree on a unit, the higher the unit quality score
            for lower, higher in ((1, 2), (1, 3), (2, 3)):
                self.assertLess(uqs.at[lower], uqs.at[higher])

            # the more workers agree on an annotation, the higher the
            # annotation quality score
            if not config.open_ended_task:
                aqs = results["annotations"]["aqs"]
                ordered_pairs = (
                    ("A", "C"), ("B", "A"), ("D", "C"), ("A", "E"), ("C", "E"))
                for lower, higher in ordered_pairs:
                    self.assertLess(aqs.at[lower], aqs.at[higher])
171
172
class TestAgreementOpen(TestAgreementClosed):
    """Repeat the inherited agreement tests with the open-ended config."""

    test_conf_const = TestConfigOpen()
174
175
class TestTutorial(unittest.TestCase):
    """Sanity checks on the metrics computed for the tutorial example file."""

    def test_metrics_correct_interval(self):
        """Every unit, worker and annotation quality score lies in [0, 1]."""
        config_class = TutorialConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example.csv",
            config=config_class())
        results = crowdtruth.run(data, config)

        # NOTE: the per-unit ``unit_annotation_score`` values are not
        # range-checked here.
        score_columns = (("units", "uqs"), ("workers", "wqs"), ("annotations", "aqs"))
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)
196
197
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
199