GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

test_metrics   A
last analyzed

Complexity

Total Complexity 36

Size/Duplication

Total Lines 223
Duplicated Lines 30.49 %

Importance

Changes 0
Metric Value
wmc 36
eloc 182
dl 68
loc 223
rs 9.52
c 0
b 0
f 0

9 Methods

Rating   Name   Duplication   Size   Complexity  
B TestAgreementClosed.test_incremental_worker_agreement() 0 63 5
B TestTutorial.test_metrics_correct_interval() 20 20 6
A TestConfigOpen.processJudgments() 0 2 1
A TutorialConfig.processJudgments() 0 8 5
B TestCustomizedTutorial.test_metrics_correct_interval() 20 20 6
A TestAgreementClosed.test_outlier_worker() 0 23 4
A TestAgreementClosed.test_all_workers_disagree() 14 14 4
A TestAgreementClosed.test_all_workers_agree() 12 12 4
A TestConfigClosed.processJudgments() 0 2 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
""" Unit testing module for metrics functions """
2
3
import unittest
4
import string
5
6
import crowdtruth
7
from crowdtruth.configuration import DefaultConfig
8
9
TEST_FILE_PREF = "test/test_data/metrics/"
10
11
class TestConfigOpen(DefaultConfig):
    """Open-ended task configuration used by the agreement tests.

    Declares one input column and one output column, and passes judgments
    through without any pre-processing.
    """

    # Names of the input and output columns for this configuration.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    open_ended_task = True

    # One entry per uppercase ASCII letter: "A", "B", ..., "Z".
    annotation_vector = list(string.ascii_uppercase)

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged."""
        return judgments
18
19
class TestConfigClosed(DefaultConfig):
    """Closed task configuration used by the agreement tests.

    Declares one input column and one output column, and passes judgments
    through without any pre-processing.
    """

    # Names of the input and output columns for this configuration.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    open_ended_task = False
    annotation_separator = " "

    # One entry per uppercase ASCII letter: "A", "B", ..., "Z".
    annotation_vector = list(string.ascii_uppercase)

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged."""
        return judgments
27
28
class TutorialConfig(DefaultConfig):
    """Closed task configuration for the relation-extraction tutorial data.

    The single output column ``relations`` is normalised in
    :meth:`processJudgments` before the metrics are computed.
    """

    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]

    # processing of a closed task
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
        "location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
        "other", "none"]

    def processJudgments(self, judgments):
        """Normalise every output column of ``judgments`` and return it.

        Each value is converted to ``str``, stripped of square brackets and
        lower-cased. The columns are overwritten in place on ``judgments``.
        """
        def _normalise(value):
            # Drop both kinds of square bracket, then lower-case the text.
            text = str(value)
            return text.replace('[', '').replace(']', '').lower()

        for column in self.outputColumns:
            judgments[column] = judgments[column].apply(_normalise)
        return judgments
48
49
class TutorialCustomizedConfig(TutorialConfig):
    """Tutorial configuration that overrides the platform column names."""

    customPlatformColumns = [
        "_id",
        "unit_id",
        "_worker_id",
        "_started_at",
        "_created_at",
    ]
51
52
# test_conf_const = TutorialConfig()
53
# test_config = test_conf_const.__class__
54
# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
55
# results = crowdtruth.run(data, config)
56
57
class TestAgreementClosed(unittest.TestCase):
    """Agreement tests run with the closed-task configuration.

    Every test loads CSV fixtures found under ``TEST_FILE_PREF``, runs
    ``crowdtruth`` on them and checks the resulting unit ("uqs"), worker
    ("wqs") and annotation ("aqs") scores. Subclasses can swap the
    configuration by overriding ``test_conf_const``.
    """

    test_conf_const = TestConfigClosed()

    def _load_and_run(self, file_name):
        """Load ``file_name`` from the fixture folder and run the metrics.

        A fresh configuration object of the same class as
        ``test_conf_const`` is created for each call.

        Returns:
            A ``(results, config)`` tuple as produced by ``crowdtruth.run``
            and ``crowdtruth.load`` respectively.
        """
        config_cls = self.test_conf_const.__class__
        data, config = crowdtruth.load(
            file=TEST_FILE_PREF + file_name,
            config=config_cls())
        return crowdtruth.run(data, config), config

    def test_all_workers_agree(self):
        """All scores are 1.0 when every worker agrees."""
        for w in range(2, 11):
            results, config = self._load_and_run(str(w) + "work_agr.csv")

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for worker_no in range(1, w + 1):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(worker_no)], 1.0)
            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)

    def test_all_workers_disagree(self):
        """All scores are 0.0 when every worker disagrees."""
        for w in range(2, 11):
            results, config = self._load_and_run(str(w) + "work_disagr.csv")

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
            for worker_no in range(1, w + 1):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(worker_no)], 0.0)
                if not config.open_ended_task:
                    # NOTE(review): the letter is selected with ``w`` (the
                    # worker count), not the inner loop variable, so every
                    # pass of this loop checks the same annotation --
                    # confirm whether the per-worker letter was intended.
                    letter = string.ascii_uppercase[w]
                    self.assertAlmostEqual(
                        results["annotations"]["aqs"][letter], 0.0)

    def test_outlier_worker(self):
        """Worker W1 scores 0.0 while all other workers score 1.0."""
        for w in range(3, 11):
            results, config = self._load_and_run(str(w) + "work_outlier.csv")

            self.assertAlmostEqual(results["workers"]["wqs"].at["W1"], 0.0)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for worker_no in range(2, w + 1):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(worker_no)], 1.0)

            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"].at["A"], 0.0)
                self.assertAlmostEqual(results["annotations"]["aqs"].at["B"], 1.0)

    def test_incremental_worker_agreement(self):
        """Scores grow with the number of workers that agree."""
        for w in range(4, 11):
            results, config = self._load_and_run(
                str(w - 2) + "vs" + str(w - 1) + "work_agr.csv")

            def wqs(worker_no):
                # Worker quality score of worker number ``worker_no``.
                return results["workers"]["wqs"].at["W" + str(worker_no)]

            # check that workers that agree on the same unit have the same
            # quality score
            for x in range(2, w):
                if x != w - 1:
                    self.assertAlmostEqual(wqs(1), wqs(x))
                self.assertAlmostEqual(wqs(w), wqs(w + x - 1))

            # workers that agree have a greater WQS than the worker that
            # disagrees
            self.assertGreater(wqs(1), wqs(w - 1))
            self.assertGreater(wqs(w), wqs(2 * w - 1))

            # the more workers agree on a unit, the higher the worker
            # quality score
            self.assertGreater(wqs(w), wqs(1))

            # the more workers agree on a unit, the higher the unit quality
            # score
            for lower_unit, higher_unit in ((1, 2), (1, 3), (2, 3)):
                self.assertLess(
                    results["units"]["uqs"].at[lower_unit],
                    results["units"]["uqs"].at[higher_unit])

            # the more workers agree on an annotation, the higher its
            # "aqs" score
            if not config.open_ended_task:
                ordered_pairs = (
                    ("A", "C"), ("B", "A"), ("D", "C"), ("A", "E"), ("C", "E"))
                for lower_ann, higher_ann in ordered_pairs:
                    self.assertLess(
                        results["annotations"]["aqs"].at[lower_ann],
                        results["annotations"]["aqs"].at[higher_ann])
176
177
class TestAgreementOpen(TestAgreementClosed):
    """Re-run the inherited agreement tests with the open-ended configuration."""

    test_conf_const = TestConfigOpen()
179
180 View Code Duplication
class TestTutorial(unittest.TestCase):
    """Sanity checks on the metrics computed for the tutorial data set."""

    def _assert_in_unit_interval(self, value):
        """Assert that ``0.0 <= value <= 1.0``."""
        self.assertGreaterEqual(value, 0.0)
        self.assertLessEqual(value, 1.0)

    def test_metrics_correct_interval(self):
        """Every computed score lies in the closed interval [0, 1]."""
        config_cls = TutorialConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example.csv",
            config=config_cls())
        results = crowdtruth.run(data, config)

        # Unit-annotation scores hold one mapping of scores per unit.
        for _, scores in results["units"]["unit_annotation_score"].items():
            for _, score in scores.items():
                self._assert_in_unit_interval(score)

        # Unit, worker and annotation scores, checked in that order.
        score_columns = (("units", "uqs"), ("workers", "wqs"), ("annotations", "aqs"))
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self._assert_in_unit_interval(score)
201
202 View Code Duplication
class TestCustomizedTutorial(unittest.TestCase):
    """Sanity checks on the metrics for the custom-column tutorial data set."""

    def _assert_in_unit_interval(self, value):
        """Assert that ``0.0 <= value <= 1.0``."""
        self.assertGreaterEqual(value, 0.0)
        self.assertLessEqual(value, 1.0)

    def test_metrics_correct_interval(self):
        """Every computed score lies in the closed interval [0, 1]."""
        config_cls = TutorialCustomizedConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example_custom.csv",
            config=config_cls())
        results = crowdtruth.run(data, config)

        # Unit-annotation scores hold one mapping of scores per unit.
        for _, scores in results["units"]["unit_annotation_score"].items():
            for _, score in scores.items():
                self._assert_in_unit_interval(score)

        # Unit, worker and annotation scores, checked in that order.
        score_columns = (("units", "uqs"), ("workers", "wqs"), ("annotations", "aqs"))
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self._assert_in_unit_interval(score)
223