test_metrics.TutorialCustomizedConfig.processJudgments() - Code Metrics - Inspection of "reformat test configs" - CrowdTruth/CrowdTruth-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( fe7765...77d3da )

by Anca

created 2018-08-18 13:11 UTC

A

↳ Parent: test_metrics.TestAgreementClosed.test_all_workers_agree()

Complexity

Conditions

Size

Total Lines	8
Code Lines	6

Duplication

Lines	8
Ratio	100 %

Importance

Changes

Metric	Value
eloc	6
dl	8
loc	8
rs	9.3333
c	0
b	0
f	0
cc	5
nop	2


import unittest
import string
# import logging

import crowdtruth
from crowdtruth.configuration import DefaultConfig

class TestConfigOpen(DefaultConfig):
    """Minimal configuration used by the open-ended agreement tests.

    Reads a single input column and a single output column, and leaves the
    judgments untouched during pre-processing.
    """

    # task type: open-ended
    open_ended_task = True

    # the upper-case ASCII letters "A".."Z", one list entry per letter
    annotation_vector = list(string.ascii_uppercase)

    # columns read from the test files
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged (no pre-processing is needed)."""
        return judgments

class TestConfigClosed(DefaultConfig):
    """Minimal configuration used by the closed-task agreement tests.

    Reads a single input column and a single output column, and leaves the
    judgments untouched during pre-processing.
    """

    # task type: closed, with annotations separated by a single space
    open_ended_task = False
    annotation_separator = " "

    # the upper-case ASCII letters "A".."Z", one list entry per letter
    annotation_vector = list(string.ascii_uppercase)

    # columns read from the test files
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged (no pre-processing is needed)."""
        return judgments

class TutorialConfig(DefaultConfig):
    """Configuration for the tutorial data set, processed as a closed task.

    Declares the input/output columns, the fixed annotation vocabulary and a
    pre-processing step that normalizes the raw annotation strings.
    """
    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]

    # processing of a closed task
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
        "location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
        "other", "none"]

    def processJudgments(self, judgments):
        """Normalize the annotation columns before the metrics are computed.

        Every value in each of ``outputColumns`` is converted to a string,
        stripped of square brackets and lower-cased.

        Args:
            judgments: column-indexable table whose columns support
                ``.apply`` (presumably a pandas DataFrame -- confirm against
                the caller).

        Returns:
            The same ``judgments`` object, with its output columns replaced
            by the normalized values.
        """
        def normalize(value):
            # one pass per value instead of three separate passes per column;
            # str() is idempotent, so chaining yields the same result
            return str(value).replace('[', '').replace(']', '').lower()

        # any pre-processing of the input data goes here
        for col in self.outputColumns:
            judgments[col] = judgments[col].apply(normalize)
        return judgments

class TutorialCustomizedConfig(TutorialConfig):
    """Tutorial configuration that additionally declares custom platform columns.

    Everything else is inherited from ``TutorialConfig``.
    """
    # NOTE(review): presumably the names of the judgment-id, unit-id, worker-id
    # and start/submit timestamp columns in the custom input file -- confirm
    # against the crowdtruth configuration documentation.
    customPlatformColumns = ["_id", "unit_id", "_worker_id", "_started_at", "_created_at"]

# test_conf_const = TutorialConfig()
# test_config = test_conf_const.__class__
# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
# results = crowdtruth.run(data, config)

class TestAgreementClosed(unittest.TestCase):
    """Agreement tests for the quality scores, run with a closed-task config.

    Subclasses may override ``test_conf_const`` to run the same tests with a
    different configuration; annotation-level assertions are only made when
    the loaded configuration is not open-ended.
    """
    test_conf_const = TestConfigClosed()

    def _load_and_run(self, file_name):
        """Load ``file_name`` with a fresh config instance and run the metrics.

        A new instance of the class of ``test_conf_const`` is created for
        every call, so no configuration object is shared between runs.

        Returns:
            A ``(results, config)`` tuple: the output of ``crowdtruth.run``
            and the configuration returned by ``crowdtruth.load``.
        """
        config_class = self.test_conf_const.__class__
        data, config = crowdtruth.load(file=file_name, config=config_class())
        return crowdtruth.run(data, config), config

    def test_all_workers_agree(self):
        """With full agreement, unit, worker and annotation scores are all 1."""
        for w in range(2, 11):
            results, config = self._load_and_run(
                "test/" + str(w) + "work_agr.csv")
            worker_scores = results["workers"]["wqs"]

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for wid in range(w):
                self.assertAlmostEqual(worker_scores.at["W" + str(wid + 1)], 1.0)
            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"].at["A"], 1.0)

    def test_all_workers_disagree(self):
        """With full disagreement, unit, worker and annotation scores are all 0."""
        for w in range(2, 11):
            results, config = self._load_and_run(
                "test/" + str(w) + "work_disagr.csv")
            worker_scores = results["workers"]["wqs"]

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
            for wid in range(w):
                self.assertAlmostEqual(worker_scores.at["W" + str(wid + 1)], 0.0)

            # This check does not depend on the worker, so it is made once per
            # file instead of once per worker.
            # NOTE(review): the letter is picked with index ``w`` (the letter
            # after the first ``w`` letters); confirm against the fixture
            # files whether a per-worker index was intended instead.
            if not config.open_ended_task:
                self.assertAlmostEqual(
                    results["annotations"]["aqs"].at[string.ascii_uppercase[w]],
                    0.0)

    def test_outlier_worker(self):
        """A single outlier (W1) scores 0 while all other workers score 1."""
        for w in range(3, 11):
            results, config = self._load_and_run(
                "test/" + str(w) + "work_outlier.csv")
            worker_scores = results["workers"]["wqs"]

            self.assertAlmostEqual(worker_scores.at["W1"], 0.0)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            # workers W2..Ww
            for x in range(1, w):
                self.assertAlmostEqual(worker_scores.at["W" + str(x + 1)], 1.0)

            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                self.assertAlmostEqual(annotation_scores.at["A"], 0.0)
                self.assertAlmostEqual(annotation_scores.at["B"], 1.0)

    def test_incremental_worker_agreement(self):
        """More agreement yields higher worker, unit and annotation scores."""
        for w in range(4, 11):
            results, config = self._load_and_run(
                "test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv")
            worker_scores = results["workers"]["wqs"]
            unit_scores = results["units"]["uqs"]
            last_of_first_group = "W" + str(w - 1)
            first_of_second_group = "W" + str(w)

            # check that workers that agree on the same unit have the same quality score
            for x in range(2, w):
                if x != (w - 1):
                    self.assertAlmostEqual(
                        worker_scores.at["W1"],
                        worker_scores.at["W" + str(x)])
                self.assertAlmostEqual(
                    worker_scores.at[first_of_second_group],
                    worker_scores.at["W" + str(w + x - 1)])

            # workers that agree have a greater WQS than the worker that disagrees
            self.assertGreater(
                worker_scores.at["W1"],
                worker_scores.at[last_of_first_group])
            self.assertGreater(
                worker_scores.at[first_of_second_group],
                worker_scores.at["W" + str(2 * w - 1)])

            # the more workers agree on a unit, the higher the worker quality score
            self.assertGreater(
                worker_scores.at[first_of_second_group],
                worker_scores.at["W1"])

            # the more workers agree on a unit, the higher the unit quality score
            self.assertLess(unit_scores.at[1], unit_scores.at[2])
            self.assertLess(unit_scores.at[1], unit_scores.at[3])
            self.assertLess(unit_scores.at[2], unit_scores.at[3])

            # the more workers agree on an annotation, the higher the
            # annotation quality score
            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                self.assertLess(annotation_scores.at["A"], annotation_scores.at["C"])
                self.assertLess(annotation_scores.at["B"], annotation_scores.at["A"])
                self.assertLess(annotation_scores.at["D"], annotation_scores.at["C"])
                self.assertLess(annotation_scores.at["A"], annotation_scores.at["E"])
                self.assertLess(annotation_scores.at["C"], annotation_scores.at["E"])

class TestAgreementOpen(TestAgreementClosed):
    """Re-runs the inherited agreement tests with the open-ended configuration."""
    test_conf_const = TestConfigOpen()

class TestTutorial(unittest.TestCase):
    """Sanity checks on the metrics computed for the tutorial data set."""

    def test_metrics_correct_interval(self):
        """Every computed score must lie in the closed interval [0, 1]."""
        config_class = TutorialConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example.csv",
            config=config_class())
        results = crowdtruth.run(data, config)

        # per-unit annotation scores: one nested collection of scores per unit
        for _, per_unit in results["units"]["unit_annotation_score"].items():
            for _, score in per_unit.items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

        # flat score columns, checked in the order units, workers, annotations
        score_columns = (
            ("units", "uqs"),
            ("workers", "wqs"),
            ("annotations", "aqs"),
        )
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

class TestCustomizedTutorial(unittest.TestCase):
    """Sanity checks on the metrics computed for the customized tutorial data."""

    def test_metrics_correct_interval(self):
        """Every computed score must lie in the closed interval [0, 1]."""
        config_class = TutorialCustomizedConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example_custom.csv",
            config=config_class())
        results = crowdtruth.run(data, config)

        # per-unit annotation scores: one nested collection of scores per unit
        for _, per_unit in results["units"]["unit_annotation_score"].items():
            for _, score in per_unit.items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

        # flat score columns, checked in the order units, workers, annotations
        score_columns = (
            ("units", "uqs"),
            ("workers", "wqs"),
            ("annotations", "aqs"),
        )
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

# Run the test cases defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()


1
2		import unittest
3		import string
4		# import logging
5
6		import crowdtruth
7		from crowdtruth.configuration import DefaultConfig
8
9		class TestConfigOpen(DefaultConfig):
10		inputColumns = ["in_col"]
11		outputColumns = ["out_col"]
12		open_ended_task = True
13		annotation_vector = list(string.ascii_uppercase)
14		def processJudgments(self, judgments):
15		return judgments
16
17		class TestConfigClosed(DefaultConfig):
18		inputColumns = ["in_col"]
19		outputColumns = ["out_col"]
20		open_ended_task = False
21		annotation_separator = " "
22		annotation_vector = list(string.ascii_uppercase)
23		def processJudgments(self, judgments):
24		return judgments
25
26		class TutorialConfig(DefaultConfig):
27		inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
28		outputColumns = ["relations"]
29
30		# processing of a closed task
31		open_ended_task = False
32		annotation_separator = " "
33		annotation_vector = [
34		"causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
35		"location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
36		"other", "none"]
37
38		def processJudgments(self, judgments):
39		# any pre-processing of the input data goes here
40		for col in self.outputColumns:
41		# remove square brackets from annotations
42		judgments[col] = judgments[col].apply(lambda x: str(x).replace('[', ''))
43		judgments[col] = judgments[col].apply(lambda x: str(x).replace(']', ''))
44		judgments[col] = judgments[col].apply(lambda x: str(x).lower())
45		return judgments
46
47		class TutorialCustomizedConfig(TutorialConfig):
48		customPlatformColumns = ["_id", "unit_id", "_worker_id", "_started_at", "_created_at"]
49
50		# test_conf_const = TutorialConfig()
51		# test_config = test_conf_const.__class__
52		# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
53		# results = crowdtruth.run(data, config)
54
55		class TestAgreementClosed(unittest.TestCase):
56		test_conf_const = TestConfigClosed()
57
58	View Code Duplication	def test_all_workers_agree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
59		for w in range(2,11):
60		test_config = self.test_conf_const.__class__
61		data, config = crowdtruth.load(
62		file="test/" + str(w) + "work_agr.csv",
63		config=test_config())
64		results = crowdtruth.run(data, config)
65		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
66		for wid in range(w):
67		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 1.0)
68		if not config.open_ended_task:
69		self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)
70
71	View Code Duplication	def test_all_workers_disagree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
72		for w in range(2, 11):
73		test_config = self.test_conf_const.__class__
74		data, config = crowdtruth.load(
75		file="test/" + str(w) + "work_disagr.csv",
76		config=test_config())
77		results = crowdtruth.run(data, config)
78		self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
79		for wid in range(w):
80		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 0.0)
81		if not config.open_ended_task:
82		self.assertAlmostEqual(
83		results["annotations"]["aqs"][list(string.ascii_uppercase)[w]],
84		0.0)
85
86		def test_outlier_worker(self):
87		for w in range(3, 11):
88		test_config = self.test_conf_const.__class__
89		data, config = crowdtruth.load(
90		file="test/" + str(w) + "work_outlier.csv",
91		config=test_config())
92		results = crowdtruth.run(data, config)
93		self.assertAlmostEqual(
94		results["workers"]["wqs"].at["W1"],
95		0.0)
96		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
97		for x in range(1, w):
98		self.assertAlmostEqual(
99		results["workers"]["wqs"].at["W" + str(x + 1)],
100		1.0)
101
102		if not config.open_ended_task:
103		self.assertAlmostEqual(
104		results["annotations"]["aqs"].at["A"],
105		0.0)
106		self.assertAlmostEqual(
107		results["annotations"]["aqs"].at["B"],
108		1.0)
109
110
111		def test_incremental_worker_agreement(self):
112		for w in range(4, 11):
113		test_config = self.test_conf_const.__class__
114		data, config = crowdtruth.load(
115		file="test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv",
116		config=test_config())
117		results = crowdtruth.run(data, config)
118
119		# print str(config.open_ended_task)
120
121		# check that workers that agree on the same unit have the same quality score
122		for x in range(2, w):
123		if x != (w - 1):
124		self.assertAlmostEqual(
125		results["workers"]["wqs"].at["W1"],
126		results["workers"]["wqs"].at["W" + str(x)],)
127		self.assertAlmostEqual(
128		results["workers"]["wqs"].at["W" + str(w)],
129		results["workers"]["wqs"].at["W" + str(w + x - 1)])
130
131		# workers that agree have a greater WQS than the worker that disagrees
132		self.assertGreater(
133		results["workers"]["wqs"].at["W1"],
134		results["workers"]["wqs"].at["W" + str(w - 1)])
135		self.assertGreater(
136		results["workers"]["wqs"].at["W" + str(w)],
137		results["workers"]["wqs"].at["W" + str(2 * w - 1)])
138
139		# the more workers agree on a unit, the higher the worker quality score
140		self.assertGreater(
141		results["workers"]["wqs"].at["W" + str(w)],
142		results["workers"]["wqs"].at["W1"])
143		# print "W" + str(w) + ": " + str(results["workers"]["wqs"].at["W" + str(w)])
144		# print "W1: " + str(results["workers"]["wqs"].at["W1"])
145
146		# the more workers agree on a unit, the higher the unit quality score
147		self.assertLess(
148		results["units"]["uqs"].at[1],
149		results["units"]["uqs"].at[2])
150		self.assertLess(
151		results["units"]["uqs"].at[1],
152		results["units"]["uqs"].at[3])
153		self.assertLess(
154		results["units"]["uqs"].at[2],
155		results["units"]["uqs"].at[3])
156
157		# the more workers agree on an annotation, the higher the unit quality score
158		if not config.open_ended_task:
159		self.assertLess(
160		results["annotations"]["aqs"].at["A"],
161		results["annotations"]["aqs"].at["C"])
162		self.assertLess(
163		results["annotations"]["aqs"].at["B"],
164		results["annotations"]["aqs"].at["A"])
165		self.assertLess(
166		results["annotations"]["aqs"].at["D"],
167		results["annotations"]["aqs"].at["C"])
168		self.assertLess(
169		results["annotations"]["aqs"].at["A"],
170		results["annotations"]["aqs"].at["E"])
171		self.assertLess(
172		results["annotations"]["aqs"].at["C"],
173		results["annotations"]["aqs"].at["E"])
174
175		class TestAgreementOpen(TestAgreementClosed):
176		test_conf_const = TestConfigOpen()
177
178	View Code Duplication	class TestTutorial(unittest.TestCase):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
179		def test_metrics_correct_interval(self):
180		test_conf_const = TutorialConfig()
181		test_config = test_conf_const.__class__
182		data, config = crowdtruth.load(
183		file="tutorial/relex_example.csv",
184		config=test_config())
185		results = crowdtruth.run(data, config)
186		for _, val_arr in results["units"]["unit_annotation_score"].items():
187		for _, val in val_arr.items():
188		self.assertGreaterEqual(val, 0.0)
189		self.assertLessEqual(val, 1.0)
190		for _, val in results["units"]["uqs"].items():
191		self.assertGreaterEqual(val, 0.0)
192		self.assertLessEqual(val, 1.0)
193		for _, val in results["workers"]["wqs"].items():
194		self.assertGreaterEqual(val, 0.0)
195		self.assertLessEqual(val, 1.0)
196		for _, val in results["annotations"]["aqs"].items():
197		self.assertGreaterEqual(val, 0.0)
198		self.assertLessEqual(val, 1.0)
199
200	View Code Duplication	class TestCustomizedTutorial(unittest.TestCase):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
201		def test_metrics_correct_interval(self):
202		test_conf_const = TutorialCustomizedConfig()
203		test_config = test_conf_const.__class__
204		data, config = crowdtruth.load(
205		file="tutorial/relex_example_custom.csv",
206		config=test_config())
207		results = crowdtruth.run(data, config)
208		for _, val_arr in results["units"]["unit_annotation_score"].items():
209		for _, val in val_arr.items():
210		self.assertGreaterEqual(val, 0.0)
211		self.assertLessEqual(val, 1.0)
212		for _, val in results["units"]["uqs"].items():
213		self.assertGreaterEqual(val, 0.0)
214		self.assertLessEqual(val, 1.0)
215		for _, val in results["workers"]["wqs"].items():
216		self.assertGreaterEqual(val, 0.0)
217		self.assertLessEqual(val, 1.0)
218		for _, val in results["annotations"]["aqs"].items():
219		self.assertGreaterEqual(val, 0.0)
220		self.assertLessEqual(val, 1.0)
221
222		if __name__ == '__main__':
223		unittest.main()
224

CrowdTruth / CrowdTruth-core

GitHub Access Token became invalid

Push — master ( fe7765...77d3da )

A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like