test_metrics.TutorialCustomizedConfig.processJudgments() - Code Metrics - Inspection of "added test for custom input file" - CrowdTruth/CrowdTruth-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( c83738...fbc546 )

by Oana

created 2018-08-17 14:12 UTC

TutorialCustomizedConfig.processJudgments() A

↳ Parent: test_metrics

Complexity

Conditions

Size

Total Lines	8
Code Lines	6

Duplication

Lines	8
Ratio	100 %

Importance

Changes

Metric	Value
eloc	6
dl	8
loc	8
rs	9.3333
c	0
b	0
f	0
cc	5
nop	2


import unittest
import string
# import logging

import crowdtruth
from crowdtruth.configuration import DefaultConfig

class TestConfigOpen(DefaultConfig):
    """Configuration used by the agreement tests with the open-ended flag set."""

    # Task type: the agreement tests only check annotation scores when
    # this flag is False, so with this config those checks are skipped.
    open_ended_task = True

    # Column names the configuration declares for the test CSV files.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    # One entry per uppercase ASCII letter: "A" .. "Z".
    annotation_vector = list(string.ascii_uppercase)

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged; no pre-processing is applied."""
        return judgments

class TestConfigClosed(DefaultConfig):
    """Configuration used by the agreement tests with the open-ended flag cleared."""

    # Task type: with this flag False the agreement tests also assert on
    # the annotation quality scores.
    open_ended_task = False

    # Column names the configuration declares for the test CSV files.
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    # One entry per uppercase ASCII letter ("A" .. "Z"), with a single
    # space configured as the annotation separator.
    annotation_vector = list(string.ascii_uppercase)
    annotation_separator = " "

    def processJudgments(self, judgments):
        """Return ``judgments`` unchanged; no pre-processing is applied."""
        return judgments

class TutorialConfig(DefaultConfig):
    """Configuration used with ``tutorial/relex_example.csv`` in TestTutorial."""

    # processing of a closed task
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes",
        "manifestation",
        "treats",
        "prevents",
        "symptom",
        "diagnose_by_test_or_drug",
        "location",
        "side_effect",
        "contraindicates",
        "associated_with",
        "is_a",
        "part_of",
        "other",
        "none"]

    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]

    def processJudgments(self, judgments):
        """Normalise the annotation text in every output column.

        Each value is converted to ``str``, stripped of ``[`` and ``]``
        characters and lower-cased. The columns are overwritten on the
        object passed in, which is also returned.
        """
        def _clean(raw):
            # drop the square brackets first, then fold to lower case
            text = str(raw)
            text = text.replace('[', '')
            text = text.replace(']', '')
            return text.lower()

        for column in self.outputColumns:
            judgments[column] = judgments[column].apply(_clean)
        return judgments

class TutorialCustomizedConfig(DefaultConfig):
    """Configuration used with ``tutorial/relex_example_custom.csv``.

    Declares the same columns and annotation vector as TutorialConfig
    and additionally sets ``customColumns``.
    """

    # processing of a closed task
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes",
        "manifestation",
        "treats",
        "prevents",
        "symptom",
        "diagnose_by_test_or_drug",
        "location",
        "side_effect",
        "contraindicates",
        "associated_with",
        "is_a",
        "part_of",
        "other",
        "none"]

    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]
    customColumns = ["_id", "_unit_id", "_worker_id", "started_at", "created_at"]

    def processJudgments(self, judgments):
        """Normalise the annotation text in every output column.

        Each value is converted to ``str``, stripped of ``[`` and ``]``
        characters and lower-cased. The columns are overwritten on the
        object passed in, which is also returned.
        """
        def _clean(raw):
            # drop the square brackets first, then fold to lower case
            text = str(raw)
            text = text.replace('[', '')
            text = text.replace(']', '')
            return text.lower()

        for column in self.outputColumns:
            judgments[column] = judgments[column].apply(_clean)
        return judgments

# test_conf_const = TutorialConfig()
# test_config = test_conf_const.__class__
# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
# results = crowdtruth.run(data, config)

class TestAgreementClosed(unittest.TestCase):
    """Agreement tests over the CSV files under ``test/``.

    Subclasses can swap ``test_conf_const`` to run the same tests with a
    different configuration class.
    """

    test_conf_const = TestConfigClosed()

    def _load_and_run(self, csv_path):
        """Load ``csv_path`` with a fresh config instance and run the metrics.

        Returns a ``(results, config)`` pair, where ``config`` is the
        object handed back by ``crowdtruth.load``.
        """
        config_cls = self.test_conf_const.__class__
        data, config = crowdtruth.load(file=csv_path, config=config_cls())
        return crowdtruth.run(data, config), config

    def test_all_workers_agree(self):
        """Unit 1, workers W1..Wn and annotation "A" all score 1.0."""
        for n in range(2, 11):
            results, config = self._load_and_run("test/" + str(n) + "work_agr.csv")

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)

            worker_scores = results["workers"]["wqs"]
            for worker_no in range(1, n + 1):
                self.assertAlmostEqual(worker_scores.at["W" + str(worker_no)], 1.0)

            # annotation scores are only checked for closed tasks
            if config.open_ended_task:
                continue
            self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)

    def test_all_workers_disagree(self):
        """Unit 1 and workers W1..Wn all score 0.0."""
        for n in range(2, 11):
            results, config = self._load_and_run("test/" + str(n) + "work_disagr.csv")

            self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)

            for worker_no in range(1, n + 1):
                self.assertAlmostEqual(
                    results["workers"]["wqs"].at["W" + str(worker_no)],
                    0.0)
                if not config.open_ended_task:
                    # NOTE(review): the letter is picked with the file's worker
                    # count, not the inner loop's worker index, so the same
                    # annotation is re-checked on every pass -- confirm that
                    # this is intended.
                    letter = string.ascii_uppercase[n]
                    self.assertAlmostEqual(results["annotations"]["aqs"][letter], 0.0)

    def test_outlier_worker(self):
        """W1 scores 0.0 while unit 1 and workers W2..Wn score 1.0."""
        for n in range(3, 11):
            results, config = self._load_and_run("test/" + str(n) + "work_outlier.csv")

            worker_scores = results["workers"]["wqs"]
            self.assertAlmostEqual(worker_scores.at["W1"], 0.0)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for worker_no in range(2, n + 1):
                self.assertAlmostEqual(worker_scores.at["W" + str(worker_no)], 1.0)

            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                for label, expected in (("A", 0.0), ("B", 1.0)):
                    self.assertAlmostEqual(annotation_scores.at[label], expected)

    def test_incremental_worker_agreement(self):
        """Scores must grow as more workers agree."""
        for n in range(4, 11):
            results, config = self._load_and_run(
                "test/" + str(n - 2) + "vs" + str(n - 1) + "work_agr.csv")

            worker_scores = results["workers"]["wqs"]
            worker_n = "W" + str(n)

            # check that workers that agree on the same unit have the same quality score
            for x in range(2, n):
                if x != n - 1:
                    self.assertAlmostEqual(
                        worker_scores.at["W1"],
                        worker_scores.at["W" + str(x)])
                self.assertAlmostEqual(
                    worker_scores.at[worker_n],
                    worker_scores.at["W" + str(n + x - 1)])

            # workers that agree have a greater WQS than the worker that disagrees
            self.assertGreater(
                worker_scores.at["W1"],
                worker_scores.at["W" + str(n - 1)])
            self.assertGreater(
                worker_scores.at[worker_n],
                worker_scores.at["W" + str(2 * n - 1)])

            # the more workers agree on a unit, the higher the worker quality score
            self.assertGreater(worker_scores.at[worker_n], worker_scores.at["W1"])

            # the more workers agree on a unit, the higher the unit quality score
            unit_scores = results["units"]["uqs"]
            for lower, higher in ((1, 2), (1, 3), (2, 3)):
                self.assertLess(unit_scores.at[lower], unit_scores.at[higher])

            # the more workers agree on an annotation, the higher the
            # annotation quality score (closed tasks only)
            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                ordered_pairs = (
                    ("A", "C"),
                    ("B", "A"),
                    ("D", "C"),
                    ("A", "E"),
                    ("C", "E"))
                for lower, higher in ordered_pairs:
                    self.assertLess(
                        annotation_scores.at[lower],
                        annotation_scores.at[higher])

class TestAgreementOpen(TestAgreementClosed):
    """Re-run the inherited agreement tests using TestConfigOpen."""
    test_conf_const = TestConfigOpen()

class TestTutorial(unittest.TestCase):
    """Range checks on the metrics computed for ``tutorial/relex_example.csv``."""

    def test_metrics_correct_interval(self):
        """Every unit, worker and annotation quality score lies in [0, 1]."""
        config_cls = TutorialConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example.csv",
            config=config_cls())
        results = crowdtruth.run(data, config)

        # (results table, score column) pairs, checked in this order; the
        # "unit_annotation_score" values of the units table are not checked.
        score_columns = (
            ("units", "uqs"),
            ("workers", "wqs"),
            ("annotations", "aqs"))
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

class TestCustomizedTutorial(unittest.TestCase):
    """Range checks on the metrics computed for ``tutorial/relex_example_custom.csv``."""

    def test_metrics_correct_interval(self):
        """Every unit, worker and annotation quality score lies in [0, 1]."""
        config_cls = TutorialCustomizedConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example_custom.csv",
            config=config_cls())
        results = crowdtruth.run(data, config)

        # (results table, score column) pairs, checked in this order; the
        # "unit_annotation_score" values of the units table are not checked.
        score_columns = (
            ("units", "uqs"),
            ("workers", "wqs"),
            ("annotations", "aqs"))
        for table, column in score_columns:
            for _, score in results[table][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


1
2		import unittest
3		import string
4		# import logging
5
6		import crowdtruth
7		from crowdtruth.configuration import DefaultConfig
8
9		class TestConfigOpen(DefaultConfig):
10		inputColumns = ["in_col"]
11		outputColumns = ["out_col"]
12		open_ended_task = True
13		annotation_vector = list(string.ascii_uppercase)
14		def processJudgments(self, judgments):
15		return judgments
16
17		class TestConfigClosed(DefaultConfig):
18		inputColumns = ["in_col"]
19		outputColumns = ["out_col"]
20		open_ended_task = False
21		annotation_separator = " "
22		annotation_vector = list(string.ascii_uppercase)
23		def processJudgments(self, judgments):
24		return judgments
25
26	View Code Duplication	class TutorialConfig(DefaultConfig):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
27		inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
28		outputColumns = ["relations"]
29
30		# processing of a closed task
31		open_ended_task = False
32		annotation_separator = " "
33		annotation_vector = [
34		"causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
35		"location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
36		"other", "none"]
37
38		def processJudgments(self, judgments):
39		# any pre-processing of the input data goes here
40		for col in self.outputColumns:
41		# remove square brackets from annotations
42		judgments[col] = judgments[col].apply(lambda x: str(x).replace('[', ''))
43		judgments[col] = judgments[col].apply(lambda x: str(x).replace(']', ''))
44		judgments[col] = judgments[col].apply(lambda x: str(x).lower())
45		return judgments
46
47	View Code Duplication	class TutorialCustomizedConfig(DefaultConfig):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
48		inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
49		outputColumns = ["relations"]
50		customColumns = ["_id", "_unit_id", "_worker_id", "started_at", "created_at"]
51
52		# processing of a closed task
53		open_ended_task = False
54		annotation_separator = " "
55		annotation_vector = [
56		"causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
57		"location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
58		"other", "none"]
59
60		def processJudgments(self, judgments):
61		# any pre-processing of the input data goes here
62		for col in self.outputColumns:
63		# remove square brackets from annotations
64		judgments[col] = judgments[col].apply(lambda x: str(x).replace('[', ''))
65		judgments[col] = judgments[col].apply(lambda x: str(x).replace(']', ''))
66		judgments[col] = judgments[col].apply(lambda x: str(x).lower())
67		return judgments
68
69		# test_conf_const = TutorialConfig()
70		# test_config = test_conf_const.__class__
71		# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
72		# results = crowdtruth.run(data, config)
73
74		class TestAgreementClosed(unittest.TestCase):
75		test_conf_const = TestConfigClosed()
76
77	View Code Duplication	def test_all_workers_agree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
78		for w in range(2,11):
79		test_config = self.test_conf_const.__class__
80		data, config = crowdtruth.load(
81		file="test/" + str(w) + "work_agr.csv",
82		config=test_config())
83		results = crowdtruth.run(data, config)
84		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
85		for wid in range(w):
86		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 1.0)
87		if not config.open_ended_task:
88		self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)
89
90	View Code Duplication	def test_all_workers_disagree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
91		for w in range(2, 11):
92		test_config = self.test_conf_const.__class__
93		data, config = crowdtruth.load(
94		file="test/" + str(w) + "work_disagr.csv",
95		config=test_config())
96		results = crowdtruth.run(data, config)
97		self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
98		for wid in range(w):
99		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 0.0)
100		if not config.open_ended_task:
101		self.assertAlmostEqual(
102		results["annotations"]["aqs"][list(string.ascii_uppercase)[w]],
103		0.0)
104
105		def test_outlier_worker(self):
106		for w in range(3, 11):
107		test_config = self.test_conf_const.__class__
108		data, config = crowdtruth.load(
109		file="test/" + str(w) + "work_outlier.csv",
110		config=test_config())
111		results = crowdtruth.run(data, config)
112		self.assertAlmostEqual(
113		results["workers"]["wqs"].at["W1"],
114		0.0)
115		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
116		for x in range(1, w):
117		self.assertAlmostEqual(
118		results["workers"]["wqs"].at["W" + str(x + 1)],
119		1.0)
120
121		if not config.open_ended_task:
122		self.assertAlmostEqual(
123		results["annotations"]["aqs"].at["A"],
124		0.0)
125		self.assertAlmostEqual(
126		results["annotations"]["aqs"].at["B"],
127		1.0)
128
129
130		def test_incremental_worker_agreement(self):
131		for w in range(4, 11):
132		test_config = self.test_conf_const.__class__
133		data, config = crowdtruth.load(
134		file="test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv",
135		config=test_config())
136		results = crowdtruth.run(data, config)
137
138		# print str(config.open_ended_task)
139
140		# check that workers that agree on the same unit have the same quality score
141		for x in range(2, w):
142		if x != (w - 1):
143		self.assertAlmostEqual(
144		results["workers"]["wqs"].at["W1"],
145		results["workers"]["wqs"].at["W" + str(x)],)
146		self.assertAlmostEqual(
147		results["workers"]["wqs"].at["W" + str(w)],
148		results["workers"]["wqs"].at["W" + str(w + x - 1)])
149
150		# workers that agree have a greater WQS than the worker that disagrees
151		self.assertGreater(
152		results["workers"]["wqs"].at["W1"],
153		results["workers"]["wqs"].at["W" + str(w - 1)])
154		self.assertGreater(
155		results["workers"]["wqs"].at["W" + str(w)],
156		results["workers"]["wqs"].at["W" + str(2 * w - 1)])
157
158		# the more workers agree on a unit, the higher the worker quality score
159		self.assertGreater(
160		results["workers"]["wqs"].at["W" + str(w)],
161		results["workers"]["wqs"].at["W1"])
162		# print "W" + str(w) + ": " + str(results["workers"]["wqs"].at["W" + str(w)])
163		# print "W1: " + str(results["workers"]["wqs"].at["W1"])
164
165		# the more workers agree on a unit, the higher the unit quality score
166		self.assertLess(
167		results["units"]["uqs"].at[1],
168		results["units"]["uqs"].at[2])
169		self.assertLess(
170		results["units"]["uqs"].at[1],
171		results["units"]["uqs"].at[3])
172		self.assertLess(
173		results["units"]["uqs"].at[2],
174		results["units"]["uqs"].at[3])
175
176		# the more workers agree on an annotation, the higher the unit quality score
177		if not config.open_ended_task:
178		self.assertLess(
179		results["annotations"]["aqs"].at["A"],
180		results["annotations"]["aqs"].at["C"])
181		self.assertLess(
182		results["annotations"]["aqs"].at["B"],
183		results["annotations"]["aqs"].at["A"])
184		self.assertLess(
185		results["annotations"]["aqs"].at["D"],
186		results["annotations"]["aqs"].at["C"])
187		self.assertLess(
188		results["annotations"]["aqs"].at["A"],
189		results["annotations"]["aqs"].at["E"])
190		self.assertLess(
191		results["annotations"]["aqs"].at["C"],
192		results["annotations"]["aqs"].at["E"])
193
194		class TestAgreementOpen(TestAgreementClosed):
195		test_conf_const = TestConfigOpen()
196
197	View Code Duplication	class TestTutorial(unittest.TestCase):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
198		def test_metrics_correct_interval(self):
199		test_conf_const = TutorialConfig()
200		test_config = test_conf_const.__class__
201		data, config = crowdtruth.load(
202		file="tutorial/relex_example.csv",
203		config=test_config())
204		results = crowdtruth.run(data, config)
205		# for _, val_arr in results["units"]["unit_annotation_score"].items():
206		# for _, val in val_arr.items():
207		# self.assertGreaterEqual(val, 0.0)
208		# self.assertLessEqual(val, 1.0)
209		for _, val in results["units"]["uqs"].items():
210		self.assertGreaterEqual(val, 0.0)
211		self.assertLessEqual(val, 1.0)
212		for _, val in results["workers"]["wqs"].items():
213		self.assertGreaterEqual(val, 0.0)
214		self.assertLessEqual(val, 1.0)
215		for _, val in results["annotations"]["aqs"].items():
216		self.assertGreaterEqual(val, 0.0)
217		self.assertLessEqual(val, 1.0)
218
219	View Code Duplication	class TestCustomizedTutorial(unittest.TestCase):
		0 ignored issues – show Duplication introduced 2018-08-17 14:25 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
220		def test_metrics_correct_interval(self):
221		test_conf_const = TutorialCustomizedConfig()
222		test_config = test_conf_const.__class__
223		data, config = crowdtruth.load(
224		file="tutorial/relex_example_custom.csv",
225		config=test_config())
226		results = crowdtruth.run(data, config)
227		# for _, val_arr in results["units"]["unit_annotation_score"].items():
228		# for _, val in val_arr.items():
229		# self.assertGreaterEqual(val, 0.0)
230		# self.assertLessEqual(val, 1.0)
231		for _, val in results["units"]["uqs"].items():
232		self.assertGreaterEqual(val, 0.0)
233		self.assertLessEqual(val, 1.0)
234		for _, val in results["workers"]["wqs"].items():
235		self.assertGreaterEqual(val, 0.0)
236		self.assertLessEqual(val, 1.0)
237		for _, val in results["annotations"]["aqs"].items():
238		self.assertGreaterEqual(val, 0.0)
239		self.assertLessEqual(val, 1.0)
240
241		if __name__ == '__main__':
242		unittest.main()
243

CrowdTruth / CrowdTruth-core

GitHub Access Token became invalid

Push — master ( c83738...fbc546 )

TutorialCustomizedConfig.processJudgments() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like