test_metrics.TestConfigClosed.processJudgments() - Code Metrics - Inspection of "fixed bug & added test so that metrics values are..." - CrowdTruth/CrowdTruth-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( aeff40...59002d )

by Anca

created 2018-08-16 16:15 UTC

test_metrics.TestConfigClosed.processJudgments() A

↳ Parent: test_metrics

Complexity

Conditions

Size

Total Lines	2
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	2
dl	0
loc	2
rs	10
c	0
b	0
f	0
cc	1
nop	2


import unittest
import string
# import logging

import crowdtruth
from crowdtruth.configuration import DefaultConfig

class TestConfigOpen(DefaultConfig):
    """Configuration for the agreement tests with the open-ended flag set.

    Judgments are handed back unchanged by ``processJudgments``.
    """

    # Task type flag; TestConfigClosed uses False here instead.
    open_ended_task = True

    # One input column and one output column
    # (presumably the column names used by the CSV fixtures -- confirm).
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    # Annotation vocabulary: the 26 uppercase ASCII letters, "A" through "Z".
    annotation_vector = list(string.ascii_uppercase)

    def processJudgments(self, judgments):
        """Return ``judgments`` as received; no pre-processing is applied."""
        return judgments

class TestConfigClosed(DefaultConfig):
    """Configuration for the agreement tests with the open-ended flag cleared.

    Judgments are handed back unchanged by ``processJudgments``.
    """

    # Task type flag and the separator between annotations: a single space.
    open_ended_task = False
    annotation_separator = " "

    # One input column and one output column
    # (presumably the column names used by the CSV fixtures -- confirm).
    inputColumns = ["in_col"]
    outputColumns = ["out_col"]

    # Annotation vocabulary: the 26 uppercase ASCII letters, "A" through "Z".
    annotation_vector = list(string.ascii_uppercase)

    def processJudgments(self, judgments):
        """Return ``judgments`` as received; no pre-processing is applied."""
        return judgments

class TutorialConfig(DefaultConfig):
    """Configuration used by TestTutorial for ``tutorial/relex_example.csv``.

    Declares the input/output columns, a closed annotation vocabulary and a
    ``processJudgments`` hook that normalises the raw annotation strings.
    """

    inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
    outputColumns = ["relations"]

    # Closed task: annotations are separated by a single space and drawn
    # from the fixed vocabulary below.
    open_ended_task = False
    annotation_separator = " "
    annotation_vector = [
        "causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
        "location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
        "other", "none"]

    def processJudgments(self, judgments):
        """Normalise every output column and return ``judgments``.

        Each value is converted to ``str``, stripped of ``[`` and ``]``
        characters and lower-cased. The columns are reassigned on the
        object that was passed in, which is also the return value.
        """
        def normalize(raw):
            # Same three steps as before, done in one pass per value:
            # drop '[', drop ']', then lower-case.
            text = str(raw)
            for bracket in ("[", "]"):
                text = text.replace(bracket, "")
            return text.lower()

        for col in self.outputColumns:
            judgments[col] = judgments[col].apply(normalize)
        return judgments

# test_conf_const = TutorialConfig()
# test_config = test_conf_const.__class__
# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
# results = crowdtruth.run(data, config)

class TestAgreementClosed(unittest.TestCase):
    """Checks CrowdTruth quality scores on the agreement CSV files in ``test/``.

    Each test loads a series of CSV files, runs the metrics with a fresh
    instance of the configuration class behind ``test_conf_const`` and asserts
    on the unit (``uqs``), worker (``wqs``) and annotation (``aqs``) quality
    scores. Annotation scores are only checked when the loaded configuration
    has ``open_ended_task`` set to a false value.
    """
    test_conf_const = TestConfigClosed()

    def _load_and_run(self, file_name):
        """Load ``file_name`` with a fresh config instance and run the metrics.

        Args:
            file_name: path of the CSV file handed to ``crowdtruth.load``.

        Returns:
            A ``(results, config)`` pair: the value returned by
            ``crowdtruth.run`` and the configuration object returned by
            ``crowdtruth.load``.
        """
        # Go through __class__ so that a subclass which rebinds
        # test_conf_const gets its own configuration type instantiated.
        config_class = self.test_conf_const.__class__
        data, config = crowdtruth.load(file=file_name, config=config_class())
        return crowdtruth.run(data, config), config

    def test_all_workers_agree(self):
        """For 2..10 workers: unit 1, workers W1..Ww and annotation "A" score 1."""
        for w in range(2, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_agr.csv")
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            worker_scores = results["workers"]["wqs"]
            for wid in range(1, w + 1):
                self.assertAlmostEqual(worker_scores.at["W" + str(wid)], 1.0)
            if not config.open_ended_task:
                self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)

    def test_all_workers_disagree(self):
        """For 2..10 workers: unit 1 and workers W1..Ww all score 0."""
        for w in range(2, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_disagr.csv")
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
            worker_scores = results["workers"]["wqs"]
            for wid in range(1, w + 1):
                self.assertAlmostEqual(worker_scores.at["W" + str(wid)], 0.0)
            if not config.open_ended_task:
                # This check never depended on the per-worker loop variable,
                # so it is evaluated once per file instead of once per worker.
                # NOTE(review): the letter is selected with index ``w`` (the
                # worker count), not with a worker index -- confirm against
                # the CSV fixtures that this is the intended annotation.
                self.assertAlmostEqual(
                    results["annotations"]["aqs"][string.ascii_uppercase[w]],
                    0.0)

    def test_outlier_worker(self):
        """For 3..10 workers: W1 scores 0 while unit 1 and W2..Ww score 1."""
        for w in range(3, 11):
            results, config = self._load_and_run("test/" + str(w) + "work_outlier.csv")
            worker_scores = results["workers"]["wqs"]
            self.assertAlmostEqual(worker_scores.at["W1"], 0.0)
            self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
            for wid in range(2, w + 1):
                self.assertAlmostEqual(worker_scores.at["W" + str(wid)], 1.0)

            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                self.assertAlmostEqual(annotation_scores.at["A"], 0.0)
                self.assertAlmostEqual(annotation_scores.at["B"], 1.0)

    def test_incremental_worker_agreement(self):
        """Scores must grow with the number of agreeing workers.

        Runs on ``test/<w-2>vs<w-1>work_agr.csv`` for w in 4..10 and compares
        worker, unit and annotation quality scores against each other.
        """
        for w in range(4, 11):
            results, config = self._load_and_run(
                "test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv")
            worker_scores = results["workers"]["wqs"]
            unit_scores = results["units"]["uqs"]

            # check that workers that agree on the same unit have the same quality score
            for x in range(2, w):
                if x != (w - 1):
                    # W(w-1) is skipped here; it is asserted to score lower below.
                    self.assertAlmostEqual(
                        worker_scores.at["W1"],
                        worker_scores.at["W" + str(x)])
                self.assertAlmostEqual(
                    worker_scores.at["W" + str(w)],
                    worker_scores.at["W" + str(w + x - 1)])

            # workers that agree have a greater WQS than the worker that disagrees
            self.assertGreater(
                worker_scores.at["W1"],
                worker_scores.at["W" + str(w - 1)])
            self.assertGreater(
                worker_scores.at["W" + str(w)],
                worker_scores.at["W" + str(2 * w - 1)])

            # the more workers agree on a unit, the higher the worker quality score
            self.assertGreater(
                worker_scores.at["W" + str(w)],
                worker_scores.at["W1"])

            # the more workers agree on a unit, the higher the unit quality score
            self.assertLess(unit_scores.at[1], unit_scores.at[2])
            self.assertLess(unit_scores.at[1], unit_scores.at[3])
            self.assertLess(unit_scores.at[2], unit_scores.at[3])

            # the more workers agree on an annotation, the higher the
            # annotation quality score
            if not config.open_ended_task:
                annotation_scores = results["annotations"]["aqs"]
                self.assertLess(annotation_scores.at["A"], annotation_scores.at["C"])
                self.assertLess(annotation_scores.at["B"], annotation_scores.at["A"])
                self.assertLess(annotation_scores.at["D"], annotation_scores.at["C"])
                self.assertLess(annotation_scores.at["A"], annotation_scores.at["E"])
                self.assertLess(annotation_scores.at["C"], annotation_scores.at["E"])

class TestAgreementOpen(TestAgreementClosed):
    """Runs the tests inherited from TestAgreementClosed with a TestConfigOpen configuration."""
    test_conf_const = TestConfigOpen()

class TestTutorial(unittest.TestCase):
    """Range check of the quality scores computed for the tutorial data set."""

    def test_metrics_correct_interval(self):
        """Every unit, worker and annotation quality score lies in [0, 1]."""
        config_class = TutorialConfig().__class__
        data, config = crowdtruth.load(
            file="tutorial/relex_example.csv",
            config=config_class())
        results = crowdtruth.run(data, config)

        # NOTE: the per-unit "unit_annotation_score" values are not range
        # checked here; only the three aggregate score columns below are.
        score_columns = (
            ("units", "uqs"),
            ("workers", "wqs"),
            ("annotations", "aqs"),
        )
        for section, column in score_columns:
            for _, score in results[section][column].items():
                self.assertGreaterEqual(score, 0.0)
                self.assertLessEqual(score, 1.0)

# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


1
2		import unittest
3		import string
4		# import logging
5
6		import crowdtruth
7		from crowdtruth.configuration import DefaultConfig
8
9		class TestConfigOpen(DefaultConfig):
10		inputColumns = ["in_col"]
11		outputColumns = ["out_col"]
12		open_ended_task = True
13		annotation_vector = list(string.ascii_uppercase)
14		def processJudgments(self, judgments):
15		return judgments
16
17		class TestConfigClosed(DefaultConfig):
18		inputColumns = ["in_col"]
19		outputColumns = ["out_col"]
20		open_ended_task = False
21		annotation_separator = " "
22		annotation_vector = list(string.ascii_uppercase)
23		def processJudgments(self, judgments):
24		return judgments
25
26		class TutorialConfig(DefaultConfig):
27		inputColumns = ["term1", "b1", "e1", "term2", "b2", "e2", "sentence"]
28		outputColumns = ["relations"]
29
30		# processing of a closed task
31		open_ended_task = False
32		annotation_separator = " "
33		annotation_vector = [
34		"causes", "manifestation", "treats", "prevents", "symptom", "diagnose_by_test_or_drug",
35		"location", "side_effect", "contraindicates", "associated_with", "is_a", "part_of",
36		"other", "none"]
37
38		def processJudgments(self, judgments):
39		# any pre-processing of the input data goes here
40		for col in self.outputColumns:
41		# remove square brackets from annotations
42		judgments[col] = judgments[col].apply(lambda x: str(x).replace('[', ''))
43		judgments[col] = judgments[col].apply(lambda x: str(x).replace(']', ''))
44		judgments[col] = judgments[col].apply(lambda x: str(x).lower())
45		return judgments
46
47		# test_conf_const = TutorialConfig()
48		# test_config = test_conf_const.__class__
49		# data, config = crowdtruth.load(file = "tutorial/relex_example.csv", config = test_config())
50		# results = crowdtruth.run(data, config)
51
52		class TestAgreementClosed(unittest.TestCase):
53		test_conf_const = TestConfigClosed()
54
55	View Code Duplication	def test_all_workers_agree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
56		for w in range(2,11):
57		test_config = self.test_conf_const.__class__
58		data, config = crowdtruth.load(
59		file="test/" + str(w) + "work_agr.csv",
60		config=test_config())
61		results = crowdtruth.run(data, config)
62		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
63		for wid in range(w):
64		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 1.0)
65		if not config.open_ended_task:
66		self.assertAlmostEqual(results["annotations"]["aqs"]["A"], 1.0)
67
68	View Code Duplication	def test_all_workers_disagree(self):
		0 ignored issues – show Duplication introduced 2018-08-14 12:11 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
69		for w in range(2, 11):
70		test_config = self.test_conf_const.__class__
71		data, config = crowdtruth.load(
72		file="test/" + str(w) + "work_disagr.csv",
73		config=test_config())
74		results = crowdtruth.run(data, config)
75		self.assertAlmostEqual(results["units"]["uqs"].at[1], 0.0)
76		for wid in range(w):
77		self.assertAlmostEqual(results["workers"]["wqs"].at["W" + str(wid + 1)], 0.0)
78		if not config.open_ended_task:
79		self.assertAlmostEqual(
80		results["annotations"]["aqs"][list(string.ascii_uppercase)[w]],
81		0.0)
82
83		def test_outlier_worker(self):
84		for w in range(3, 11):
85		test_config = self.test_conf_const.__class__
86		data, config = crowdtruth.load(
87		file="test/" + str(w) + "work_outlier.csv",
88		config=test_config())
89		results = crowdtruth.run(data, config)
90		self.assertAlmostEqual(
91		results["workers"]["wqs"].at["W1"],
92		0.0)
93		self.assertAlmostEqual(results["units"]["uqs"].at[1], 1.0)
94		for x in range(1, w):
95		self.assertAlmostEqual(
96		results["workers"]["wqs"].at["W" + str(x + 1)],
97		1.0)
98
99		if not config.open_ended_task:
100		self.assertAlmostEqual(
101		results["annotations"]["aqs"].at["A"],
102		0.0)
103		self.assertAlmostEqual(
104		results["annotations"]["aqs"].at["B"],
105		1.0)
106
107
108		def test_incremental_worker_agreement(self):
109		for w in range(4, 11):
110		test_config = self.test_conf_const.__class__
111		data, config = crowdtruth.load(
112		file="test/" + str(w - 2) + "vs" + str(w - 1) + "work_agr.csv",
113		config=test_config())
114		results = crowdtruth.run(data, config)
115
116		# print str(config.open_ended_task)
117
118		# check that workers that agree on the same unit have the same quality score
119		for x in range(2, w):
120		if x != (w - 1):
121		self.assertAlmostEqual(
122		results["workers"]["wqs"].at["W1"],
123		results["workers"]["wqs"].at["W" + str(x)],)
124		self.assertAlmostEqual(
125		results["workers"]["wqs"].at["W" + str(w)],
126		results["workers"]["wqs"].at["W" + str(w + x - 1)])
127
128		# workers that agree have a greater WQS than the worker that disagrees
129		self.assertGreater(
130		results["workers"]["wqs"].at["W1"],
131		results["workers"]["wqs"].at["W" + str(w - 1)])
132		self.assertGreater(
133		results["workers"]["wqs"].at["W" + str(w)],
134		results["workers"]["wqs"].at["W" + str(2 * w - 1)])
135
136		# the more workers agree on a unit, the higher the worker quality score
137		self.assertGreater(
138		results["workers"]["wqs"].at["W" + str(w)],
139		results["workers"]["wqs"].at["W1"])
140		# print "W" + str(w) + ": " + str(results["workers"]["wqs"].at["W" + str(w)])
141		# print "W1: " + str(results["workers"]["wqs"].at["W1"])
142
143		# the more workers agree on a unit, the higher the unit quality score
144		self.assertLess(
145		results["units"]["uqs"].at[1],
146		results["units"]["uqs"].at[2])
147		self.assertLess(
148		results["units"]["uqs"].at[1],
149		results["units"]["uqs"].at[3])
150		self.assertLess(
151		results["units"]["uqs"].at[2],
152		results["units"]["uqs"].at[3])
153
154		# the more workers agree on an annotation, the higher the unit quality score
155		if not config.open_ended_task:
156		self.assertLess(
157		results["annotations"]["aqs"].at["A"],
158		results["annotations"]["aqs"].at["C"])
159		self.assertLess(
160		results["annotations"]["aqs"].at["B"],
161		results["annotations"]["aqs"].at["A"])
162		self.assertLess(
163		results["annotations"]["aqs"].at["D"],
164		results["annotations"]["aqs"].at["C"])
165		self.assertLess(
166		results["annotations"]["aqs"].at["A"],
167		results["annotations"]["aqs"].at["E"])
168		self.assertLess(
169		results["annotations"]["aqs"].at["C"],
170		results["annotations"]["aqs"].at["E"])
171
172		class TestAgreementOpen(TestAgreementClosed):
173		test_conf_const = TestConfigOpen()
174
175		class TestTutorial(unittest.TestCase):
176		def test_metrics_correct_interval(self):
177		test_conf_const = TutorialConfig()
178		test_config = test_conf_const.__class__
179		data, config = crowdtruth.load(
180		file="tutorial/relex_example.csv",
181		config=test_config())
182		results = crowdtruth.run(data, config)
183		# for _, val_arr in results["units"]["unit_annotation_score"].items():
184		# for _, val in val_arr.items():
185		# self.assertGreaterEqual(val, 0.0)
186		# self.assertLessEqual(val, 1.0)
187		for _, val in results["units"]["uqs"].items():
188		self.assertGreaterEqual(val, 0.0)
189		self.assertLessEqual(val, 1.0)
190		for _, val in results["workers"]["wqs"].items():
191		self.assertGreaterEqual(val, 0.0)
192		self.assertLessEqual(val, 1.0)
193		for _, val in results["annotations"]["aqs"].items():
194		self.assertGreaterEqual(val, 0.0)
195		self.assertLessEqual(val, 1.0)
196
197		if __name__ == '__main__':
198		unittest.main()
199

CrowdTruth / CrowdTruth-core

GitHub Access Token became invalid

Push — master ( aeff40...59002d )

test_metrics.TestConfigClosed.processJudgments() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like