import itertools
import json
from unittest import TestCase

from nltk.stem.wordnet import WordNetLemmatizer
from ppp_questionparsing_grammatical import Word, DependenciesTree, computeTree, NamedEntityMerging, PrepositionMerging

import data


class PreprocessingMergeTests(TestCase):
    """Tests for NamedEntityMerging and PrepositionMerging on dependency trees.

    NOTE(review): this class was recovered from a coverage-report table in
    which the original indentation was lost. Loop bodies were re-nested
    according to the names each statement uses -- confirm against the
    canonical file.
    """

    # Named-entity tags exercised by the hand-built tree tests.
    NAMED_ENTITY_TAGS = ('LOCATION', 'PERSON', 'NUMBER', 'MONEY', 'MISC')

    # ------------------------------------------------------------------
    # Private assertion helpers (not collected: names do not start with
    # 'test').
    # ------------------------------------------------------------------

    def _assertPairUntouched(self, parent, child):
        """Assert that a hand-built parent/child pair was left unmerged.

        The parent must still hold only Word('parent', 1), keep `child` as
        its single child, and `child` must still point back to `parent`.
        """
        self.assertEqual(parent.wordList, [Word('parent', 1)])
        self.assertEqual(parent.child, [child])
        self.assertEqual(child.parent, parent)

    def _assertSiblingsUntouched(self, parent, child1, child2):
        """Assert that a parent with two hand-built children was left unmerged."""
        self.assertEqual(parent.wordList, [Word('parent', 1)])
        self.assertEqual(parent.child, [child1, child2])
        self.assertEqual(child1.parent, parent)
        self.assertEqual(child2.parent, parent)

    def _assertNode(self, node, wordList, namedEntityTag, dependency, parent, childCount):
        """Assert the attributes of one node of a computed dependency tree.

        Args:
            node: the tree node under test.
            wordList: expected value of `node.wordList`.
            namedEntityTag: expected value of `node.namedEntityTag`.
            dependency: expected value of `node.dependency`.
            parent: expected parent node, or None for the root.
            childCount: expected number of children.

        `subtreeType` and `dfsTag` are always expected to be 'undef' and 0,
        as in every node checked by the tests of this class.
        """
        self.assertEqual(node.wordList, wordList)
        self.assertEqual(node.namedEntityTag, namedEntityTag)
        self.assertEqual(node.dependency, dependency)
        if parent is None:
            self.assertIsNone(node.parent)
        else:
            self.assertEqual(node.parent, parent)
        self.assertEqual(len(node.child), childCount)
        self.assertEqual(node.subtreeType, 'undef')
        self.assertEqual(node.dfsTag, 0)

    # ------------------------------------------------------------------
    # NamedEntityMerging on hand-built trees
    # ------------------------------------------------------------------

    def testBasicNamedEntityChildParent(self):
        """Check named-entity merging between a parent and its only child."""
        # Same tag on both nodes.
        for tag in self.NAMED_ENTITY_TAGS:
            parent = DependenciesTree('parent', 1, namedEntityTag=tag)
            child = DependenciesTree('child', 2, parent=parent, namedEntityTag=tag)
            parent.child.append(child)
            # Expected: a 'conj_and' edge prevents the merge.
            child.dependency = 'conj_and'
            NamedEntityMerging(parent).merge()
            self._assertPairUntouched(parent, child)
            # Expected: with another dependency the child is absorbed.
            child.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self.assertIn(Word('parent', 1), parent.wordList)
            self.assertIn(Word('child', 2), parent.wordList)
            self.assertEqual(parent.child, [])
        # Different tags: expected to stay unmerged whatever the dependency.
        for (tag1, tag2) in itertools.permutations(self.NAMED_ENTITY_TAGS, 2):
            parent = DependenciesTree('parent', 1, namedEntityTag=tag1)
            child = DependenciesTree('child', 2, parent=parent, namedEntityTag=tag2)
            parent.child.append(child)
            child.dependency = 'conj_and'
            NamedEntityMerging(parent).merge()
            self._assertPairUntouched(parent, child)
            child.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self._assertPairUntouched(parent, child)

    def testBasicNamedEntitySisterBrother(self):
        """Check named-entity merging between two children of an untagged parent."""
        # Same tag on both children.
        for tag in self.NAMED_ENTITY_TAGS:
            parent = DependenciesTree('parent', 1, namedEntityTag='undef')
            child1 = DependenciesTree('child1', 2, parent=parent,
                                      dependency='conj_and', namedEntityTag=tag)
            child2 = DependenciesTree('child2', 3, parent=parent,
                                      dependency='conj_and', namedEntityTag=tag)
            parent.child += [child1, child2]
            # Expected: 'conj_and' edges prevent the merge.
            NamedEntityMerging(parent).merge()
            self._assertSiblingsUntouched(parent, child1, child2)
            # Expected: with another dependency both children collapse into
            # a single node that stays attached to the parent.
            child1.dependency = 'foo'
            child2.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(len(parent.child), 1)
            self.assertIn(Word('child1', 2), parent.child[0].wordList)
            self.assertIn(Word('child2', 3), parent.child[0].wordList)
            self.assertEqual(parent.child[0].parent, parent)
        # Different tags: expected to stay unmerged whatever the dependency.
        for (tag1, tag2) in itertools.permutations(self.NAMED_ENTITY_TAGS, 2):
            parent = DependenciesTree('parent', 1, namedEntityTag='undef')
            child1 = DependenciesTree('child1', 2, parent=parent,
                                      dependency='conj_and', namedEntityTag=tag1)
            child2 = DependenciesTree('child2', 3, parent=parent,
                                      dependency='conj_and', namedEntityTag=tag2)
            parent.child += [child1, child2]
            NamedEntityMerging(parent).merge()
            self._assertSiblingsUntouched(parent, child1, child2)
            child1.dependency = 'foo'
            child2.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self._assertSiblingsUntouched(parent, child1, child2)

    # ------------------------------------------------------------------
    # PrepositionMerging on hand-built trees
    # ------------------------------------------------------------------

    def testBasicPrepositionNode(self):
        """Check merging of child nodes whose word is a preposition."""
        # A child that is not a preposition is expected to be left alone.
        parent = DependenciesTree('parent', 1)
        child = DependenciesTree('child', 2, parent=parent, dependency='foo')
        parent.child.append(child)
        PrepositionMerging(parent).merge()
        self._assertPairUntouched(parent, child)
        # Every known preposition is expected to be absorbed by the parent.
        for prep in PrepositionMerging.prepositionSet:
            parent = DependenciesTree('parent', 1)
            child = DependenciesTree(prep, 2, parent=parent, dependency='foo')
            parent.child.append(child)
            child.dependency = 'conj_and'
            PrepositionMerging(parent).merge()
            self.assertIn(Word('parent', 1), parent.wordList)
            self.assertIn(Word(prep, 2), parent.wordList)
            self.assertEqual(parent.child, [])

    def testBasicPrepositionEdge(self):
        """Check merging of prepositions carried by the dependency label."""
        # 'prep_<x>' edges under a verb: the preposition is expected to be
        # appended to the parent's word and the edge renamed to 'prep'.
        for prep in ['in', 'of', 'with', 'by']:
            parent = DependenciesTree('parent', 1)
            parent.wordList[0].pos = 'VB'
            child = DependenciesTree('child', 2, parent=parent, dependency='prep_' + prep)
            parent.child.append(child)
            PrepositionMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent ' + prep, 1, 'VB')])
            self.assertEqual(parent.child, [child])
            self.assertEqual(child.dependency, 'prep')
        # An 'agent' edge is expected to append 'by' to the parent's word.
        parent = DependenciesTree('parent', 1)
        parent.wordList[0].pos = 'VB'
        child = DependenciesTree('child', 2, parent=parent, dependency='agent')
        parent.child.append(child)
        PrepositionMerging(parent).merge()
        self.assertEqual(parent.wordList, [Word('parent by', 1, 'VB')])
        self.assertEqual(parent.child, [child])

    # ------------------------------------------------------------------
    # NamedEntityMerging on trees computed from fixture data
    # ------------------------------------------------------------------

    def testNamedEntity1(self):
        """Check the merged tree built from the `give_john_smith` fixture."""
        tree = computeTree(data.give_john_smith())
        NamedEntityMerging(tree).merge()
        tree.sort()
        root = tree
        # Root
        self._assertNode(root, [Word("ROOT", 0)], 'undef', 'undef', None, 1)
        # Lives
        lives = root.child[0]
        self._assertNode(lives, [Word("lives", 3, 'VBZ')], 'undef', 'ROOT', tree, 2)
        # John Smith
        smith = lives.child[0]
        self._assertNode(smith, [Word("John", 1, 'NNP'), Word("Smith", 2, 'NNP')],
                         'PERSON', 'nsubj', lives, 0)
        # United Kingdom
        kingdom = lives.child[1]
        self._assertNode(kingdom, [Word("United", 6, 'NNP'), Word("Kingdom", 7, 'NNP')],
                         'LOCATION', 'prep_in', lives, 1)
        # The
        the = kingdom.child[0]
        self._assertNode(the, [Word("the", 5, 'DT')], 'undef', 'det', kingdom, 0)

    def testNamedEntity2(self):
        """Check the merged tree built from the `give_obama_president_usa` fixture."""
        tree = computeTree(data.give_obama_president_usa())
        NamedEntityMerging(tree).merge()
        tree.sort()
        root = tree
        # Root
        self._assertNode(root, [Word("ROOT", 0)], 'undef', 'undef', None, 1)
        # Is
        is_ = root.child[0]
        self._assertNode(is_, [Word("is", 2, 'VBZ')], 'undef', 'ROOT', tree, 2)
        # Obama
        obama = is_.child[0]
        self._assertNode(obama, [Word("Obama", 1, 'NNP')], 'PERSON', 'nsubj', is_, 0)
        # president
        president = is_.child[1]
        self._assertNode(president, [Word("president", 6, 'NN')], 'undef', 'xcomp', is_, 2)
        # The
        the = president.child[0]
        self._assertNode(the, [Word("the", 3, 'DT')], 'undef', 'det', president, 0)
        # United States
        united = president.child[1]
        self._assertNode(united, [Word("United", 4, 'NNP'), Word("States", 5, 'NNPS')],
                         'LOCATION', 'compound', president, 0)

    # ------------------------------------------------------------------
    # String rendering after both merging passes
    # ------------------------------------------------------------------

    def testStr2(self):
        """Check str(tree) after named-entity then preposition merging."""
        tree = computeTree(data.give_john_smith())
        NamedEntityMerging(tree).merge()
        PrepositionMerging(tree).merge()
        # Show the complete diff if the rendered tree does not match.
        self.maxDiff = None
        tree.sort()
        self.assertEqual(str(tree), data.give_john_smith_stringMerge())