|
1
|
1 |
|
import json |
|
2
|
1 |
|
import itertools |
|
3
|
1 |
|
from nltk.stem.wordnet import WordNetLemmatizer |
|
4
|
1 |
|
from ppp_questionparsing_grammatical import Word, DependenciesTree, computeTree, NamedEntityMerging, PrepositionMerging |
|
5
|
1 |
|
import data |
|
6
|
|
|
|
|
7
|
1 |
|
from unittest import TestCase |
|
8
|
|
|
|
|
9
|
1 |
|
class PreprocessingMergeTests(TestCase):
    """Tests for NamedEntityMerging and PrepositionMerging on dependency trees."""

    # Named-entity tags exercised by the basic merging tests.
    NAMED_ENTITY_TAGS = ('LOCATION', 'PERSON', 'NUMBER', 'MONEY', 'MISC')

    def _assertNode(self, node, wordList, namedEntityTag, dependency, parent, childCount):
        """Assert the attributes of a single tree node.

        node           -- the tree node under inspection
        wordList       -- expected value of node.wordList
        namedEntityTag -- expected value of node.namedEntityTag
        dependency     -- expected value of node.dependency
        parent         -- expected parent node, or None for the root
        childCount     -- expected number of children

        subtreeType and dfsTag are checked against 'undef' and 0, the
        values every node inspected by the tests below is expected to carry.
        """
        self.assertEqual(node.wordList, wordList)
        self.assertEqual(node.namedEntityTag, namedEntityTag)
        self.assertEqual(node.dependency, dependency)
        if parent is None:
            self.assertIsNone(node.parent)
        else:
            self.assertEqual(node.parent, parent)
        self.assertEqual(len(node.child), childCount)
        self.assertEqual(node.subtreeType, 'undef')
        self.assertEqual(node.dfsTag, 0)

    def testBasicNamedEntityChildParent(self):
        """A child is merged into its parent only for equal tags and a non-'conj_and' edge."""
        for tag in self.NAMED_ENTITY_TAGS:
            parent = DependenciesTree('parent', 1, namedEntityTag=tag)
            child = DependenciesTree('child', 2, parent=parent, namedEntityTag=tag)
            parent.child.append(child)

            # Same tag, but a 'conj_and' edge: nothing must be merged.
            child.dependency = 'conj_and'
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(parent.child, [child])
            self.assertEqual(child.parent, parent)

            # Same tag, ordinary edge: the child is absorbed by the parent.
            child.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self.assertIn(Word('parent', 1), parent.wordList)
            self.assertIn(Word('child', 2), parent.wordList)
            self.assertEqual(parent.child, [])

        for (tag1, tag2) in itertools.permutations(self.NAMED_ENTITY_TAGS, 2):
            parent = DependenciesTree('parent', 1, namedEntityTag=tag1)
            child = DependenciesTree('child', 2, parent=parent, namedEntityTag=tag2)
            parent.child.append(child)

            # Different tags: the tree is left untouched whatever the edge is.
            for dependency in ('conj_and', 'foo'):
                child.dependency = dependency
                NamedEntityMerging(parent).merge()
                self.assertEqual(parent.wordList, [Word('parent', 1)])
                self.assertEqual(parent.child, [child])
                self.assertEqual(child.parent, parent)

    def testBasicNamedEntitySisterBrother(self):
        """Siblings are merged together only for equal tags and non-'conj_and' edges."""
        for tag in self.NAMED_ENTITY_TAGS:
            parent = DependenciesTree('parent', 1, namedEntityTag='undef')
            child1 = DependenciesTree('child1', 2, parent=parent, dependency='conj_and', namedEntityTag=tag)
            child2 = DependenciesTree('child2', 3, parent=parent, dependency='conj_and', namedEntityTag=tag)
            parent.child += [child1, child2]

            # Same tag, but 'conj_and' edges: both children stay separate.
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(parent.child, [child1, child2])
            self.assertEqual(child1.parent, parent)
            self.assertEqual(child2.parent, parent)

            # Same tag, ordinary edges: the two children collapse into one node.
            child1.dependency = 'foo'
            child2.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(len(parent.child), 1)
            merged = parent.child[0]
            self.assertIn(Word('child1', 2), merged.wordList)
            self.assertIn(Word('child2', 3), merged.wordList)
            self.assertEqual(merged.parent, parent)

        for (tag1, tag2) in itertools.permutations(self.NAMED_ENTITY_TAGS, 2):
            parent = DependenciesTree('parent', 1, namedEntityTag='undef')
            child1 = DependenciesTree('child1', 2, parent=parent, dependency='conj_and', namedEntityTag=tag1)
            child2 = DependenciesTree('child2', 3, parent=parent, dependency='conj_and', namedEntityTag=tag2)
            parent.child += [child1, child2]

            # Different tags: no merge with the initial 'conj_and' edges...
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(parent.child, [child1, child2])
            self.assertEqual(child1.parent, parent)
            self.assertEqual(child2.parent, parent)

            # ...and still no merge once the edges are ordinary ones.
            child1.dependency = 'foo'
            child2.dependency = 'foo'
            NamedEntityMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent', 1)])
            self.assertEqual(parent.child, [child1, child2])
            self.assertEqual(child1.parent, parent)
            self.assertEqual(child2.parent, parent)

    def testBasicPrepositionNode(self):
        """A child whose word is a known preposition is merged into its parent."""
        # A child that is not a preposition is left in place.
        parent = DependenciesTree('parent', 1)
        child = DependenciesTree('child', 2, parent=parent, dependency='foo')
        parent.child.append(child)
        PrepositionMerging(parent).merge()
        self.assertEqual(parent.wordList, [Word('parent', 1)])
        self.assertEqual(parent.child, [child])
        self.assertEqual(child.parent, parent)

        for prep in PrepositionMerging.prepositionSet:
            parent = DependenciesTree('parent', 1)
            child = DependenciesTree(prep, 2, parent=parent, dependency='foo')
            parent.child.append(child)
            child.dependency = 'conj_and'
            PrepositionMerging(parent).merge()
            self.assertIn(Word('parent', 1), parent.wordList)
            self.assertIn(Word(prep, 2), parent.wordList)
            self.assertEqual(parent.child, [])

    def testBasicPrepositionEdge(self):
        """A preposition carried by the edge label is appended to the parent word."""
        for prep in ['in', 'of', 'with', 'by']:
            parent = DependenciesTree('parent', 1)
            parent.wordList[0].pos = 'VB'
            child = DependenciesTree('child', 2, parent=parent, dependency='prep_' + prep)
            parent.child.append(child)
            PrepositionMerging(parent).merge()
            self.assertEqual(parent.wordList, [Word('parent ' + prep, 1, 'VB')])
            self.assertEqual(parent.child, [child])
            self.assertEqual(child.dependency, 'prep')

        # An 'agent' edge is expected to contribute the preposition 'by'.
        parent = DependenciesTree('parent', 1)
        parent.wordList[0].pos = 'VB'
        child = DependenciesTree('child', 2, parent=parent, dependency='agent')
        parent.child.append(child)
        PrepositionMerging(parent).merge()
        self.assertEqual(parent.wordList, [Word('parent by', 1, 'VB')])
        self.assertEqual(parent.child, [child])

    def testNamedEntity1(self):
        """Named-entity merging on the tree built from data.give_john_smith()."""
        tree = computeTree(data.give_john_smith())
        NamedEntityMerging(tree).merge()
        tree.sort()
        root = tree
        # Root
        self._assertNode(root, [Word("ROOT", 0)], 'undef', 'undef', None, 1)
        # Lives
        lives = root.child[0]
        self._assertNode(lives, [Word("lives", 3, 'VBZ')], 'undef', 'ROOT', tree, 2)
        # John Smith
        smith = lives.child[0]
        self._assertNode(smith, [Word("John", 1, 'NNP'), Word("Smith", 2, 'NNP')],
                         'PERSON', 'nsubj', lives, 0)
        # United Kingdom
        kingdom = lives.child[1]
        self._assertNode(kingdom, [Word("United", 6, 'NNP'), Word("Kingdom", 7, 'NNP')],
                         'LOCATION', 'prep_in', lives, 1)
        # The
        the = kingdom.child[0]
        self._assertNode(the, [Word("the", 5, 'DT')], 'undef', 'det', kingdom, 0)

    def testNamedEntity2(self):
        """Named-entity merging on the tree built from data.give_obama_president_usa()."""
        tree = computeTree(data.give_obama_president_usa())
        NamedEntityMerging(tree).merge()
        tree.sort()
        root = tree
        # Root
        self._assertNode(root, [Word("ROOT", 0)], 'undef', 'undef', None, 1)
        # Is
        is_ = root.child[0]
        self._assertNode(is_, [Word("is", 2, 'VBZ')], 'undef', 'ROOT', tree, 2)
        # Obama
        obama = is_.child[0]
        self._assertNode(obama, [Word("Obama", 1, 'NNP')], 'PERSON', 'nsubj', is_, 0)
        # president
        president = is_.child[1]
        self._assertNode(president, [Word("president", 6, 'NN')], 'undef', 'xcomp', is_, 2)
        # The
        the = president.child[0]
        self._assertNode(the, [Word("the", 3, 'DT')], 'undef', 'det', president, 0)
        # United States
        united = president.child[1]
        self._assertNode(united, [Word("United", 4, 'NNP'), Word("States", 5, 'NNPS')],
                         'LOCATION', 'compound', president, 0)

    def testStr2(self):
        """String rendering of the John Smith tree after both merging passes."""
        tree = computeTree(data.give_john_smith())
        NamedEntityMerging(tree).merge()
        PrepositionMerging(tree).merge()
        # Show the full diff if the rendered tree does not match.
        self.maxDiff = None
        tree.sort()
        self.assertEqual(str(tree), data.give_john_smith_stringMerge())
|
232
|
|
|
|