|
1
|
1 |
|
import sys |
|
2
|
1 |
|
from .questionWordProcessing import identifyQuestionWord, questionWordDependencyTree |
|
3
|
1 |
|
from .dependencyTree import Word, DependenciesTree |
|
4
|
1 |
|
from copy import deepcopy |
|
5
|
1 |
|
from .data.exceptions import GrammaticalError |
|
6
|
|
|
|
|
7
|
|
|
############################## |
|
8
|
|
|
# General analysis functions # |
|
9
|
|
|
############################## |
|
10
|
|
|
|
|
11
|
1 |
|
def remove(t, qw):
    """Detach node t from its parent's list of children.

    qw is accepted so that every rule shares the same (t, qw) signature;
    it is not used here.
    """
    siblings = t.parent.child
    siblings.remove(t)
|
13
|
|
|
|
|
14
|
1 |
|
def impossible(t, qw):
    """Reject node t: its dependency type is not expected at this stage.

    Always raises GrammaticalError carrying t.dependency; qw is unused.
    """
    offending = t.dependency
    raise GrammaticalError(offending, "unexpected dependency")
|
16
|
|
|
|
|
17
|
1 |
|
def ignore(t, qw):
    """Rule that deliberately leaves node t untouched (both arguments unused)."""
    return None
|
19
|
|
|
|
|
20
|
1 |
|
def merge(t, qw):
    """Ask the parent of t to merge t into itself.

    Delegates to ``t.parent.merge(t, True)``; qw is unused.
    """
    owner = t.parent
    owner.merge(t, True)
|
22
|
|
|
|
|
23
|
|
|
############################## |
|
24
|
|
|
# Special analysis functions # |
|
25
|
|
|
############################## |
|
26
|
|
|
|
|
27
|
1 |
|
def amodRule(t, qw):
    """Handle an 'amod' node t (qw is only forwarded to merge).

    Three outcomes:
      * a 'JJ' whose first child is an 'RBS' ("most popular", ...): the
        child is merged and t is tagged 'connectorUp';
      * an ORDINAL named entity or a 'JJS' ("biggest", ...): t is tagged
        'connectorUp';
      * anything else: t itself is merged.
    """
    head_pos = t.wordList[0].pos  # only the first word is inspected
    # Superlative of type "most popular, most beautiful, ..."
    if head_pos == 'JJ' and len(t.child) > 0 and t.child[0].wordList[0].pos == 'RBS':
        assert len(t.child) == 1 and len(t.child[0].child) == 0
        merge(t.child[0], qw)
        t.dependency = 'connectorUp'
        return
    # Superlative of type "biggest, deepest, ..." or ordinal
    if t.namedEntityTag == 'ORDINAL' or head_pos == 'JJS':
        t.dependency = 'connectorUp'
        return
    # Plain adjective
    assert t.parent is not None
    merge(t, qw)
|
39
|
|
|
|
|
40
|
1 |
|
def prepRule(t, qw):
    """Retag a 'prep' node t according to its parent's part of speech.

    The node becomes 'R3' when the POS tag of the parent's first word
    starts with 'V', and 'R2' otherwise. qw is unused.
    """
    parent_pos = t.parent.wordList[0].pos
    t.dependency = 'R3' if parent_pos[0] == 'V' else 'R2'
|
45
|
|
|
|
|
46
|
|
|
########################## |
|
47
|
|
|
# General analysis rules # |
|
48
|
|
|
########################## |
|
49
|
|
|
|
|
50
|
1 |
|
# First-pass rule table, applied by collapseMap: each key is a dependency
# tag as found on a node. A string value replaces the node's dependency; a
# callable value is invoked as rule(t, qw).
# Entries tagged "<<" carry a marker left by the original authors.
dependenciesMap1 = {
    'undef': 'R0',
    'ROOT': 'R0',
    'inst_of': 'RinstOf',  # <<
    'nmod': impossible,  # new dependency type, to discuss...
    'dep': 'R1',
    'aux': remove,
    'auxpass': remove,
    'cop': impossible,
    'arg': impossible,
    'agent': 'R3',  # <<
    'comp': 'R3',
    'acomp': 'R3',
    'ccomp': 'R2',
    'xcomp': 'R2',
    'pcomp': 'R3',
    'obj': impossible,
    'dobj': 'R3',
    'iobj': 'R3',
    'pobj': 'R3',
    'subj': impossible,
    'nsubj': 'R2',  # <<
    'nsubjpass': 'R2',  # <<
    'csubj': impossible,
    'csubjpass': impossible,
    'cc': impossible,
    'conj': 'R0',
    'conj_and': ignore,
    'conj_or': ignore,
    'conj_negcc': ignore,
    'expl': remove,
    'mod': impossible,
    'amod': amodRule,
    'appos': 'R0',  # <<
    'advcl': 'R2',
    'det': remove,
    'predet': remove,
    'preconj': remove,
    'vmod': 'R3',
    'mwe': merge,
    'mark': remove,
    'advmod': 'R2',
    'neg': 'connectorUp',  # need a NOT node
    'rcmod': 'R2',
    'quantmod': remove,
    'nn': merge,
    'npadvmod': 'R2',
    'tmod': 'R3',
    'num': merge,
    'nummod': merge,
    'number': merge,
    'prep': prepRule,  # <<
    'poss': 'R2',
    'possessive': impossible,
    'prt': merge,
    'parataxis': remove,
    'punct': impossible,
    'ref': impossible,
    'sdep': impossible,
    'xsubj': 'R3',
    'goeswith': merge,
    'discourse': remove,
    'compound': merge,  # new dependency type, to discuss...
}
|
114
|
|
|
|
|
115
|
1 |
|
def propagateType(t, qw):
    """Propagate the subtree type locally between node t and its parent.

    Nothing happens for a node without parent. Otherwise:
      * if the parent's type is still 'undef', it takes the type of t;
      * t then takes the parent's type.

    qw is unused; it is present so the function fits the rule signature.

    Raises:
        AssertionError: if t and its parent both carry a defined type
            and the two types differ.
    """
    parent = t.parent
    # Identity test: `!= None` would go through any __ne__/__eq__ defined
    # on the node class instead of simply checking for a missing parent.
    if parent is None:
        return
    if parent.subtreeType == 'undef':
        parent.subtreeType = t.subtreeType
    assert t.subtreeType == 'undef' or t.subtreeType == parent.subtreeType
    t.subtreeType = parent.subtreeType
|
124
|
|
|
|
|
125
|
1 |
|
# Second-pass rule table, applied by collapseMap once the tree only carries
# the internal relation tags. Every value is a callable invoked as
# rule(t, qw). The trailing notes describe how to handle a -b-> c.
dependenciesMap2 = {
    'R0': propagateType,       # normalize(c)
    'R1': propagateType,       # !c
    'R2': ignore,              # (normalize(c), !a, ?)
    'R3': ignore,              # (?, !a, normalize(c))
    'RinstOf': propagateType,  # (?, instance of, c)
    'Rspl': propagateType,     # superlative
    'RconjT': propagateType,   # top of a conjunction relation
    'RconjB': propagateType,   # bottom of a conjunction relation
    'Rexist': propagateType,
}
|
136
|
|
|
|
|
137
|
1 |
|
def collapseMap(t, depMap, qw, down=True):
    """Apply the rules of depMap to t and, recursively, to its subtree.

    Args:
        t: node to process; must expose `dependency` and `child`.
        depMap: mapping from dependency tag to a rule. A string rule
            replaces t.dependency; any other rule is called as rule(t, qw).
        qw: value forwarded unchanged to every callable rule.
        down: if true, the children are processed before t (from the
            bottom of the tree to the top); otherwise t is processed
            before its children (from top to bottom).

    Raises:
        GrammaticalError: if t.dependency is not a key of depMap.
    """
    # Snapshot of the children: rules may alter t.child while we iterate.
    children = list(t.child)
    if down:
        for c in children:
            collapseMap(c, depMap, qw, down)
    # Only the lookup is guarded, so that a KeyError raised *inside* a rule
    # is not misreported as an unknown dependency.
    try:
        rule = depMap[t.dependency]
    except KeyError:
        raise GrammaticalError(t.dependency, "unknown dependency")
    if isinstance(rule, str):
        t.dependency = rule
    else:
        rule(t, qw)
    if not down:
        for c in children:
            collapseMap(c, depMap, qw, down)
|
156
|
|
|
|
|
157
|
|
|
########################## |
|
158
|
|
|
# Connectors rebalancing # |
|
159
|
|
|
########################## |
|
160
|
|
|
|
|
161
|
1 |
|
def connectorUp(t):
    """Lift every 'connectorUp' node of the subtree of t above its parent.

    Such nodes are produced for superlatives and ordinals (first,
    biggest, ...). When one is found, it swaps places with its parent: it
    takes over the parent's dependency and position under the
    grandparent, and the former parent becomes its only child, tagged
    'Rspl'. The search does not continue below a lifted node.
    """
    if t.dependency != 'connectorUp':
        # Iterate over a copy: lifting a child rewrites t.child.
        for c in list(t.child):
            connectorUp(c)
        return
    assert t.parent is not None and t.child == []
    formerParent = t.parent
    t.dependency = formerParent.dependency
    formerParent.dependency = 'Rspl'
    formerParent.child.remove(t)
    t.child = [formerParent]
    grandParent = formerParent.parent
    # t replaces its former parent among the grandparent's children
    grandParent.child.remove(formerParent)
    grandParent.child.append(t)
    formerParent.parent = t
    t.parent = grandParent
|
180
|
|
|
|
|
181
|
1 |
|
def conjConnectorsUp(t):
    """Lift conjunction connectors (and, or, negcc, ...) in the subtree of t.

    A node whose dependency starts with 'conj' is replaced by a new
    DependenciesTree node named after the text following the first '_' of
    that dependency. The new node gets two children: a deep copy of the
    conjunction's original context (tagged 'RconjT') and the rearranged
    original branch (tagged 'RconjB'). Both children are then processed
    recursively. Nodes without a 'conj' dependency are simply traversed.
    """
    if not t.dependency.startswith('conj'):
        # Iterate over a copy: lifting a child rewrites t.child.
        for c in list(t.child):
            conjConnectorsUp(c)
        return
    assert t.parent is not None
    connector = t.dependency[t.dependency.index('_')+1:]
    if len(t.parent.child) == 1:
        # t is an only child: it takes its parent's place under the
        # grandparent, and the grandparent is the node being duplicated.
        anchor = t.parent.parent
        t.dependency = t.parent.dependency
        t.parent.child.remove(t)
        twin = deepcopy(anchor)  # copied before t is re-attached
        anchor.child.remove(t.parent)
        anchor.child.append(t)
        t.parent = anchor
        junction = DependenciesTree(connector, dependency=anchor.dependency, child=[twin, anchor], parent=anchor.parent)
        anchor.dependency = 'RconjB'
        anchor.parent = junction
    else:
        # t has siblings: it adopts them and its parent is duplicated.
        anchor = t.parent
        anchor.child.remove(t)
        twin = deepcopy(anchor)  # copied after t has been detached
        t.child += t.parent.child
        for n in t.child:
            n.parent = t
        junction = DependenciesTree(connector, dependency=anchor.dependency, child=[twin, t], parent=anchor.parent)
        t.dependency = 'RconjB'
        t.parent = junction
    # Splice the new connector node in place of the anchor
    junction.parent.child.remove(anchor)
    junction.parent.child.append(junction)
    twin.dependency = 'RconjT'
    twin.parent = junction
    for c in list(junction.child):
        conjConnectorsUp(c)
|
223
|
|
|
|
|
224
|
|
|
################### |
|
225
|
|
|
# Global function # |
|
226
|
|
|
################### |
|
227
|
|
|
|
|
228
|
1 |
|
def simplify(t):
    """Identify the question word of tree t and collapse its dependencies.

    Runs the whole pipeline in place on t and returns whatever
    identifyQuestionWord returned.
    """
    # Identify and remove the question word
    questionWord = identifyQuestionWord(t)
    # Collapse the tree according to dependenciesMap1
    collapseMap(t, dependenciesMap1, questionWord)
    # Remove conjunction connectors, then the remaining amod connectors
    conjConnectorsUp(t)
    connectorUp(t)
    # Change the tree depending on the question word
    questionWordDependencyTree(t, questionWord)
    # Propagate types from bottom to top, then from top to bottom
    for bottomUp in (True, False):
        collapseMap(t, dependenciesMap2, questionWord, bottomUp)
    return questionWord
|
241
|
|
|
|