1
|
1 |
|
import sys |
2
|
1 |
|
from .questionWordProcessing import identifyQuestionWord, questionWordDependencyTree |
3
|
1 |
|
from .dependencyTree import Word, DependenciesTree |
4
|
1 |
|
from copy import deepcopy |
5
|
1 |
|
from .data.exceptions import GrammaticalError |
6
|
|
|
|
7
|
|
|
##############################
# General analysis functions #
##############################
10
|
|
|
|
11
|
1 |
|
def remove(t, qw):
    """
        Detach the node t from the child list of its parent.

        qw is accepted so that every rule shares the (t, qw) signature; it is not used here.
    """
    siblings = t.parent.child
    siblings.remove(t)
13
|
|
|
|
14
|
1 |
|
def impossible(t, qw):
    """
        Rule for dependencies that are not expected to be encountered.

        Always raises GrammaticalError carrying the dependency label of t; qw is not used.
    """
    label = t.dependency
    raise GrammaticalError(label, "unexpected dependency")
16
|
|
|
|
17
|
1 |
|
def ignore(t, qw):
    """
        Rule that leaves the node t untouched (neither argument is used).
    """
    return None
19
|
|
|
|
20
|
1 |
|
def merge(t, qw):
    """
        Ask the parent of t to merge t into itself.

        Delegates to the parent's own merge method with True as second argument;
        qw is not used.
    """
    host = t.parent
    host.merge(t, True)
22
|
|
|
|
23
|
|
|
##############################
# Special analysis functions #
##############################
26
|
|
|
|
27
|
1 |
|
def amodRule(t, qw):
    """
        Rule applied to 'amod' nodes.

        Three outcomes, decided from the first word of t only:
          - 'JJ' whose first child is an 'RBS' adverb (superlative of type
            "most popular, most beautiful, ..."): the adverb is merged into t and
            t is tagged 'connectorUp';
          - ordinal (namedEntityTag == 'ORDINAL') or 'JJS' (superlative of type
            "biggest, deepest, ..."): t is tagged 'connectorUp';
          - anything else: t is merged into its parent.
    """
    # Only the first word is inspected: must be improved? (search in the whole list?)
    headPos = t.wordList[0].pos

    if headPos == 'JJ' and t.child and t.child[0].wordList[0].pos == 'RBS':
        adverb = t.child[0]
        # the adverb must be the only child of t and must be a leaf
        assert len(t.child) == 1 and len(adverb.child) == 0
        merge(adverb, qw)
        t.dependency = 'connectorUp'
        return

    if t.namedEntityTag == 'ORDINAL' or headPos == 'JJS':
        t.dependency = 'connectorUp'
        return

    assert t.parent is not None
    merge(t, qw)
39
|
|
|
|
40
|
1 |
|
def prepRule(t, qw):
    """
        Rule applied to 'prep' nodes.

        t becomes 'R3' when the part-of-speech tag of the first word of its parent
        starts with 'V', and 'R2' otherwise. qw is not used.
    """
    governorPos = t.parent.wordList[0].pos
    t.dependency = 'R3' if governorPos[0] == 'V' else 'R2'
45
|
|
|
|
46
|
|
|
##########################
# General analysis rules #
##########################
49
|
|
|
|
50
|
1 |
|
# First-pass rule table, consumed by collapseMap.
# Each dependency label maps either to a replacement label (a string, assigned
# to the node's dependency) or to a rule function called as rule(t, qw).
dependenciesMap1 = {
    'undef': 'R0',
    'ROOT': 'R0',
    'inst_of': 'RinstOf',  # <<
    'nmod': impossible,  # new dependency type, to discuss...
    'dep': 'R1',
    'aux': remove,
    'auxpass': remove,
    'cop': impossible,
    'arg': impossible,
    'agent': 'R3',  # <<
    'comp': 'R3',
    'acomp': 'R3',
    'ccomp': 'R2',
    'xcomp': 'R2',
    'pcomp': 'R3',
    'obj': impossible,
    'dobj': 'R3',
    'iobj': 'R3',
    'pobj': 'R3',
    'subj': impossible,
    'nsubj': 'R2',  # <<
    'nsubjpass': 'R2',  # <<
    'csubj': impossible,
    'csubjpass': impossible,
    'cc': impossible,
    'conj': 'R0',
    'conj_and': ignore,
    'conj_or': ignore,
    'conj_negcc': ignore,
    'expl': remove,
    'mod': impossible,
    'amod': amodRule,
    'appos': 'R0',  # <<
    'advcl': 'R2',
    'det': remove,
    'predet': remove,
    'preconj': remove,
    'vmod': 'R3',
    'mwe': merge,
    'mark': remove,
    'advmod': 'R2',
    'neg': 'connectorUp',  # need a NOT node
    'rcmod': 'R2',
    'quantmod': remove,
    'nn': merge,
    'npadvmod': 'R2',
    'tmod': 'R3',
    'num': merge,
    'nummod': merge,
    'number': merge,
    'prep': prepRule,  # <<
    'poss': 'R2',
    'possessive': impossible,
    'prt': merge,
    'parataxis': remove,
    'punct': impossible,
    'ref': impossible,
    'sdep': impossible,
    'xsubj': 'R3',
    'goeswith': merge,
    'discourse': remove,
    'compound': merge,  # new dependency type, to discuss...
}
114
|
|
|
|
115
|
1 |
|
def propagateType(t, qw):
    """
        Propagate locally the type of the subtree.

        Does nothing when t has no parent. Otherwise:
          - a parent whose subtreeType is 'undef' inherits the subtreeType of t;
          - t then takes the subtreeType of its parent.
        The two types must agree unless the type of t is 'undef'
        (checked with an assert, so AssertionError on conflict). qw is not used.
    """
    parent = t.parent
    # Identity test: an equality comparison against None would be routed through
    # any __eq__/__ne__ defined by the tree class.
    if parent is None:
        return
    if parent.subtreeType == 'undef':
        parent.subtreeType = t.subtreeType
    assert t.subtreeType == 'undef' or t.subtreeType == parent.subtreeType
    t.subtreeType = parent.subtreeType
124
|
|
|
|
125
|
1 |
|
# Second-pass rule table, consumed by collapseMap to propagate subtree types.
# The per-entry comments describe how to handle an edge a -b-> c.
dependenciesMap2 = {
    'R0': propagateType,  # normalize(c)
    'R1': propagateType,  # !c
    'R2': ignore,  # (normalize(c), !a, ?)
    'R3': ignore,  # (?, !a, normalize(c))
    'RinstOf': propagateType,  # (?, instance of, c)
    'Rspl': propagateType,  # superlative
    'RconjT': propagateType,  # top of a conjunction relation
    'RconjB': propagateType,  # bottom of a conjunction relation
    'Rexist': propagateType,
}
136
|
|
|
|
137
|
1 |
|
def collapseMap(t, depMap, qw, down=True):
    """
        Apply the rules of depMap to t and, recursively, to its whole subtree.

        depMap maps a dependency label either to a string (the new label of the
        node) or to a callable invoked as rule(t, qw).
        If down is True the children are processed before t (collapse from down
        to top); if down is False t is processed before its children (collapse
        from top to down).

        Raises GrammaticalError if the dependency of a node is not a key of depMap.
        Exceptions raised by a rule itself (including KeyError) propagate unchanged.
    """
    children = list(t.child)  # copy, because t.child is changed while iterating
    if down:
        for c in children:
            collapseMap(c, depMap, qw, down)
    # Only the table lookup is guarded, so that a KeyError coming from inside a
    # rule is not misreported as an unknown dependency.
    try:
        rule = depMap[t.dependency]
    except KeyError:
        raise GrammaticalError(t.dependency, "unknown dependency")
    if isinstance(rule, str):
        t.dependency = rule
    else:
        rule(t, qw)
    if not down:
        for c in children:
            collapseMap(c, depMap, qw, down)
156
|
|
|
|
157
|
|
|
##########################
# Connectors rebalancing #
##########################
160
|
|
|
|
161
|
1 |
|
def connectorUp(t):
    """
        Move amod connectors (superlative: first, biggest...) one level up.

        A node tagged 'connectorUp' must be a leaf with a parent. It swaps places
        with that parent: the node takes over the parent's dependency and the
        parent's slot under the grandparent (appended at the end of the
        grandparent's children), while the former parent becomes its only child
        with dependency 'Rspl'. Any other node is only traversed.
    """
    if t.dependency != 'connectorUp':
        # copy, because the child list is changed while iterating
        for node in list(t.child):
            connectorUp(node)
        return

    assert t.parent is not None and t.child == []
    formerParent = t.parent
    grandParent = formerParent.parent

    # exchange the dependency labels
    t.dependency = formerParent.dependency
    formerParent.dependency = 'Rspl'

    # the former parent becomes the only child of t
    formerParent.child.remove(t)
    t.child = [formerParent]

    # t takes the place of the former parent under the grandparent
    grandParent.child.remove(formerParent)
    grandParent.child.append(t)
    formerParent.parent = t
    t.parent = grandParent
180
|
|
|
|
181
|
1 |
|
def conjConnectorsUp(t):
    """
        Move conjunction connectors (and, or, neg...) up in the tree.

        Nodes whose dependency does not start with 'conj' are only traversed.
        For a node t with dependency 'conj_<connector>', a new DependenciesTree
        node named <connector> is created with two children, a deep copy tagged
        'RconjT' and a second operand tagged 'RconjB':
          - if t is the only child of its parent, the duplicated node is the
            grandparent; in the original grandparent t replaces its former
            parent and that grandparent becomes the 'RconjB' operand;
          - otherwise the duplicated node is the parent (without t); t adopts the
            remaining children of its parent and becomes the 'RconjB' operand.
        The new node replaces the duplicated node in the children of that node's
        parent, and the traversal continues on the children of the new node.
    """
    if not t.dependency.startswith('conj'):
        # copy, because the child list is changed while iterating
        for node in list(t.child):
            conjConnectorsUp(node)
        return

    assert t.parent is not None
    # text following the first underscore of the label
    connector = t.dependency[t.dependency.index('_') + 1:]

    if len(t.parent.child) == 1:
        replaced = t.parent.parent
        t.dependency = t.parent.dependency
        t.parent.child.remove(t)
        # copy taken before t is substituted for its former parent
        twin = deepcopy(replaced)
        replaced.child.remove(t.parent)
        replaced.child.append(t)
        t.parent = replaced
        junction = DependenciesTree(connector, dependency=replaced.dependency, child=[twin, replaced], parent=replaced.parent)
        replaced.dependency = 'RconjB'
        replaced.parent = junction
    else:
        replaced = t.parent
        replaced.child.remove(t)
        # copy taken once t has been detached from its parent
        twin = deepcopy(replaced)
        t.child += replaced.child
        for node in t.child:
            node.parent = t
        junction = DependenciesTree(connector, dependency=replaced.dependency, child=[twin, t], parent=replaced.parent)
        t.dependency = 'RconjB'
        t.parent = junction

    # plug the new connector node in place of the node it replaces
    junction.parent.child.remove(replaced)
    junction.parent.child.append(junction)
    twin.dependency = 'RconjT'
    twin.parent = junction

    # copy, because the child list is changed while iterating
    for node in list(junction.child):
        conjConnectorsUp(node)
223
|
|
|
|
224
|
|
|
###################
# Global function #
###################
227
|
|
|
|
228
|
1 |
|
def simplify(t):
    """
        Identify and remove the question word, then collapse the dependencies
        of the tree t (the tree is modified in place).

        Returns the value produced by identifyQuestionWord(t).
    """
    # identify and remove question word
    qw = identifyQuestionWord(t)
    # collapse the tree according to dependenciesMap1
    collapseMap(t, dependenciesMap1, qw)
    # remove conjunction connectors, then the remaining amod connectors
    conjConnectorsUp(t)
    connectorUp(t)
    # change the tree depending on the question word
    questionWordDependencyTree(t, qw)
    # propagate types from bottom to top (True), then from top to bottom (False)
    for bottomUp in (True, False):
        collapseMap(t, dependenciesMap2, qw, bottomUp)
    return qw
241
|
|
|
|