|
1
|
1 |
|
import sys |
|
2
|
1 |
|
from .dependencyTree import Word, DependenciesTree |
|
3
|
1 |
|
from .data.questionWord import closeQuestionWord, openQuestionWord, strongQuestionWord, questionAdd, questionWIs, questionType, existQuestionWord, semiQuestionWord |
|
4
|
1 |
|
from ppp_datamodel import Resource, Triple, Missing, Intersection, List, Union, And, Or, Exists, Sort, Nth |
|
5
|
|
|
|
|
6
|
|
|
|
|
7
|
|
|
##################################### |
|
8
|
|
|
# Identify and remove question word # |
|
9
|
|
|
##################################### |
|
10
|
|
|
|
|
11
|
1 |
|
def prepareInstanceOf(t):
    """
        Walk up from t through the parent links until reaching a node whose
        parent has the dependency 'root' (case-insensitive), and set that
        node's dependency to 'inst_of'.

        Nothing is changed if no such node is met before the parent chain
        ends. The tree is modified in place; nothing is returned.
    """
    node = t
    while node.parent:
        if node.parent.dependency.lower() == 'root':
            # node hangs directly below the root: this is the dependency to replace
            node.dependency = 'inst_of'
            return
        node = node.parent
|
20
|
|
|
|
|
21
|
1 |
|
def removeWord(t, word):
    """
        Remove the node holding word from the subtree rooted at t.

        word is looked up in the wordList of each node (original note:
        word is of type str*int = s*position_of_s_in_sentence).
        When a node whose wordList contains word is found:
          - prepareInstanceOf is called on it,
          - each of its children takes over the node's dependency and is
            attached to the node's parent,
          - the node is removed from its parent's children.
        The search does not go below a matching node.

        The tree is modified in place; nothing is returned.
    """
    if word in t.wordList:
        # NOTE(review): a matching node is assumed to have a parent
        # (t.parent.child is accessed unconditionally) -- confirm with callers.
        prepareInstanceOf(t) # <<<
        for u in t.child: # the word is in the middle of the tree
            u.dependency = t.dependency
            u.parent = t.parent
            t.parent.child.append(u)
        t.parent.child.remove(t)
    else:
        # Iterate over a snapshot: a matching child removes itself from
        # t.child and appends its own children to it, so iterating the live
        # list would skip the sibling that follows the removed child.
        for c in list(t.child):
            removeWord(c, word)
|
35
|
|
|
|
|
36
|
1 |
|
def firstWords(t, start):
    """
        Fill start (list of size 2) with the words of index 1 and 2 found in
        the wordList of the nodes of tree t: start[0] receives the word of
        index 1, start[1] the word of index 2.

        Nodes are visited in pre-order (a node, then its children from first
        to last); entries of start whose word is not in the tree are left
        untouched. Nothing is returned.
    """
    pending = [t]
    while pending:
        node = pending.pop()
        for token in node.wordList:
            if token.index == 1:
                start[0] = token
            elif token.index == 2:
                start[1] = token
        # pushed in reverse so that the first child is popped (visited) first
        pending.extend(reversed(node.child))
|
47
|
|
|
|
|
48
|
1 |
|
def identifyQuestionWord(t):
    """
        Identify, remove (if necessary) and return the question word.
        If there is no question word, return None.

        The two first words of the sentence are taken from the tree t when
        present (see firstWords), otherwise from t.text. The lowercased pair
        "first second" is tried first, then the first word alone:
          - a pair found in openQuestionWord, semiQuestionWord or
            existQuestionWord is removed from the tree and returned;
          - a single word found in openQuestionWord or semiQuestionWord is
            removed from the tree and returned;
          - a single word found in closeQuestionWord is returned and left in
            the tree.
    """
    start = [None, None]
    firstWords(t, start)
    if not (start[0] and start[1]):
        # the first words are not in the tree, we extract them directly from the sentence
        # maxsplit=2 isolates the second word; with maxsplit=1 the second
        # item would be the whole rest of the sentence.
        tokens = t.text.split(' ', 2)
        start[0] = start[0] or Word(tokens[0], 1)
        if not start[1] and len(tokens) > 1:
            start[1] = Word(tokens[1], 2)
    first = start[0].word.lower()
    if start[1]:
        pair = first + ' ' + start[1].word.lower()
        if pair in openQuestionWord or pair in semiQuestionWord or pair in existQuestionWord:
            removeWord(t, start[0])
            removeWord(t, start[1])
            return pair
    if first in openQuestionWord or first in semiQuestionWord:
        removeWord(t, start[0])
        return first
    if first in closeQuestionWord:
        # close question words are kept in the tree
        return first
    return None
|
73
|
|
|
|
|
74
|
|
|
######################################################## |
|
75
|
|
|
# Process question word to improve the dependency tree # |
|
76
|
|
|
######################################################## |
|
77
|
|
|
|
|
78
|
1 |
|
def processQuestionType(t, w, typeMap=questionType):
    """
        Set t.subtreeType (= type of the answer) to the type that typeMap
        associates with the question word w.
        t is left untouched when w is not a key of typeMap.
    """
    try:
        answerType = typeMap[w]
    except KeyError:
        # no type known for this question word
        return
    t.subtreeType = answerType
|
86
|
|
|
|
|
87
|
1 |
|
def questionWordDependencyTree(t, w):
    """
        Improve the dependency tree t using the question word w:
        type the tree through processQuestionType and, when w belongs to
        existQuestionWord, set the dependency of the first child of t to
        'Rexist'.
    """
    processQuestionType(t, w) # type the ROOT according to the question word
    if w not in existQuestionWord:
        return
    # prepare the production of an Exists node
    firstChild = t.child[0]
    firstChild.dependency = 'Rexist'
|
91
|
|
|
|
|
92
|
|
|
#################################################### |
|
93
|
|
|
# Process question word to improve the normal form # |
|
94
|
|
|
#################################################### |
|
95
|
|
|
|
|
96
|
1 |
|
def extractPredicates(nf):
    """
        Assume that nf is a triple.
        Return the list of the values (strings) of its predicates: a single
        value when nf.predicate is a Resource, otherwise one value per
        element of nf.predicate.list.
    """
    predicate = nf.predicate
    if isinstance(predicate, Resource):
        nodes = [predicate]
    else:
        nodes = predicate.list
    return [node.value for node in nodes]
|
105
|
|
|
|
|
106
|
1 |
|
def enhanceTriple(nf, w, addMap=questionAdd, wisMap=questionWIs):
    """
        Add info into the triple nf depending on the question word w.

        - 'identity' among the predicates:
            * strong question word, or subject/object being a Resource
              (triple of depth 1): return a triple whose predicates are the
              entries of wisMap[w];
            * otherwise the first level is deleted: return nf.object when the
              subject is Missing, else nf.subject.
        - 'instance of' among the predicates: return nf unchanged.
        - otherwise: return a triple whose predicates are the original ones
          followed by each of them suffixed with every entry of addMap[w].

        nf is returned unchanged whenever a KeyError is raised (e.g. w is
        missing from the map that is looked up).
    """
    predicates = extractPredicates(nf)
    try:
        if 'identity' in predicates:
            # strong qw or triple of depth 1
            keepTriple = (w in strongQuestionWord
                          or isinstance(nf.subject, Resource)
                          or isinstance(nf.object, Resource))
            if keepTriple:
                # !! Other info lost (type...) (inverse_predicate: not relevant)
                replacement = List([Resource(name) for name in wisMap[w]])
                return Triple(nf.subject, replacement, nf.object)
            # delete the first level
            return nf.object if isinstance(nf.subject, Missing) else nf.subject
        if 'instance of' in predicates:
            # the instance_of predicate is never enhanced
            return nf
        # add info into the predicates list
        # !! Other info lost (type...) (reverse_predicate not enhance?)
        plain = [Resource(name) for name in predicates]
        suffixed = [Resource(name + ' ' + extra) for name in predicates for extra in addMap[w]]
        return Triple(nf.subject, List(plain + suffixed), nf.object, nf.inverse_predicate)
    except KeyError:
        return nf
|
126
|
|
|
|
|
127
|
1 |
|
def processQuestionInfo(nf, w):
    """
        Add info into the first triples of the normal form nf depending on
        the question word w.

        - List, Intersection, Union, And, Or: rebuilt with the same type from
          the processed elements of nf.list.
        - Nth, Exists: rebuilt with the same type from the processed nf.list.
        - Sort, Resource: returned unchanged.
        - Triple: handled by enhanceTriple.

        Raises AssertionError for any other kind of node.
    """
    if isinstance(nf, (List, Intersection, Union, And, Or)):
        return type(nf)([processQuestionInfo(u, w) for u in nf.list])
    elif isinstance(nf, (Nth, Exists)):
        # NOTE(review): only the processed `list` is passed to the
        # constructor -- confirm Nth/Exists carry no other attribute that
        # should be preserved.
        return type(nf)(processQuestionInfo(nf.list, w))
    elif isinstance(nf, (Sort, Resource)):
        return nf
    elif isinstance(nf, Triple):
        return enhanceTriple(nf, w)
    # Explicit raise instead of `assert False`: assert statements are removed
    # under -O, which would make unsupported nodes silently return None.
    raise AssertionError('unsupported normal form node: %r' % (nf,))
|
144
|
|
|
|
|
145
|
1 |
|
def questionWordNormalForm(nf, w):
    """
        Improve the normal form nf using the question word w.
        Only open question words trigger a processing (processQuestionInfo);
        for any other w, nf is returned as is.
    """
    return processQuestionInfo(nf, w) if w in openQuestionWord else nf
|
152
|
|
|
|