1
|
1 |
|
import sys |
2
|
1 |
|
from .dependencyTree import Word, DependenciesTree |
3
|
1 |
|
from .data.questionWord import closeQuestionWord, openQuestionWord, strongQuestionWord, questionAdd, questionWIs, questionType, existQuestionWord, semiQuestionWord |
4
|
1 |
|
from ppp_datamodel import Resource, Triple, Missing, Intersection, List, Union, And, Or, Exists, Sort, Nth |
5
|
|
|
|
6
|
|
|
|
7
|
|
|
##################################### |
8
|
|
|
# Identify and remove question word # |
9
|
|
|
##################################### |
10
|
|
|
|
11
|
1 |
|
def prepareInstanceOf(t):
    """
        Climb from t towards the top of the tree and set to 'inst_of' the
        dependency of the first node met whose parent has the dependency
        'root' (compared case-insensitively).

        Nothing is modified if the climb runs out of parents before such a
        node is found.
    """
    node = t
    while node.parent:
        if node.parent.dependency.lower() == 'root':
            node.dependency = 'inst_of'
            return
        node = node.parent
20
|
|
|
|
21
|
1 |
|
def removeWord(t, word):
    """
        Remove word (of type str*int = s*position_of_s_in_sentence) from tree t.

        The tree is searched from t downwards. When a node whose wordList
        contains word is found, its children are attached to its parent (each
        one taking the dependency of the removed node) and the node itself is
        detached from its parent. Nothing happens if word is not in the tree.
    """
    if word not in t.wordList:
        # not in this node: keep looking in the subtrees
        for subtree in t.child:
            removeWord(subtree, word)
        return
    # must run before the children inherit t.dependency, since it may rewrite it
    prepareInstanceOf(t)  # <<<
    parent = t.parent
    for orphan in t.child:  # the word is in the middle of the tree
        orphan.dependency = t.dependency
        orphan.parent = parent
        parent.child.append(orphan)
    parent.child.remove(t)
35
|
|
|
|
36
|
1 |
|
def firstWords(t, start):
    """
        Fill start (list of size 2) with the 2 first words of the sentence
        when they appear in the tree t.

        start[0] receives the word whose index is 1 and start[1] the word whose
        index is 2; an entry is left untouched when no such word is found.
        Nodes are visited parent first, then children from left to right.
    """
    pending = [t]
    while pending:
        node = pending.pop()
        for entry in node.wordList:
            position = entry.index
            if position == 1:
                start[0] = entry
            elif position == 2:
                start[1] = entry
        # pushed in reverse so that the leftmost child is popped first
        pending.extend(reversed(node.child))
47
|
|
|
|
48
|
1 |
|
def identifyQuestionWord(t):
    """
        Identify, remove (if necessary) and return the question word.
        If there is no question word, return None.

        The two first words of the sentence are looked up in the tree t; any of
        them that is missing from the tree is rebuilt from t.text. A two-word
        question word (open, semi or exist) is tried first and both words are
        removed from the tree. Otherwise the first word alone is tested: an
        open or semi question word is removed from the tree and returned, a
        close question word is returned without being removed.
    """
    start = [None, None]
    firstWords(t, start)
    if not (start[0] and start[1]):
        # the first words are not in the tree, we extract them directly from the sentence
        # (maxsplit=2 isolates the second word instead of the whole remainder of the sentence)
        tokens = t.text.split(' ', 2)
        start[0] = start[0] or Word(tokens[0], 1)
        if not start[1] and len(tokens) > 1:
            start[1] = Word(tokens[1], 2)
    if start[1]:
        w = start[0].word.lower() + ' ' + start[1].word.lower()
        if w in openQuestionWord or w in semiQuestionWord or w in existQuestionWord:
            removeWord(t, start[0])
            removeWord(t, start[1])
            return w
    w = start[0].word.lower()
    if w in openQuestionWord or w in semiQuestionWord:
        removeWord(t, start[0])
        return w
    if w in closeQuestionWord:
        return w
    return None
73
|
|
|
|
74
|
|
|
######################################################## |
75
|
|
|
# Process question word to improve the dependency tree # |
76
|
|
|
######################################################## |
77
|
|
|
|
78
|
1 |
|
def processQuestionType(t, w, typeMap=questionType):
    """
        Set t.subtreeType (= type of the answer) to the entry of typeMap
        associated with the question word w.

        The tree is left unchanged when w has no entry in typeMap.
    """
    try:
        answerType = typeMap[w]
    except KeyError:
        return
    t.subtreeType = answerType
86
|
|
|
|
87
|
1 |
|
def questionWordDependencyTree(t, w):
    """
        Improve the dependency tree t using the question word w.

        The root is typed according to w and, when w is an exist question
        word, the dependency of the first child of t is set to 'Rexist'.
    """
    # type the ROOT according to the question word
    processQuestionType(t, w)
    if w not in existQuestionWord:
        return
    # prepare the production of an Exists node
    firstChild = t.child[0]
    firstChild.dependency = 'Rexist'
91
|
|
|
|
92
|
|
|
#################################################### |
93
|
|
|
# Process question word to improve the normal form # |
94
|
|
|
#################################################### |
95
|
|
|
|
96
|
1 |
|
def extractPredicates(nf):
    """
        Assume that nf is a triple.
        Return the list of strings (values) that are predicates of the triple:
        a single value when the predicate is a Resource, otherwise the value of
        each element of the predicate's list.
    """
    predicate = nf.predicate
    if not isinstance(predicate, Resource):
        return [item.value for item in predicate.list]
    return [predicate.value]
105
|
|
|
|
106
|
1 |
|
def enhanceTriple(nf, w, addMap=questionAdd, wisMap=questionWIs):
    """
        Add info into the triple nf depending on the question word w.

        - 'identity' predicate: with a strong question word, or when the subject
          or the object is a Resource (triple of depth 1), the predicate is
          replaced by the entries of wisMap[w]; otherwise the first level is
          deleted and the non-missing side of the triple is returned.
        - 'instance of' predicate: nf is returned unchanged.
        - any other predicate: each predicate is kept and also combined with
          every entry of addMap[w].

        nf is returned unchanged when w has no entry in the map that is needed.
    """
    predicates = extractPredicates(nf)
    try:
        if 'identity' in predicates:
            # strong qw or triple of depth 1
            keepTriple = (w in strongQuestionWord
                          or isinstance(nf.subject, Resource)
                          or isinstance(nf.object, Resource))
            if not keepTriple:
                # delete the first level
                return nf.object if isinstance(nf.subject, Missing) else nf.subject
            # !! Other info lost (type...) (inverse_predicate: not relevant)
            replacement = List([Resource(value) for value in wisMap[w]])
            return Triple(nf.subject, replacement, nf.object)
        if 'instance of' in predicates:
            return nf
        # add info into the predicates list (except for instance_of predicate)
        enriched = [Resource(pred) for pred in predicates]
        enriched += [Resource(pred + ' ' + extra) for pred in predicates for extra in addMap[w]]
        # !! Other info lost (type...) (reverse_predicate not enhance?)
        return Triple(nf.subject, List(enriched), nf.object, nf.inverse_predicate)
    except KeyError:
        return nf
126
|
|
|
|
127
|
1 |
|
def processQuestionInfo(nf, w):
    """
        Add info into the first triples of the normal form nf depending on the
        question word w, and return the resulting normal form.

        List, Intersection, Union, And and Or nodes are rebuilt from their
        processed elements; Nth and Exists nodes are rebuilt from their
        processed inner list; Sort and Resource nodes are returned unchanged;
        Triple nodes are handed to enhanceTriple.

        Raises AssertionError for any other kind of node.
    """
    if isinstance(nf, (List, Intersection, Union, And, Or)):
        return type(nf)([processQuestionInfo(u, w) for u in nf.list])
    if isinstance(nf, (Nth, Exists)):
        return type(nf)(processQuestionInfo(nf.list, w))
    if isinstance(nf, (Sort, Resource)):
        return nf
    if isinstance(nf, Triple):
        return enhanceTriple(nf, w)
    # explicit raise: a bare `assert False` is removed under `python -O`,
    # which would make unsupported nodes silently yield None
    raise AssertionError('unsupported normal form node: %r' % (nf,))
144
|
|
|
|
145
|
1 |
|
def questionWordNormalForm(nf, w):
    """
        Improve the normal form nf using the question word w.

        Only open question words trigger the processing; for any other value
        of w the normal form is returned as is.
    """
    if w not in openQuestionWord:
        return nf
    return processQuestionInfo(nf, w)
152
|
|
|
|