Completed
Push — master ( 4c4643...bdfd16 )
by Tom
05:30
created

identifyQuestionWord()   F

Complexity

Conditions 9

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 21
CRAP Score 9

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 9
c 2
b 0
f 0
dl 0
loc 25
ccs 21
cts 21
cp 1
crap 9
rs 3
1 1
import sys
2 1
from .dependencyTree import Word, DependenciesTree
3 1
from .data.questionWord import closeQuestionWord, openQuestionWord, strongQuestionWord, questionAdd, questionWIs, questionType, existQuestionWord, semiQuestionWord
4 1
from ppp_datamodel import Resource, Triple, Missing, Intersection, List, Union, And, Or, Exists, First, Last, Sort
5
6
#####################################
7
# Identify and remove question word #
8
#####################################
9
10 1
def prepareInstanceOf(t):
    """
        Replace by 'inst_of' the highest dependency that appears on a path from the root of t to the root of the whole tree

        Starting at t, the parent links are followed upwards. The first node
        whose parent carries the 'root' dependency (compared case-insensitively)
        gets its own dependency overwritten with 'inst_of'. If the climb runs
        out of parents before such a node is found, nothing is modified.
    """
    node = t
    while node.parent:
        if node.parent.dependency.lower() == 'root':
            node.dependency = 'inst_of'
            return
        node = node.parent
19
20 1
def removeWord(t, word):
    """
        Remove word (of type str*int = s*position_of_s_in_sentence) from tree t

        The tree is searched depth-first for a node whose wordList contains
        word. When found, prepareInstanceOf is called on that node, then the
        node is spliced out: each of its children inherits the node's
        dependency and parent and is appended to the parent's child list,
        and the node itself is removed from that list.

        The matching node is expected to have a parent; a match on a
        parentless node raises AttributeError.
    """
    if word in t.wordList:
        prepareInstanceOf(t) # <<<
        for u in t.child: # the word is in the middle of the tree
            u.dependency = t.dependency
            u.parent = t.parent
            t.parent.child.append(u)
        t.parent.child.remove(t)
    else:
        # Iterate over a snapshot: a successful removal one level down
        # appends to and removes from t.child, and mutating the list being
        # iterated would make the loop skip the next sibling.
        for c in list(t.child):
            removeWord(c, word)
34
35 1
def firstWords(t, start):
    """
        Put the 2 first words of the sentence (if they are in the tree) in start (list of size 2)

        The tree is walked depth-first. A word whose index is 1 is stored in
        start[0] and a word whose index is 2 is stored in start[1]. Slots for
        which no word is found are left untouched. start is modified in place
        and nothing is returned.
    """
    for n in t.wordList:
        if n.index == 1:
            start[0] = n
        elif n.index == 2:
            start[1] = n
    for c in t.child:
        firstWords(c, start)
46
47 1
def identifyQuestionWord(t):
    """
        Identify, remove (if necessary) and return the question word.
        If there is no question word, return None.

        The two first words of the sentence are looked up in the tree; any
        that is missing from the tree is rebuilt from t.text. The two-word
        prefix is tested first (open, semi and exist question words), then
        the first word alone (open and semi question words, then close
        question words). Open, semi and exist question words are removed
        from the tree; close question words are returned without being
        removed.
    """
    start = [None, None]
    firstWords(t, start)
    if not (start[0] and start[1]):
        # the first words are not in the tree, we extract them directly from the sentence
        # maxsplit=2 isolates the second word; with maxsplit=1 the second
        # item would be the whole remainder of the sentence.
        tokens = t.text.split(' ', 2)
        start[0] = start[0] or Word(tokens[0], 1)
        if len(tokens) > 1: # single-word sentence: there is no second word
            start[1] = start[1] or Word(tokens[1], 2)
    if start[1]:
        w = start[0].word.lower() + ' ' + start[1].word.lower()
        if w in openQuestionWord or w in semiQuestionWord or w in existQuestionWord:
            removeWord(t, start[0])
            removeWord(t, start[1])
            return w
    w = start[0].word.lower()
    if w in openQuestionWord or w in semiQuestionWord:
        removeWord(t, start[0])
        return w
    if w in closeQuestionWord:
        return w
    return None
72
73
########################################################
74
# Process question word to improve the dependency tree #
75
########################################################
76
77 1
def processQuestionType(t, w, typeMap=questionType):
    """
        Add a type to the root of the tree (= type of the answer) depending on the question word

        t.subtreeType is set to typeMap[w]. If w is not a key of typeMap,
        t is left unchanged.
    """
    try:
        t.subtreeType = typeMap[w]
    except KeyError:
        # no type is associated with this question word
        pass
85
86 1
def questionWordDependencyTree(t, w):
    """
        Improve the dependency tree t using the question word w

        The root is typed through processQuestionType. If w is one of the
        existQuestionWord entries, the dependency of the first child of t is
        set to 'Rexist'; t must then have at least one child (IndexError
        otherwise).
    """
    processQuestionType(t, w)  # type the ROOT according to the question word
    if w in existQuestionWord: # prepare the production of an Exists node
        t.child[0].dependency = 'Rexist'
90
91
####################################################
92
# Process question word to improve the normal form #
93
####################################################
94
95 1
def extractPredicates(nf):
    """
        Assume that nf is a triple
        Returns the lists of strings (values) that are predicates of the triple

        If nf.predicate is a single Resource, the result is a one-element
        list holding its value. Otherwise nf.predicate is expected to expose
        a .list attribute, and the value of each of its items is returned.
    """
    predicate = nf.predicate
    if isinstance(predicate, Resource):
        return [predicate.value]
    return [x.value for x in predicate.list]
104
105 1
def enhanceTriple(nf, w, addMap=questionAdd, wisMap=questionWIs):
    """
        Add info into the triple depending on the question word

        - 'identity' predicate, and either w is a strong question word or the
          subject/object of nf is a Resource: return a new triple whose
          predicates are the entries of wisMap[w].
        - 'identity' predicate otherwise: drop this level and return the
          object of nf if its subject is Missing, else its subject.
        - no 'instance of' predicate: return a new triple whose predicates
          are the original ones followed by every "<predicate> <addition>"
          combination, the additions being taken from addMap[w].
        - otherwise: return nf unchanged.

        nf is also returned unchanged whenever a KeyError is raised while
        building the result (e.g. w is missing from the map being used).
    """
    predList = extractPredicates(nf)
    try:
        if 'identity' in predList:
            # strong qw or triple of depth 1
            if w in strongQuestionWord or isinstance(nf.subject, Resource) or isinstance(nf.object, Resource):
                # !! Other info lost (type...) (inverse_predicate: not relevant)
                return Triple(nf.subject, List([Resource(x) for x in wisMap[w]]), nf.object)
            # delete the first level
            if isinstance(nf.subject, Missing):
                return nf.object
            return nf.subject
        if 'instance of' not in predList:
            # add info into the predicates list (except for instance_of predicate)
            # !! Other info lost (type...) (reverse_predicate not enhance?)
            enriched = [Resource(x) for x in predList] + [Resource(x+' '+y) for x in predList for y in addMap[w]]
            return Triple(nf.subject, List(enriched), nf.object, nf.inverse_predicate)
        return nf
    except KeyError:
        return nf
125
126 1
def processQuestionInfo(nf, w):
    """
        Add info into the first triples depending on the question word

        The normal form is traversed recursively:
        - List, Intersection, Union, And, Or: rebuilt with the same type from
          the processed items of nf.list.
        - Last, First, Exists: rebuilt with the same type from the processed
          nf.list.
        - Sort, Resource: returned unchanged.
        - Triple: handed over to enhanceTriple.
        Any other node raises AssertionError.
    """
    if isinstance(nf, (List, Intersection, Union, And, Or)):
        return type(nf)([processQuestionInfo(u, w) for u in nf.list])
    if isinstance(nf, (Last, First, Exists)):
        return type(nf)(processQuestionInfo(nf.list, w))
    if isinstance(nf, (Sort, Resource)):
        return nf
    if isinstance(nf, Triple):
        return enhanceTriple(nf, w)
    # Raised explicitly: an `assert False` statement is stripped when Python
    # runs with -O, and the function would then silently return None.
    raise AssertionError('unexpected node in normal form: %s' % type(nf).__name__)
143
144 1
def questionWordNormalForm(nf, w):
    """
        Improve the normal form using the question word

        When w is one of the openQuestionWord entries, the result of
        processQuestionInfo(nf, w) is returned. For any other w, nf is
        returned unchanged.
    """
    if w not in openQuestionWord:
        return nf
    return processQuestionInfo(nf, w)
151