|
1
|
|
|
"""Request handler of the module.""" |
|
2
|
|
|
|
|
3
|
1 |
|
import json |
|
4
|
1 |
|
import pickle |
|
5
|
1 |
|
import hashlib |
|
6
|
1 |
|
import random |
|
7
|
1 |
|
import logging |
|
8
|
1 |
|
import requests |
|
9
|
|
|
|
|
10
|
|
|
# Import pylibmc if possible; import memcache otherwise. |
|
11
|
|
|
# pylibmc is more strict (ie. detects and raises errors instead |
|
12
|
|
|
# of just ignoring them), but is not compatible with Pypy. |
|
13
|
1 |
|
try: |
|
14
|
1 |
|
import pylibmc as memcache |
|
15
|
1 |
|
except ImportError: |
|
16
|
1 |
|
try: |
|
17
|
1 |
|
import memcache |
|
18
|
|
|
except ImportError: |
|
19
|
|
|
raise ImportError('Neither pylibmc or python3-memcached is installed') |
|
20
|
|
|
|
|
21
|
1 |
|
from ppp_datamodel import Sentence, Resource, List |
|
22
|
1 |
|
from ppp_datamodel.communication import TraceItem, Response |
|
23
|
1 |
|
from ppp_libmodule.exceptions import ClientError |
|
24
|
|
|
|
|
25
|
1 |
|
from .config import Config |
|
26
|
1 |
|
from . import computeTree, simplify, normalFormProduction, QuotationHandler,\ |
|
27
|
|
|
QuotationError, NamedEntityMerging, PrepositionMerging |
|
28
|
|
|
|
|
29
|
1 |
|
def connect_memcached():
    """Return a new memcached client for the servers listed in the config.

    A fresh ``Config`` is instantiated on every call and its
    ``memcached_servers`` attribute is handed to ``memcache.Client``
    (``memcache`` being whichever of pylibmc / memcache was imported).
    """
    return memcache.Client(Config().memcached_servers)
|
32
|
|
|
|
|
33
|
1 |
|
class StanfordNLP:
    """Client for a pool of CoreNLP servers, with results cached in memcached.

    ``urls`` is the collection of server URLs; one of them is picked at
    random for every uncached query.
    """

    def __init__(self, urls):
        self.servers = urls

    def _parse(self, text):
        '''
        Send ``text`` to one randomly chosen server and return the first
        entry of the ``sentences`` field of the JSON response, with the
        original text stored under the ``'text'`` key.

        Need to run CoreNLP server with version > 3.6.0.
        See http://stackoverflow.com/questions/36206407/utf-8-issue-with-corenlp-server
        '''
        server = random.choice(self.servers)
        properties = ('{"annotators": "tokenize,ssplit,pos,lemma,ner,parse", '
                      '"outputFormat": "json", '
                      '"parse.flags": " -makeCopulaHead"}')
        r = requests.post(server,
                          params={'properties': properties},
                          data=text.encode('utf8'))
        # Only the first sentence of the response is used.
        result = r.json()['sentences'][0]
        result['text'] = text
        return result

    @staticmethod
    def _cache_key(salt, text):
        """Build the memcached key for ``text``.

        The key is a fixed prefix followed by the MD5 hex digest of
        ``salt + text``, which keeps it well under memcached's 250-byte
        limit; the salt is there to prevent attacks by hash collision.
        """
        digest = hashlib.md5((salt + text).encode()).hexdigest()
        # The original code concatenated with '+', leaving a literal '%s'
        # inside every key; the format operator is what was intended.
        return 'ppp-qp-grammatical-%s' % digest

    def parse(self, text):
        """Return the CoreNLP analysis of ``text``, using memcached as a cache.

        On a cache miss a single randomly chosen server is queried (see
        ``_parse``) and the pickled result is stored for
        ``Config().memcached_timeout``. On a hit the cached pickle is
        loaded and returned instead.
        """
        mc = connect_memcached()

        # Construct a key suitable for memcached (ie. a string of less than
        # 250 bytes) with a salt (to prevent attacks by hash collision)
        key = self._cache_key(Config().memcached_salt, text)

        # Get the cached value, if any
        r = mc.get(key)
        if not r:
            # If there is no cached value, compute it.
            r = self._parse(text)
            mc.set(key, pickle.dumps(r), time=Config().memcached_timeout)
        else:
            # NOTE(security): unpickling executes arbitrary code if the cache
            # content is attacker-controlled; this is only safe as long as
            # the memcached servers are trusted.
            r = pickle.loads(r)
        return r
|
69
|
1 |
|
# Module-level StanfordNLP client, created at import time from the |
# configured CoreNLP server list; used by parse() below. |
stanfordnlp = StanfordNLP(Config().corenlp_servers) |
|
70
|
|
|
|
|
71
|
1 |
|
def parse(sentence):
    """Run the full grammatical pipeline on ``sentence``.

    Steps, in order:
      1. ``QuotationHandler.pull`` rewrites the sentence before parsing
         (presumably replacing quotations by placeholders -- confirm in
         QuotationHandler).
      2. The module-level ``stanfordnlp`` client analyses that sentence.
      3. ``computeTree`` builds a tree from the analysis.
      4. The same handler's ``push`` is applied to the tree, followed by
         the named-entity and preposition merges.
      5. ``simplify`` is run on the tree and its result is passed, together
         with the tree, to ``normalFormProduction``, whose value is returned.

    The order of the steps is significant: each one works on the tree
    left by the previous one.
    """
    quotations = QuotationHandler()
    unquoted = quotations.pull(sentence)
    tree = computeTree(stanfordnlp.parse(unquoted))

    quotations.push(tree)
    for merger in (NamedEntityMerging, PrepositionMerging):
        merger(tree).merge()

    return normalFormProduction(tree, simplify(tree))
|
81
|
|
|
|
|
82
|
1 |
|
class RequestHandler:
    """Handle one incoming request by parsing its English sentence."""

    __slots__ = ('request', )

    def __init__(self, request):
        self.request = request

    def answer(self):
        """Return the list of responses for the stored request.

        An empty list is returned when:
          * the request tree is not a ``Sentence`` or the language is not
            ``'en'``;
          * parsing raises ``QuotationError`` (silently) or any other
            ``Exception`` (logged as a warning);
          * the parsed tree is a ``Resource`` or a ``List``.

        Otherwise a single ``Response`` is returned, carrying the parsed
        tree, fixed measures (accuracy and relevance of 0.5) and the
        request trace extended with a ``TraceItem`` for this module.
        ``KeyboardInterrupt`` is always propagated.
        """
        request = self.request
        if not (isinstance(request.tree, Sentence) and
                request.language == 'en'):
            return []

        text = request.tree.value
        try:
            parsed = parse(text)
        except QuotationError:
            # No logging: the mistake is in the user's input.
            return []
        except KeyboardInterrupt:
            raise
        except Exception as exc:
            logging.warning(exc)
            return []

        if isinstance(parsed, Resource) or isinstance(parsed, List):
            return []

        measures = {'accuracy': 0.5, 'relevance': 0.5}
        item = TraceItem('QuestionParsing-Grammatical', parsed, measures)
        full_trace = request.trace + [item]
        return [Response('en', parsed, measures, full_trace)]
|
107
|
|
|
|