1
|
|
|
"""Request handler of the module.""" |
2
|
|
|
|
3
|
1 |
|
import json |
4
|
1 |
|
import pickle |
5
|
1 |
|
import hashlib |
6
|
1 |
|
import random |
7
|
1 |
|
import logging |
8
|
1 |
|
import requests |
9
|
|
|
|
10
|
|
|
# Import pylibmc if possible; import memcache otherwise. |
11
|
|
|
# pylibmc is more strict (i.e. it detects and raises errors instead
# of just ignoring them), but is not compatible with PyPy.
13
|
1 |
|
try: |
14
|
1 |
|
import pylibmc as memcache |
15
|
1 |
|
except ImportError: |
16
|
1 |
|
try: |
17
|
1 |
|
import memcache |
18
|
|
|
except ImportError: |
19
|
|
|
raise ImportError('Neither pylibmc or python3-memcached is installed') |
20
|
|
|
|
21
|
1 |
|
from ppp_datamodel import Sentence, Resource, List |
22
|
1 |
|
from ppp_datamodel.communication import TraceItem, Response |
23
|
1 |
|
from ppp_libmodule.exceptions import ClientError |
24
|
|
|
|
25
|
1 |
|
from .config import Config |
26
|
1 |
|
from . import computeTree, simplify, normalFormProduction, QuotationHandler,\ |
27
|
|
|
QuotationError, NamedEntityMerging, PrepositionMerging |
28
|
|
|
|
29
|
1 |
|
def connect_memcached():
    """Build and return a memcached client for the configured server list."""
    return memcache.Client(Config().memcached_servers)
32
|
|
|
|
33
|
1 |
|
class StanfordNLP:
    """Client for a pool of Stanford CoreNLP servers, with memcached caching.

    One server URL is picked at random per request, providing naive load
    balancing across the pool.
    """

    def __init__(self, urls):
        # List of CoreNLP server base URLs to load-balance over.
        self.servers = urls

    def _parse(self, text):
        '''Send *text* to a randomly chosen CoreNLP server and return the
        first parsed sentence as a dict, with the original text added
        under the 'text' key.

        Need to run CoreNLP server with version > 3.6.0.
        See http://stackoverflow.com/questions/36206407/utf-8-issue-with-corenlp-server
        '''
        server = random.choice(self.servers)
        r = requests.post(server, params={'properties' : '{"annotators": "tokenize,ssplit,pos,lemma,ner,parse", "outputFormat": "json", "parse.flags": " -makeCopulaHead"}'}, data=text.encode('utf8'))
        result = r.json()['sentences'][0]
        result['text'] = text
        return result

    def parse(self, text):
        """Perform a query to all configured APIs and concatenates all
        results into a single list.
        Also handles caching."""
        mc = connect_memcached()

        # Construct a key suitable for memcached (ie. a string of less than
        # 250 bytes) with a salt (to prevent attacks by hash collision)
        salt = Config().memcached_salt
        key = hashlib.md5((salt + text).encode()).hexdigest()
        # Fix: the original code did `'ppp-qp-grammatical-%s' + key`, which
        # concatenated the literal characters '%s' into the key instead of
        # substituting them. Plain concatenation is what was intended.
        key = 'ppp-qp-grammatical-' + key

        # Get the cached value, if any
        r = mc.get(key)
        if not r:
            # If there is no cached value, compute it.
            r = self._parse(text)
            mc.set(key, pickle.dumps(r), time=Config().memcached_timeout)
        else:
            # NOTE(review): unpickling data read back from memcached is only
            # safe if the cache servers are trusted — pickle.loads on
            # attacker-controlled bytes can execute arbitrary code.
            r = pickle.loads(r)
        return r
69
|
1 |
|
# Module-level singleton parser shared by parse() below; server URLs come
# from the module configuration.
stanfordnlp = StanfordNLP(Config().corenlp_servers)
70
|
|
|
|
71
|
1 |
|
def parse(sentence):
    """Parse an English sentence into its normal-form tree representation.

    Quoted substrings are first replaced by unambiguous placeholders,
    the sentence is run through CoreNLP, the quotations are restored in
    the resulting tree, named entities and prepositions are merged, and
    the simplified tree is turned into its normal form.
    """
    quotation_handler = QuotationHandler()
    unambiguous_sentence = quotation_handler.pull(sentence)
    corenlp_output = stanfordnlp.parse(unambiguous_sentence)
    dependency_tree = computeTree(corenlp_output)
    quotation_handler.push(dependency_tree)
    NamedEntityMerging(dependency_tree).merge()
    PrepositionMerging(dependency_tree).merge()
    question_word = simplify(dependency_tree)
    return normalFormProduction(dependency_tree, question_word)
81
|
|
|
|
82
|
1 |
|
class RequestHandler:
    """Answer a PPP request by parsing an English sentence into a tree."""
    __slots__ = ('request', )

    def __init__(self, request):
        # The incoming PPP request object.
        self.request = request

    def answer(self):
        """Return a list holding at most one Response.

        The list is empty when the request is not an English sentence,
        when parsing fails, or when the parser yields a bare
        Resource/List rather than a question tree.
        """
        request = self.request
        # Guard clause: only English sentence trees are handled here.
        if not isinstance(request.tree, Sentence) or request.language != 'en':
            return []
        sentence = request.tree.value
        try:
            tree = parse(sentence)
        except QuotationError:  # no logging, the error is between the chair and the keyboard
            return []
        except KeyboardInterrupt:
            # Never swallow an interrupt — let it propagate.
            raise
        except Exception as e:
            logging.warning(e)
            return []
        if isinstance(tree, (Resource, List)):
            return []
        measures = {'accuracy': 0.5, 'relevance': 0.5}
        trace = request.trace + [TraceItem('QuestionParsing-Grammatical', tree, measures)]
        return [Response('en', tree, measures, trace)]
107
|
|
|
|