1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
from __future__ import unicode_literals |
4
|
|
|
from itertools import count |
5
|
|
|
from IPython.core.display import display, HTML |
6
|
|
|
import os |
7
|
|
|
|
8
|
|
|
|
9
|
|
|
def _read_asset(assets_dir, filename):
    """
    Read one bundled asset file and return its contents as text.

    The file is opened through ``io.open`` with an explicit UTF-8 encoding so
    the result is unicode text on both Python 2 and Python 3, regardless of
    the platform's default encoding.

    :param assets_dir: directory that holds the asset files
    :param filename: name of the file inside ``assets_dir``
    :return: the complete contents of the file
    """
    import io  # function-scope import: the module header does not import io

    with io.open(os.path.join(assets_dir, filename), encoding="utf-8") as asset_file:
        return asset_file.read()


class JupyterVisualizer(object):
    """
    Widgets for use with jupyter notebook.

    Every method is static.  The ``*_to_html`` methods build an HTML string
    and the matching ``display_*`` methods hand that string to IPython's
    ``display(HTML(...))``.

    The asset files in the ``assets`` directory next to this module are read
    once, while the class body is executed.
    """

    ASSETS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")

    dp_lib = _read_asset(ASSETS_DIR, "displacy-processors.js")
    base_contents = _read_asset(ASSETS_DIR, "displacy-processors.html")
    base_css = _read_asset(ASSETS_DIR, "displacy-processors.css")
    mentions_css = _read_asset(ASSETS_DIR, "mentions.css")
    # style loosely corresponding to mention highlighting
    parse_css = _read_asset(ASSETS_DIR, "parse.css")

    # source of the numeric suffix used for generated container ids
    _id_gen = count(start=0, step=1)

    @staticmethod
    def graph_to_html(s, graph_name="stanford-collapsed", css=None, distance=None, div_id=None):
        """
        Fill the ``displacy-processors.html`` template for one sentence.

        :param s: sentence object; ``s.words``, ``s.length`` and
            ``s.to_JSON()`` are used
        :param graph_name: passed to the template as ``gn``
        :param css: optional CSS text; every ``.displacy`` in it is prefixed
            with ``#<div_id>`` so the rules apply only to this visualization
        :param distance: passed to the template as ``dist``; when falsy it is
            computed as ``int((total word characters + s.length) * 1.75)``
        :param div_id: id of the container element; when falsy an id of the
            form ``graph_<n>`` is generated
        :return: the formatted HTML string
        """
        if not distance:
            total_chars = sum(len(w) for w in s.words)
            distance = int((total_chars + s.length) * 1.75)

        # The counter advances on every call, even when div_id is supplied.
        nid = next(JupyterVisualizer._id_gen)
        div_id = div_id or "graph_{}".format(nid)

        # apply css only to current viz
        if css:
            custom_css = css.replace(".displacy", "#{} .displacy".format(div_id))
        else:
            custom_css = ""

        return JupyterVisualizer.base_contents.format(
            dp_lib=JupyterVisualizer.dp_lib,
            dist=distance,
            sent_json=s.to_JSON(),
            div_id=div_id,
            css=custom_css,
            gn=graph_name
        )

    @staticmethod
    def display_graph(s, graph_name="stanford-collapsed", css=None, distance=None, div_id=None):
        """
        Render the output of ``graph_to_html`` in the notebook.

        Accepts the same arguments as ``graph_to_html`` and forwards them
        unchanged.  Returns nothing.
        """
        res = JupyterVisualizer.graph_to_html(
            s=s,
            graph_name=graph_name,
            css=css,
            distance=distance,
            div_id=div_id
        )
        display(HTML(data=res))

    @staticmethod
    def mention_to_html(mention):
        """
        Build HTML that highlights a mention inside its sentence.

        The words of ``mention.sentenceObj`` are joined with single spaces.
        Spans are wrapped around the trigger (if any), each argument (if
        any), the mention itself and finally the whole sentence, in that
        order, so a span applied later encloses the ones applied before it.
        The ``end`` attribute of the mention, its trigger and its arguments
        is treated as exclusive (the last wrapped token is ``end - 1``).

        Words, labels and argument roles are escaped (``&``, ``<``, ``>``)
        before being placed in the markup so that such characters in the
        text cannot break or inject HTML.

        :param mention: mention object; ``sentenceObj``, ``trigger``,
            ``arguments``, ``start``, ``end`` and ``label`` are used
        :return: a ``<style>`` element holding ``mentions_css`` followed by
            the highlighted sentence markup
        """
        # available in the standard library of both Python 2 and Python 3
        from xml.sax.saxutils import escape

        SENTENCE_BOS = """<span class="sentence">"""
        MENTION_LABEL = """<sub class="mention-label">{}</sub>"""
        MENTION_SPAN_BOS = """<span class="mention-span sentence">"""
        ARG_BOS = """<span class="mention-arg mention-span sentence">"""
        TRIGGER_BOS = """<span class="mention-trigger mention-span sentence">"""
        EOS = "</span>"

        def text(value):
            # Format first (exactly as the markup templates would), then escape.
            return escape("{}".format(value))

        def add_label(label):
            return """<sup class="mention-role">{}</sup>""".format(text(label))

        tokens = [text(w) for w in mention.sentenceObj.words]

        def wrap(first, last, opening, closing=""):
            # Prepend before appending so that first == last nests correctly.
            tokens[first] = "{}{}".format(opening, tokens[first])
            tokens[last] = "{}{}{}".format(tokens[last], closing, EOS)

        # mention trigger
        trigger = mention.trigger
        if trigger:
            wrap(trigger.start, trigger.end - 1, TRIGGER_BOS, add_label("TRIGGER"))

        # mention args
        if mention.arguments:
            for (role, args) in mention.arguments.items():
                for arg in args:
                    opening = ARG_BOS + MENTION_LABEL.format(text(arg.label))
                    wrap(arg.start, arg.end - 1, opening, add_label(role))

        # mention span
        wrap(
            mention.start,
            mention.end - 1,
            MENTION_SPAN_BOS + MENTION_LABEL.format(text(mention.label))
        )

        # sentence tag
        wrap(0, -1, SENTENCE_BOS)

        html = " ".join(tokens)
        return """<style>{css}</style>{mention_html}""".format(
            css=JupyterVisualizer.mentions_css,
            mention_html=html
        )

    @staticmethod
    def display_mention(mention):
        """
        Render the output of ``mention_to_html`` in the notebook.

        :param mention: forwarded unchanged to ``mention_to_html``
        """
        res = JupyterVisualizer.mention_to_html(mention)
        display(HTML(res))
105
|
|
|
|