1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
from __future__ import unicode_literals |
4
|
|
|
from itertools import count |
5
|
|
|
from IPython.core.display import display, HTML |
6
|
|
|
from termcolor import colored |
7
|
|
|
import os |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
class JupyterVisualizer(object):
    """
    Widgets for use with jupyter notebook

    All functionality is exposed through static methods.  The JavaScript,
    HTML and CSS assets are read once, when the class body is executed, from
    the ``assets`` directory located next to this module.
    """

    # directory holding the bundled js/html/css assets
    ASSETS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")

    # javascript library substituted into the html template as ``dp_lib``
    with open(os.path.join(ASSETS_DIR, "displacy-processors.js")) as js_file:
        dp_lib = js_file.read()
    # html template filled in by ``graph_to_html`` via ``str.format``
    with open(os.path.join(ASSETS_DIR, "displacy-processors.html")) as html_file:
        base_contents = html_file.read()
    with open(os.path.join(ASSETS_DIR, "displacy-processors.css")) as css_file:
        base_css = css_file.read()
    # stylesheet emitted alongside the markup built by ``mention_to_html``
    with open(os.path.join(ASSETS_DIR, "mentions.css")) as css_file:
        mentions_css = css_file.read()
    # style loosely corresponding to mention highlighting
    with open(os.path.join(ASSETS_DIR, "parse.css")) as css_file:
        parse_css = css_file.read()

    # shared counter used to build default div ids ("graph_0", "graph_1", ...)
    _id_gen = count(start=0, step=1)

    @staticmethod
    def graph_to_html(s, graph_name="stanford-collapsed", css="", distance=None, div_id=None):
        """
        Fill the html template with the data needed to draw one sentence graph.

        Parameters
        ----------
        s
            Sentence-like object; must provide ``words``, ``length`` and ``to_JSON()``.
        graph_name
            Name of the graph to draw (substituted into the template as ``gn``).
        css
            Optional stylesheet.  Every ``.displacy`` occurrence is prefixed with
            ``#<div_id> `` so that the rules only apply to this visualization.
        distance
            Value substituted into the template as ``dist``.  When falsy
            (``None`` or ``0``) it is derived from the sentence:
            ``int((total characters in s.words + s.length) * 1.75)``.
        div_id
            Id of the container element.  Defaults to ``"graph_<n>"`` where
            ``<n>`` is drawn from a class-wide counter.

        Returns
        -------
        The formatted html template as a string.
        """
        distance = distance or int((sum(len(w) for w in s.words) + s.length) * 1.75)

        # An id is drawn on every call (even when div_id is supplied) so the
        # numbering of auto-generated ids stays the same as before.
        nid = next(JupyterVisualizer._id_gen)
        div_id = div_id or "graph_{}".format(nid)

        # apply css only to current viz
        custom_css = css.replace(".displacy", "#{} .displacy".format(div_id)) if css else ""

        return JupyterVisualizer.base_contents.format(
            dp_lib=JupyterVisualizer.dp_lib,
            dist=distance,
            sent_json=s.to_JSON(),
            div_id=div_id,
            css=custom_css,
            gn=graph_name
        )

    @staticmethod
    def display_graph(s, graph_name="stanford-collapsed", css="", distance=None, div_id=None):
        """
        Build the html for a sentence graph (see ``graph_to_html``) and
        display it in the notebook.
        """
        res = JupyterVisualizer.graph_to_html(s=s, graph_name=graph_name, css=css, distance=distance, div_id=div_id)
        display(HTML(data=res))

    @staticmethod
    def mention_to_html(mention):
        """
        Render the sentence containing ``mention`` as html, wrapping the
        trigger, each argument and the whole mention in ``<span>`` elements.

        Token text, mention/argument labels and argument roles are
        html-escaped before being inserted, so characters such as ``<``,
        ``>`` and ``&`` are shown literally instead of being interpreted
        as markup.

        Parameters
        ----------
        mention
            Mention-like object; must provide ``sentenceObj.words``,
            ``trigger``, ``arguments`` (mapping of role -> iterable of
            argument mentions), ``start``, ``end`` (exclusive) and ``label``.

        Returns
        -------
        A string made of a ``<style>`` element holding ``mentions_css``
        followed by the annotated sentence markup.
        """
        # local import: stdlib on both Python 2 and 3, keeps the block self-contained
        from xml.sax.saxutils import escape

        SENTENCE_BOS = """<span class="sentence">"""
        MENTION_LABEL = """<sub class="mention-label">{}</sub>"""
        MENTION_SPAN_BOS = """<span class="mention-span sentence">"""
        ARG_BOS = """<span class="mention-arg mention-span sentence">"""
        TRIGGER_BOS = """<span class="mention-trigger mention-span sentence">"""
        EOS = "</span>"

        def esc(value):
            # format first so that non-string values are handled like before
            return escape("{}".format(value))

        def add_label(label):
            return """<sup class="mention-role">{}</sup>""".format(esc(label))

        # escaped copy of the tokens; opening/closing tags are attached in place
        tokens = [esc(w) for w in mention.sentenceObj.words]

        def wrap(first, last, opening, closing=""):
            # The opening tag is attached before the closing one so a
            # single-token span (first == last) ends up as <open>tok</close>.
            tokens[first] = "{}{}".format(opening, tokens[first])
            tokens[last] = "{}{}{}".format(tokens[last], closing, EOS)

        # Spans are applied innermost first: each later (outer) opening tag is
        # prepended and each later closing tag appended, giving proper nesting.

        # mention trigger
        if mention.trigger:
            wrap(
                mention.trigger.start,
                mention.trigger.end - 1,
                TRIGGER_BOS,
                closing=add_label("TRIGGER"),
            )
        # mention args
        if mention.arguments:
            for (role, args) in mention.arguments.items():
                for arg in args:
                    wrap(
                        arg.start,
                        arg.end - 1,
                        ARG_BOS + MENTION_LABEL.format(esc(arg.label)),
                        closing=add_label(role),
                    )
        # mention span
        wrap(
            mention.start,
            mention.end - 1,
            MENTION_SPAN_BOS + MENTION_LABEL.format(esc(mention.label)),
        )
        # sentence tag (first token .. last token)
        wrap(0, -1, SENTENCE_BOS)

        html = " ".join(tokens)
        return """<style>{css}</style>{mention_html}""".format(css=JupyterVisualizer.mentions_css, mention_html=html)

    @staticmethod
    def display_mention(mention):
        """
        Build the html for a mention (see ``mention_to_html``) and display
        it in the notebook.
        """
        res = JupyterVisualizer.mention_to_html(mention)
        display(HTML(res))
107
|
|
|
|
108
|
|
|
|
109
|
|
|
class OdinHighlighter(object):
    """
    Terminal highlighting of mentions, built on ``termcolor.colored``.
    """

    @staticmethod
    def LABEL(token):
        """Return ``token`` in bold red."""
        return colored(token, color="red", attrs=["bold"])

    @staticmethod
    def ARG(token):
        """Return ``token`` in bold on a green background."""
        return colored(token, on_color="on_green", attrs=["bold"])

    @staticmethod
    def TRIGGER(token):
        """Return ``token`` in bold on a blue background."""
        return colored(token, on_color="on_blue", attrs=["bold"])

    @staticmethod
    def CONCEAL(token):
        """Return ``token`` with the "concealed" attribute on a grey background."""
        return colored(token, on_color="on_grey", attrs=["concealed"])

    @staticmethod
    def MENTION(token):
        """Return ``token`` on a yellow background."""
        return colored(token, on_color="on_yellow")

    @staticmethod
    def highlight_mention(mention):
        """
        Return the mention's sentence as a single string in which the
        argument, trigger and mention tokens carry terminal highlighting.
        """
        tokens = mention.sentenceObj.words[:]

        def paint(style, lo, hi):
            # re-style tokens lo .. hi-1 in place, on top of any earlier styling
            for idx in range(lo, hi):
                tokens[idx] = style(tokens[idx])

        # a TextBoundMention's own span is styled the same way as an argument
        if mention.type == "TextBoundMention":
            paint(OdinHighlighter.ARG, mention.start, mention.end)

        if mention.arguments:
            for (_role, role_args) in mention.arguments.items():
                for role_arg in role_args:
                    paint(OdinHighlighter.ARG, role_arg.start, role_arg.end)

        # the trigger gets its own style, distinct from the arguments
        if mention.trigger:
            trig = mention.trigger
            paint(OdinHighlighter.TRIGGER, trig.start, trig.end)

        # every token inside the mention span is additionally marked ...
        paint(OdinHighlighter.MENTION, mention.start, mention.end)
        # ... and so are the spaces separating those tokens
        inside = OdinHighlighter.MENTION(" ").join(tokens[mention.start:mention.end])

        before = " ".join(tokens[:mention.start])
        after = " ".join(tokens[mention.end:])
        return " ".join([before, inside, after]).strip()
159
|
|
|
|