|
1
|
|
|
"""Plot a GODagSmall.""" |
|
2
|
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved." |
|
4
|
|
|
__author__ = "DV Klopfenstein" |
|
5
|
|
|
|
|
6
|
|
|
import sys |
|
7
|
|
|
import os |
|
8
|
|
|
import collections as cx |
|
9
|
|
|
from collections import OrderedDict |
|
10
|
|
|
from goatools.godag_obosm import OboToGoDagSmall |
|
11
|
|
|
|
|
12
|
|
|
def plot_gos(fout_png, goids, obo_dag, *args, **kws):
    """Plot the paths for the given GO ids, taken from obo_dag, to fout_png.

    The optional 'engine' keyword selects the plotting engine and falls back
    to 'pydot' when absent. All positional extras and keywords (including
    'engine') are forwarded unchanged to GODagSmallPlot.
    """
    # Reduce the full DAG to the sub-graph built from the requested GO ids
    small_dag = OboToGoDagSmall(goids=goids, obodag=obo_dag).godag
    plotter = GODagSmallPlot(small_dag, *args, **kws)
    plotter.plt(fout_png, kws.get('engine', 'pydot'))
|
18
|
|
|
|
|
19
|
|
|
def plot_goid2goobj(fout_png, goid2goobj, *args, **kws):
    """Plot the paths for the GO ids in goid2goobj (GO id -> GO object) to fout_png.

    The optional 'engine' keyword selects the plotting engine and falls back
    to 'pydot' when absent. All positional extras and keywords (including
    'engine') are forwarded unchanged to GODagSmallPlot.
    """
    small_dag = OboToGoDagSmall(goid2goobj=goid2goobj).godag
    plotter = GODagSmallPlot(small_dag, *args, **kws)
    plotter.plt(fout_png, kws.get('engine', 'pydot'))
|
25
|
|
|
|
|
26
|
|
|
def plot_results(fout_png, goea_results, *args, **kws):
    """Plot GOEA results, optionally writing one image per namespace.

    If fout_png contains the placeholder "{NS}", the results are grouped by
    their NS attribute and each group is plotted to fout_png with the
    placeholder replaced by that NS value. Without the placeholder, all
    results are plotted into the single file fout_png.
    """
    # No placeholder: everything goes into one plot
    if "{NS}" not in fout_png:
        plt_goea_results(fout_png, goea_results, *args, **kws)
        return
    # Placeholder present: group the results by namespace (e.g. BP, MF, CC)
    by_ns = {}
    for result in goea_results:
        by_ns.setdefault(result.NS, []).append(result)
    # One output file per namespace
    for ns_key in by_ns:
        plt_goea_results(fout_png.format(NS=ns_key), by_ns[ns_key], *args, **kws)
|
38
|
|
|
|
|
39
|
|
|
def plt_goea_results(fout_png, goea_results, *args, **kws):
    """Plot one list of GOEA results into the single file fout_png.

    The optional 'engine' keyword selects the plotting engine and falls back
    to 'pydot' when absent. The results are handed to GODagSmallPlot through
    its 'goea_results' keyword, together with all other arguments.
    """
    small_dag = OboToGoDagSmall(goea_results=goea_results).godag
    plotter = GODagSmallPlot(small_dag, *args, goea_results=goea_results, **kws)
    plotter.plt(fout_png, kws.get('engine', 'pydot'))
|
45
|
|
|
|
|
46
|
|
|
class GODagPltVars(object):
    """Holds plotting parameters."""

    # Color per relationship name. Color names are Graphviz colors:
    # http://www.graphviz.org/doc/info/colors.html
    rel2col = dict(
        is_a='black',
        part_of='blue',
        regulates='gold',
        positively_regulates='green',
        negatively_regulates='red',
        occurs_in='aquamarine4',
        capable_of='dodgerblue',
        capable_of_part_of='darkorange',
    )

    # Ordered (alpha threshold, color) pairs, smallest threshold first
    alpha2col = OrderedDict((
        # GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ))

    # Colors for level-01 GO terms and for source GO terms
    key2col = dict(
        level_01='lightcyan',
        go_sources='palegreen',
    )

    # Header-line template; level and depth are padded to two characters
    fmthdr = "{GO} L{level:>02} D{depth:>02}"
    # Study-count line template
    fmtres = "{study_count} genes"
    # study items per line on GO Terms:
    items_p_line = 5
|
79
|
|
|
|
|
80
|
|
|
|
|
81
|
|
|
class GODagSmallPlot(object):
    """Plot a graph contained in an object of type GODagSmall.

    The DAG object is read through its `go_sources`, `go2obj` and
    `get_edges()` members.
    """

    def __init__(self, godagsmall, *args, **kws):
        """Store the DAG and the plotting options.

        Recognized keywords:
            log: stream that receives the "WROTE" message (default sys.stdout).
            title: label of the graph (default None).
            goea_results: list of GOEA result objects; indexed by their GO attribute.
            go2nt: mapping GO id -> result, used when goea_results is absent.
            pval_name: name of the p-value attribute on each result.
            id2symbol: mapping item id -> symbol printed instead of the id.
            study_items: None prints "N genes"; True prints all study items;
                an int limits how many study items are printed.
            alpha_str: stored as given; not read by the methods of this class.
            GODagPltVars: object holding the plotting parameters.
            items_p_line: overrides the plotting parameter of the same name.
            dpi: resolution handed to the graph (default 150).
        """
        self.args = args
        self.log = kws['log'] if 'log' in kws else sys.stdout
        self.title = kws['title'] if 'title' in kws else None
        # GOATOOLs results as objects
        self.go2res = self._init_go2res(**kws)
        # GOATOOLs results as a list of namedtuples
        self.pval_name = self._init_pval_name(**kws)
        # Gene Symbol names
        self.id2symbol = kws['id2symbol'] if 'id2symbol' in kws else {}
        self.study_items = kws['study_items'] if 'study_items' in kws else None
        self.study_items_max = self._init_study_items_max()
        self.alpha_str = kws['alpha_str'] if 'alpha_str' in kws else None
        # The default is only built when the caller did not supply one
        self.pltvars = kws['GODagPltVars'] if 'GODagPltVars' in kws else GODagPltVars()
        if 'items_p_line' in kws:
            self.pltvars.items_p_line = kws['items_p_line']
        self.dpi = kws['dpi'] if 'dpi' in kws else 150
        self.godag = godagsmall
        self.goid2color = self._init_goid2color()
        # The pydot module is loaded on first use by _get_pydot()
        self.pydot = None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns the limit when study_items is an int, otherwise None (no limit).
        """
        items = self.study_items
        # True means "print every item"; it is tested before the int check
        # because bool is a subclass of int.
        if items is None or items is True:
            return None
        if isinstance(items, int):
            return items
        return None

    @staticmethod
    def _init_go2res(**kws):
        """Initialize GOEA results: a GO id -> result mapping, or None if none given."""
        if 'goea_results' in kws:
            return {res.GO: res for res in kws['goea_results']}
        if 'go2nt' in kws:
            return kws['go2nt']
        return None

    @staticmethod
    def _init_pval_name(**kws):
        """Initialize pvalue attribute name.

        An explicit 'pval_name' keyword wins. Otherwise the name is built as
        "p_<fieldname>" from the first method field of the first GOEA result.
        Returns None when neither source is available.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        goea = kws.get('goea_results')
        if goea:
            return "p_{M}".format(M=goea[0].method_flds[0].fieldname)
        return None

    def _init_goid2color(self):
        """Set colors of GO terms; returns a GO id -> color name mapping."""
        goid2color = {}
        # 1. colors based on p-value override colors based on source GO
        # Without a p-value attribute name there is nothing to look up;
        # getattr() would raise TypeError on a None attribute name.
        if self.go2res is not None and self.pval_name is not None:
            alpha2col = self.pltvars.alpha2col
            pval_name = self.pval_name
            for goid, res in self.go2res.items():
                pval = getattr(res, pval_name, None)
                if pval is None:
                    continue
                # The first threshold (in alpha2col order) that admits the p-value wins
                for alpha, color in alpha2col.items():
                    if pval <= alpha and res.study_count != 0:
                        goid2color[goid] = color
                        break
        # 2. GO source color
        color = self.pltvars.key2col['go_sources']
        for goid in self.godag.go_sources:
            if goid not in goid2color:
                goid2color[goid] = color
        # 3. Level-01 GO color
        color = self.pltvars.key2col['level_01']
        for goid, goobj in self.godag.go2obj.items():
            if goobj.level == 1 and goid not in goid2color:
                goid2color[goid] = color
        return goid2color

    def plt(self, fout_img, engine="pydot"):
        """Plot the DAG to fout_img with the named engine.

        Only "pydot" is implemented. "pygraphviz" raises NotImplementedError
        and any other name raises ValueError (both are Exception subclasses).
        """
        if engine == "pydot":
            self._plt_pydot(fout_img)
        elif engine == "pygraphviz":
            raise NotImplementedError("TO BE IMPLEMENTED SOON: ENGINE pygraphviz")
        else:
            raise ValueError("UNKNOWN ENGINE({E})".format(E=engine))

    # ----------------------------------------------------------------------------------
    # pydot
    def _plt_pydot(self, fout_img):
        """Plot using the pydot graphics engine and report the file on self.log."""
        dag = self._get_pydot_graph()
        # The image format is the file extension without its leading dot
        img_fmt = os.path.splitext(fout_img)[1][1:]
        dag.write(fout_img, format=img_fmt)
        self.log.write("  {GO_USR:>3} usr {GO_ALL:>3} GOs  WROTE: {F}\n".format(
            F=fout_img,
            GO_USR=len(self.godag.go_sources),
            GO_ALL=len(self.godag.go2obj)))

    def _get_pydot_graph(self):
        """Given a DAG, return a pydot digraph object."""
        # Every edge is drawn with the color of the is_a relationship
        rel = "is_a"
        pydot = self._get_pydot()
        # Initialize empty dag
        dag = pydot.Dot(label=self.title, graph_type='digraph', dpi="{}".format(self.dpi))
        # Initialize nodes
        go2node = self._get_go2pydotnode()
        # Add nodes to graph
        for node in go2node.values():
            dag.add_node(node)
        # Add edges to graph
        rel2col = self.pltvars.rel2col
        for src, tgt in self.godag.get_edges():
            dag.add_edge(pydot.Edge(
                go2node[tgt], go2node[src],
                shape="normal",
                color=rel2col[rel],
                dir="back")) # invert arrow direction for obo dag convention
        return dag

    def _get_go2pydotnode(self):
        """Create pydot Nodes; returns a GO id -> pydot Node mapping."""
        # Go through _get_pydot() so this method does not depend on call order
        pydot = self._get_pydot()
        go2node = {}
        for goid, goobj in self.godag.go2obj.items():
            txt = self._get_node_text(goid, goobj)
            fillcolor = self.goid2color.get(goid, "white")
            node = pydot.Node(
                txt,
                shape="box",
                style="rounded, filled",
                fillcolor=fillcolor,
                color="mediumseagreen")
            go2node[goid] = node
        return go2node

    def _get_pydot(self):
        """Return pydot package. Load pydot, if necessary."""
        if self.pydot:
            return self.pydot
        self.pydot = __import__("pydot")
        return self.pydot

    # ----------------------------------------------------------------------------------
    # Methods for text printed inside GO terms
    def _get_node_text(self, goid, goobj):
        """Return a string to be printed in a GO term box."""
        txt = []
        # Header line: "GO0036464 L04 D06"
        txt.append(self.pltvars.fmthdr.format(
            GO=goobj.id.replace("GO:", "GO"),
            level=goobj.level,
            depth=goobj.depth))
        # GO name line: "cytoplasmic ribonucleoprotein"; commas become line breaks
        name = goobj.name.replace(",", "\n")
        txt.append(name)
        # study info line: "24 genes"
        study_txt = self._get_study_txt(goid)
        if study_txt is not None:
            txt.append(study_txt)
        # return text string
        return "\n".join(txt)

    def _get_study_txt(self, goid):
        """Get GO text from GOEA study; None if there is no result for goid."""
        if self.go2res is None:
            return None
        res = self.go2res.get(goid, None)
        if res is None:
            return None
        if self.study_items is not None:
            return self._get_item_str(res)
        return self.pltvars.fmtres.format(study_count=res.study_count)

    @staticmethod
    def _get_multiline_str(items, npl):
        """Join items with ", " in rows of npl items, one row per line."""
        rows = [items[i:i+npl] for i in range(0, len(items), npl)]
        return "\n".join(", ".join(str(e) for e in row) for row in rows)

    def _get_item_str(self, res):
        """Return the sorted study items of res in one of these formats:
             1. No limit set:          7) Ptprc, Mif, Cd81, Bcl2, Sash3, ...
             2. Count within limit:    Ptprc, Mif, Cd81, Bcl2, Sash3, Tnfrsf4, Cdkn1a
             3. Count above the limit: 7 genes; Ptprc, Mif, Cd81, Bcl2, Sash3...
           Items are ids or, where id2symbol has an entry, symbols. Lines are
           broken after every items_p_line items.
        """
        npl = self.pltvars.items_p_line # Number of items Per Line
        prt_items = sorted(self.__get_genestr(itemid) for itemid in res.study_items)
        num_items = len(prt_items)
        if self.study_items_max is None:
            return "{N}) {GENES}".format(
                N=num_items, GENES=self._get_multiline_str(prt_items, npl))
        if num_items <= self.study_items_max:
            return self._get_multiline_str(prt_items, npl)
        # Too many items: print the total and only the first study_items_max
        short_list = prt_items[:self.study_items_max]
        return "".join([
            "{N} genes; ".format(N=num_items),
            self._get_multiline_str(short_list, npl),
            "..."])

    def __get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        if self.id2symbol is not None:
            symbol = self.id2symbol.get(itemid, None)
            if symbol is not None:
                return symbol
        if isinstance(itemid, int):
            return str(itemid)
        return itemid
|
288
|
|
|
|
|
289
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved. |
|
290
|
|
|
|