|
1
|
|
|
"""Plot a GoSubDag. |
|
2
|
|
|
|
|
3
|
|
|
GO Terms in a plot contain text. The first two lines appear by default: |
|
4
|
|
|
|
|
5
|
|
|
GO:0015618 L07 D13 p2 c2 d3 |
|
6
|
|
|
potassium-transporting ATPase activity |
|
7
|
|
|
|
|
8
|
|
|
GO:0015618 => GO ID |
|
9
|
|
|
|
|
10
|
|
|
First line: |
|
11
|
|
|
|
|
12
|
|
|
LNN => The "level" of the go term. |
|
13
|
|
|
The length of the shortest path(s) from the top to the current term. |
|
14
|
|
|
|
|
15
|
|
|
DNN => The "depth" of the go term. |
|
16
|
|
|
The length of the longest path(s) from the top to the current term. |
|
17
|
|
|
|
|
18
|
|
|
pN => Optional (parentcnt arg): number of immediate parent terms if the number |
|
19
|
|
|
of parents plotted is different than the number of parents in the obo. |
|
20
|
|
|
The default is to plot all higher-level parent terms from the |
|
21
|
|
|
source GO IDs to best show the hierarchy. |
|
22
|
|
|
But with some plots, plotting all parent GO terms can result in |
|
23
|
|
|
a GO DAG which is too large to be clearly readable. The user can then |
|
24
|
|
|
force some parent GO terms to not be plotted using the init_dag=path keyword. |
|
25
|
|
|
If some parent terms are not plotted, using the "parentcnt" option |
|
26
|
|
|
can help the user know where the plot was cut for readability. |
|
27
|
|
|
|
|
28
|
|
|
cN => Optional (childcnt arg): number of immediate child terms. |
|
29
|
|
|
Child hierarchy is not traversed. The default is to not plot all lower-level |
|
30
|
|
|
child terms to prevent the GO plots from being massive and unreadable. |
|
31
|
|
|
So knowing the total number of immediate child terms present (with |
|
32
|
|
|
most not on the plot) can give the user a better sense of the qualities |
|
33
|
|
|
of their plot. |
|
34
|
|
|
|
|
35
|
|
|
dN => Optional (rcntobj arg): total number of all levels of child terms. |
|
36
|
|
|
Child hierarchy is traversed to the bottom or the leaf-level of the graph. |
|
37
|
|
|
Knowing the total number of all descendants succinctly gives the user |
|
38
|
|
|
a sense of how close a GO term is to the bottom of the graph. |
|
39
|
|
|
"Descendants Count" is used as a proxy for understanding if the |
|
40
|
|
|
GO term is "broad" or "specific". If the GO term broadly |
|
41
|
|
|
describes a biological process, it almost always has hundreds or thousands |
|
42
|
|
|
of total child terms. If the GO term specifically describes |
|
43
|
|
|
a biological process, it often has tens or fewer total child terms. |
|
44
|
|
|
|
|
45
|
|
|
""" |
|
46
|
|
|
|
|
47
|
|
|
from __future__ import print_function |
|
48
|
|
|
|
|
49
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved." |
|
50
|
|
|
__author__ = "DV Klopfenstein" |
|
51
|
|
|
|
|
52
|
|
|
import sys |
|
53
|
|
|
import os |
|
54
|
|
|
import pydot |
|
55
|
|
|
from goatools.gosubdag.go_edges import get_edgesobj |
|
56
|
|
|
from goatools.gosubdag.plot.go_node import GoNodeOpts |
|
57
|
|
|
from goatools.gosubdag.plot.go_node import GoNode |
|
58
|
|
|
from goatools.gosubdag.plot.go2color import Go2Color |
|
59
|
|
|
from goatools.gosubdag.plot.goea_results import GoeaResults |
|
60
|
|
|
from goatools.gosubdag.utils import get_kwargs |
|
61
|
|
|
|
|
62
|
|
|
|
|
63
|
|
|
class GoSubDagPlot(object):
    """Plot a graph contained in an object of type GoSubDag.

    Construction wires together an edges object (from get_edgesobj), a
    GoNodeOpts options object, a Go2Color object and a GoNode factory.
    get_pydot_graph() turns them into a pydot digraph and plt_dag() writes
    that graph to an image file.
    """

    # Edge styling per relationship type.
    # Color names: http://www.graphviz.org/doc/info/colors.html
    # pylint: disable=bad-whitespace
    rel2edgekws = {
        'is_a': {'color':'black', 'style':'solid'},
        'part_of': {'color':'magenta', 'style':'dashed'},
        'regulates': {'color':'purple3', 'style':'dashed'},
        'positively_regulates': {'color':'red', 'style':'dashed'},
        'negatively_regulates': {'color':'blue', 'style':'dashed'},
        'occurs_in': {'color':'aquamarine4', 'style':'dashed'},
        'capable_of': {'color':'dodgerblue', 'style':'dashed'},
        'capable_of_part_of': {'color':'darkorange', 'style':'dashed'},
    }

    # Keyword names recognized for each collaborating object; _init_kws uses
    # these sets to split the user's keyword args into per-object dicts.
    exp_keys = {
        'dag': set(['title', 'id', 'dpi']),  # pydot.Dot kwargs
        # goobj2fncname parentcnt shorten mark_alt_id childcnt prt_pcnt ...
        'node_go': GoNodeOpts.exp_keys.union(GoNodeOpts.exp_elems),
        # id2symbol study_items items_p_line pval_name
        'goea': GoeaResults.kws_set,
    }

    # Default values applied when the user does not supply the keyword.
    dflts = {'dpi':150}

    def __init__(self, gosubdag, **kwu):
        """Store the GoSubDag and build the helper objects used for plotting.

        Args:
            gosubdag: The GoSubDag to plot; must be truthy.
            **kwu: User keyword args. Keys read directly in this class:
                'GoNodeOpts' -- pre-built node options object (else one is created)
                'Go2Color'   -- pre-built color object (else one is created)
                'goea_results' -- used when creating the node options object
                'log'        -- stream for status messages (default: sys.stdout)
                Keys listed in exp_keys are split out by _init_kws. The full
                set of keyword args is also forwarded to get_edgesobj and
                Go2Color.

        Raises:
            AssertionError: If gosubdag is falsy.
        """
        # kwu: id, title, dpi, go2txt
        self.kws = self._init_kws(**kwu)
        # kwu: log parentcnt
        assert gosubdag, "**FATAL: MISSING SUBSET GODag"
        self.gosubdag = gosubdag
        self.edgesobj = get_edgesobj(gosubdag, **kwu)
        # The conditionals below are deliberate: a default passed to dict.get
        # would be evaluated eagerly even when the user supplied the object.
        # pylint: disable=line-too-long
        # kwu: go2color go2bordercolor dflt_bordercolor
        _node_opt = kwu['GoNodeOpts'] if 'GoNodeOpts' in kwu else self._init_gonodeopts(**kwu)
        _objcolor = kwu['Go2Color'] if 'Go2Color' in kwu else self._init_objcolor(_node_opt, **kwu)
        self.pydotnodego = GoNode(gosubdag, _objcolor, _node_opt)
        self.log = kwu.get('log', sys.stdout)

    def _init_objcolor(self, node_opts, **kwu):
        """Create a Go2Color object.

        The 'objgoea' entry stored in node_opts.kws['dict'] (None if absent)
        is passed to Go2Color together with all user keyword args.
        """
        objgoea = node_opts.kws['dict'].get('objgoea', None)
        # kwu: go2color go2bordercolor dflt_bordercolor key2col
        return Go2Color(self.gosubdag, objgoea, **kwu)

    def _init_gonodeopts(self, **kws_usr):
        """Initialize a GO Node plot options object, GoNodeOpts.

        Requires self.gosubdag, self.kws and self.edgesobj to be set already.
        """
        options = GoNodeOpts(self.gosubdag, **self.kws['node_go'])
        # Add parent edge count if either is in kws: parentcnt, prt_pcnt
        if not options.kws['set'].isdisjoint(['parentcnt', 'prt_pcnt']):
            options.kws['dict']['c2ps'] = self.edgesobj.get_c2ps()
        # Attach enrichment results only when the user supplied them
        if 'goea_results' in kws_usr:
            objgoea = GoeaResults(kws_usr['goea_results'], **self.kws['goea'])
            options.kws['dict']['objgoea'] = objgoea
        return options

    def prt_goids(self, prt):
        """Print all GO IDs in the plot, plus their color.

        One line is written to prt for every namedtuple in gosubdag.go2nt,
        ordered by namespace, depth and alt. GO IDs with no entry in the
        node color map get an empty color field.
        """
        fmt = self.gosubdag.prt_attr['fmta']
        nts = sorted(self.gosubdag.go2nt.values(), key=lambda nt: [nt.NS, nt.depth, nt.alt])
        _get_color = self.pydotnodego.go2color.get
        for ntgo in nts:
            gostr = fmt.format(**ntgo._asdict())
            col = _get_color(ntgo.GO, "")
            prt.write("{COLOR:7} {GO}\n".format(COLOR=col, GO=gostr))

    def _init_kws(self, **kws_usr):
        """Return a dict containing user-specified plotting options.

        The result has one entry per key of exp_keys ('dag', 'node_go',
        'goea'), each built by get_kwargs from the user keywords that belong
        to that group. kws['dag']['dpi'] is always present and is a string;
        it falls back to dflts['dpi'] when the user gave none.
        """
        kws_self = {}
        user_keys = set(kws_usr)
        for objname, expset in self.exp_keys.items():
            usrkeys_curr = user_keys.intersection(expset)
            kws_self[objname] = get_kwargs(kws_usr, usrkeys_curr, usrkeys_curr)
        dpi = str(kws_self['dag'].get('dpi', self.dflts['dpi']))
        kws_self['dag']['dpi'] = dpi
        return kws_self

    def plt_dag(self, fout_img, engine="pydot"):
        """Plot the GoSubDag to the image file fout_img.

        Args:
            fout_img: Output file name.
            engine: Plotting engine; only "pydot" is implemented.

        Raises:
            RuntimeError: If engine is anything other than "pydot".
        """
        if engine == "pydot":
            self._plt_pydot(fout_img)
        else:
            raise RuntimeError("ENGINE NOT IMPLEMENTED({E})".format(E=engine))

    # ----------------------------------------------------------------------------------
    # pydot
    def _plt_pydot(self, fout_img):
        """Plot using the pydot graphics engine."""
        dag = self.get_pydot_graph()
        self.wr_pydot_dag(fout_img, dag)

    def wr_pydot_dag(self, fout_img, dag):
        """Write a pydot graph to an image file and log a summary line.

        The image format handed to dag.write is the extension of fout_img
        without the leading dot, so the file name needs an extension that
        names the wanted format.
        """
        img_fmt = os.path.splitext(fout_img)[1][1:]
        dag.write(fout_img, format=img_fmt)
        self.log.write("  {GO_USR:>3} usr {GO_ALL:>3} GOs  WROTE: {F}\n".format(
            F=fout_img,
            GO_USR=len(self.gosubdag.go_sources),
            GO_ALL=len(dag.obj_dict['nodes'])))

    def get_pydot_graph(self):
        """Given a DAG, return a pydot digraph object.

        Nodes come from _get_go2pydotnode. The edges in edgesobj.edges are
        drawn with the 'is_a' style; each list in edgesobj.edges_rel is drawn
        with the style registered for its relationship type in rel2edgekws.
        A relationship type with no registered style is drawn with default
        edge attributes instead of failing on the keyword expansion.
        """
        # Initialize empty dag
        dag = pydot.Dot(graph_type='digraph', **self.kws['dag'])
        # Initialize nodes
        go2node = self._get_go2pydotnode()
        # Add nodes to graph
        for node in go2node.values():
            dag.add_node(node)
        # Add edges to graph
        rel2edgekws = self.rel2edgekws
        self.edgesobj.chk_edges()
        # The {} default keeps the ** expansion valid when a style is missing
        self._add_edges(self.edgesobj.edges, go2node, dag, **rel2edgekws.get('is_a', {}))
        for reltype, edges_list in self.edgesobj.edges_rel.items():
            self._add_edges(edges_list, go2node, dag, **rel2edgekws.get(reltype, {}))
        return dag

    @staticmethod
    def _add_edges(edges_list, go2node, dag, **kws):
        """Add one pydot Edge to dag for every (src, tgt) pair in edges_list.

        Args:
            edges_list: Iterable of (src, tgt) GO ID pairs.
            go2node: Dict mapping GO ID to pydot node; must contain both ends.
            dag: pydot graph that receives the edges.
            **kws: Extra pydot.Edge attributes, e.g. color and style.

        Raises:
            AssertionError: If either end of an edge is missing from go2node.
        """
        # style: solid dashed dotted bold invis tapered
        # arrowType: http://www.graphviz.org/doc/info/attrs.html#k:arrowType
        for src, tgt in edges_list:
            assert src in go2node, "MISSING Edge source({S}); target({T})".format(S=src, T=tgt)
            assert tgt in go2node, "MISSING Edge target({T}); source({S})".format(S=src, T=tgt)
            dag_edge = pydot.Edge(
                go2node[tgt], go2node[src],
                shape="normal",
                dir="back",  # invert arrow direction for obo dag convention
                **kws)
            dag.add_edge(dag_edge)

    @staticmethod
    def _prt_edge(dag_edge, attr):
        """Print edge attribute (debugging aid).

        attr is a key of dag_edge.obj_dict, one of:
        sequence parent_graph points attributes type parent_edge_list
        """
        print("Edge {ATTR}: {VAL}".format(ATTR=attr, VAL=dag_edge.obj_dict[attr]))

    def _get_go2pydotnode(self):
        """Create pydot Nodes.

        Returns a dict mapping each GO ID to be plotted to its pydot node.
        When a plotted GO ID differs from the id of its GO object, the
        object's own id is mapped to the same node as well.
        """
        go2node = {}
        go2obj = self.gosubdag.go2obj
        get_node = self.pydotnodego.get_node
        for goid in self.get_goids_plt():
            goobj = go2obj[goid]
            node = get_node(goid, goobj)
            go2node[goid] = node
            if goid != goobj.id:  # goid is an alias for goobj
                go2node[goobj.id] = node
        return go2node

    def get_goids_plt(self):
        """Get GO IDs to be plotted, given a GoSubDag."""
        return self.edgesobj.get_all_edge_nodes()
|
226
|
|
|
|
|
227
|
|
|
|
|
228
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved. |
|
229
|
|
|
|