1
|
|
|
"""Create a pydot Node for a GO Term.""" |
2
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved." |
4
|
|
|
__author__ = "DV Klopfenstein" |
5
|
|
|
|
6
|
|
|
import pydot |
7
|
|
|
from goatools.gosubdag.plot.go_name_shorten import ShortenText |
8
|
|
|
from goatools.gosubdag.utils import extract_kwargs |
9
|
|
|
|
10
|
|
|
class GoNodeOpts(object):
    """Gather the user's keyword args that control text in GO-term plot nodes.

    The raw keyword args are handed to ``extract_kwargs`` once, at
    construction.  The result is used here as a mapping with two entries:
    ``'dict'`` (options that carry a value) and ``'set'`` (names of options
    that are switched on).
    """

    # Option names passed to extract_kwargs as the expected "key" arguments
    exp_keys = {'goobj2fncname', 'go2txt', 'objgoea', 'prt_flds'}

    # Option names passed to extract_kwargs as the expected "element" arguments
    # NOTE(review): GoNode reads 'c2ps' as a *value* from get_kws(); confirm
    # that extract_kwargs really places it in the 'dict' part even though it
    # is listed here rather than in exp_keys.
    exp_elems = {
        'c2ps',         # Count of an object's Parent
        'prt_pcnt',     # Always print parent count: pN
        'parentcnt',    # Print parent count only if not all parents are shown
        'childcnt',     # Always print child count: cN
        'mark_alt_id',  # Put an 'a' after GO:NNNNNNN if it is an alternate GO ID
        'shorten',      # Shorten GO description
        'no_name',      # Do not print GO description
    }

    def __init__(self, gosubdag, **kws):
        """Keep the GoSubDag and sort the user keyword args.

        Args:
            gosubdag: GoSubDag object; stored unchanged.
            **kws: user plot options; only names in ``exp_keys`` and
                ``exp_elems`` are offered to ``extract_kwargs``.
        """
        self.gosubdag = gosubdag
        # Result is used as: {'set': set(...), 'dict': {...}}
        self.kws = extract_kwargs(kws, self.exp_keys, self.exp_elems)

    def get_kws(self):
        """Return a copy of the value-carrying options given by the user.

        When 'shorten' is switched on and the user supplied no
        'goobj2fncname' function, ShortenText's ``get_short_plot_name`` is
        filled in as that function.
        """
        user_opts = self.kws['dict'].copy()
        shorten_requested = 'shorten' in self.kws['set']
        if shorten_requested and 'goobj2fncname' not in user_opts:
            user_opts['goobj2fncname'] = ShortenText().get_short_plot_name
        return user_opts

    def get_present(self):
        """Return the switched-on option names, leaving out 'parentcnt'."""
        # The presence of c2ps marks that the user specified parentcnt=True
        return self.kws['set'].difference(('parentcnt',))
44
|
|
|
|
45
|
|
|
|
46
|
|
|
class GoNode(object):
    """Creates pydot Node containing a GO term.

    The node label is a header line (shortened GO ID plus the fields chosen
    by ``_get_prtflds``), optionally followed by the GO name, a study-info
    line and user-supplied text, joined with newlines.
    """

    def __init__(self, gosubdag, objcolor, optobj):
        """Store the data sources used to build node text and colors.

        Args:
            gosubdag: GoSubDag; its ``go2nt``, ``prt_attr`` and
                ``relationships`` attributes are read by this class.
            objcolor: Go2Color; provides ``go2color`` and ``get_bordercolor``.
            optobj: GoNodeOpts; provides ``get_kws()`` and ``get_present()``.
        """
        self.gosubdag = gosubdag             # GoSubDag
        self.objcolor = objcolor             # Go2Color -> color options
        self.kws = optobj.get_kws()          # GoNodeOpts -> text options
        self.present = optobj.get_present()  # names of switched-on options
        self.go2color = objcolor.go2color

    def get_node(self, goid, goobj):
        """Return a rounded, filled pydot box node for one GO term.

        The fill color is looked up in ``go2color`` (white when absent) and
        the border color comes from ``objcolor.get_bordercolor``.
        """
        # pydot.Node.objdict holds this information. pydot.Node.objdict['name']
        return pydot.Node(
            self.get_node_text(goid, goobj),
            shape="box",
            style="rounded, filled",
            fillcolor=self.go2color.get(goid, "white"),
            color=self.objcolor.get_bordercolor(goid))

    def str_fmthdr(self, goid, goobj):
        """Return the shortened GO ID shown first in a GO Term box.

        An 'a' is appended when 'mark_alt_id' is switched on and ``goid``
        differs from ``goobj.id``.
        """
        # Shorten: Ex: GO:0007608 -> G0007608
        go_txt = goid.replace("GO:", "G")
        if 'mark_alt_id' in self.present and goid != goobj.id:
            go_txt += 'a'
        return go_txt

    # ----------------------------------------------------------------------------------
    # Methods for text printed inside GO terms
    def get_node_text(self, goid, goobj):
        """Return a string to be printed in a GO term box."""
        txt = []
        # Header line: "GO:0036464 L04 D06"
        txt.append(self.get_hdr(goid, goobj))
        # GO name line: "cytoplasmic ribonucleoprotein"
        if 'no_name' not in self.present:
            txt.append(self._get_go_name(goobj))
        # study info line: "24 genes"
        if 'objgoea' in self.kws:
            study_txt = self.kws['objgoea'].get_study_txt(goid)
            if study_txt is not None:
                txt.append(study_txt)
        # Add user-specified text, if needed
        if 'go2txt' in self.kws and goid in self.kws['go2txt']:
            txt.append(self.kws['go2txt'][goid])
        return "\n".join(txt)

    def _get_go_name(self, goobj):
        """Return GO name/description, as is or edited by a user function.

        Without a 'goobj2fncname' function, every comma in the name is
        replaced by a newline.
        """
        if 'goobj2fncname' not in self.kws:
            return goobj.name.replace(",", "\n")
        # Return GO Term name edited by user-provided function
        return self.kws['goobj2fncname'](goobj)

    def get_hdr(self, goid, goobj):
        """Header for GO Term box. Ex: 'G0001719 L6 D9 d3.'

        Fields are appended in a fixed order and joined with single spaces.
        """
        hdr = []
        # May be None when goid has no entry in go2nt; the ntgo attributes
        # below are still read without a check.
        ntgo = self.gosubdag.go2nt.get(goid)
        prt_flds = self._get_prtflds()
        # Add letter to depth-01 GO Node.
        # The format string keeps its trailing space, so after the join the
        # letter is followed by two spaces.
        if 'D1' in prt_flds and goobj.depth == 1:
            hdr.append("{ABC} ".format(ABC=ntgo.D1))
        hdr.append(self.str_fmthdr(goid, goobj))
        if 'level' in prt_flds:
            hdr.append("L{level}".format(level=goobj.level))
        if 'depth' in prt_flds:
            hdr.append("D{depth}".format(depth=goobj.depth))
        if 'reldepth' in prt_flds:
            hdr.append("R{reldepth}".format(reldepth=goobj.reldepth))
        # Print count of parents for this GO term
        if 'c2ps' in self.kws:
            self._add_parent_cnt(hdr, goobj, self.kws['c2ps'])
        # Print count of children for this GO term
        childcnt_str = self._get_hdr_childcnt(goobj, ntgo)
        if childcnt_str:
            hdr.append(childcnt_str)
        # Print count of all descendants down to the leaf-level for this GO term
        if 'dcnt' in prt_flds:
            hdr.append("d{N}".format(N=ntgo.dcnt))
        if 'tinfo' in prt_flds:
            hdr.append("i{I:4.02f}".format(I=ntgo.tinfo))
        if 'REL' in prt_flds:
            hdr.append("{R}".format(R=ntgo.REL_short))
        return " ".join(hdr)

    def _get_prtflds(self):
        """Get print fields for GO header.

        Returns:
            A set of field names.  With a non-empty user 'prt_flds' option,
            only the requested names that exist in the GoSubDag's field list
            are kept.  Otherwise every available field is used, minus
            'level' when the GoSubDag has relationships loaded.
        """
        # User-specified print fields
        ntflds = self.gosubdag.prt_attr['flds']
        prt_flds = self.kws.get('prt_flds')
        if prt_flds:
            # Coerce to a set so a list or tuple from the user works too
            return set(prt_flds).intersection(ntflds)
        exclude = set()
        # Default print fields
        if self.gosubdag.relationships:
            exclude.add('level')
        return set(f for f in ntflds if f not in exclude)

    def _get_hdr_childcnt(self, goobj, ntgo):
        """Get string representing count of children for this GO term.

        Returns "cN" when 'childcnt' is switched on; "c0" when relationships
        are loaded, the term has no children but its descendant count is
        non-zero; otherwise None.
        """
        if 'childcnt' in self.present:
            return "c{N}".format(N=len(goobj.children))
        if self.gosubdag.relationships and not goobj.children and ntgo.dcnt != 0:
            return "c0"
        return None

    def _add_parent_cnt(self, hdr, goobj, c2ps):
        """Add the parent count to the GO term box if not all parents are plotted.

        Args:
            hdr: list of header fields; "pN" is appended in place.
            goobj: GO term object; ``id`` and ``parents`` are read.
            c2ps: mapping of a GO ID to its collection of parents.
                Presumably these are the parents actually plotted; verify
                against the caller.

        The count is added when 'prt_pcnt' is switched on, or when the
        mapped parents are non-empty and differ in number from
        ``goobj.parents``.
        """
        if goobj.id not in c2ps:
            return
        parents = c2ps[goobj.id]
        count_differs = parents and len(goobj.parents) != len(parents)
        if 'prt_pcnt' in self.present or count_differs:
            uniq_parents = set(goobj.parents)
            # Sanity check: a term's parents are expected to be unique
            assert len(goobj.parents) == len(uniq_parents)
            hdr.append("p{N}".format(N=len(uniq_parents)))
159
|
|
|
|
160
|
|
|
|
161
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved. |
162
|
|
|
|