1
|
|
|
"""Command-line script to create GO term diagrams |
2
|
|
|
|
3
|
|
|
Usage: |
4
|
|
|
go_plot.py [GO ...] [options] |
5
|
|
|
go_plot.py [GO ...] [--obo=<file.obo>] [--outfile=<file.png>] [--title=<title>] |
6
|
|
|
[--go_file=<file.txt>] |
7
|
|
|
[--relationship] |
8
|
|
|
[--sections=<sections.txt>] |
9
|
|
|
[--gaf=<file.gaf>] |
10
|
|
|
[--gene2go=<gene2go>] [--taxid=<Taxonomy_number>] |
11
|
|
|
[--shorten] |
12
|
|
|
[--parentcnt] [--childcnt] [--mark_alt_id] |
13
|
|
|
[--go_aliases=<go_aliases.txt>] |
14
|
|
|
[--draw-children] |
15
|
|
|
[--norel] |
16
|
|
|
go_plot.py [GO ...] [--obo=<file.obo>] [-o <file.png>] [-t <title>] |
17
|
|
|
[--shorten] [-p] [-c] |
18
|
|
|
go_plot.py [GO ...] [-o <file.png>] [--draw-children] |
19
|
|
|
go_plot.py [GO ...] [-o <file.png>] [--draw-children] [--shorten] |
20
|
|
|
go_plot.py [--obo=<file.obo>] |
21
|
|
|
go_plot.py [--obo=<file.obo>] [--outfile=<file.png>] |
22
|
|
|
go_plot.py [GO ...] |
23
|
|
|
go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] |
24
|
|
|
go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] [--shorten] |
25
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] |
26
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] |
27
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--childcnt] |
28
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] [--childcnt] |
29
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] |
30
|
|
|
go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] [-c] |
31
|
|
|
|
32
|
|
|
Options: |
33
|
|
|
-h --help show this help message and exit |
34
|
|
|
-i --go_file=<file.txt> GO IDs in an ASCII file |
35
|
|
|
-o <file.png>, --outfile=<file.png> Plot file name [default: go_plot.png] |
36
|
|
|
-r --relationship Plot all relationships |
37
|
|
|
-s <sections.txt> --sections=<sections.txt> Sections file for grouping |
38
|
|
|
-S <sections module str> Sections file for grouping |
39
|
|
|
|
40
|
|
|
--gaf=<file.gaf> Annotations from a gaf file |
41
|
|
|
--gene2go=<gene2go> Annotations from a gene2go file downloaded from NCBI |
42
|
|
|
|
43
|
|
|
--obo=<file.obo> Ontologies in obo file [default: go-basic.obo]. |
44
|
|
|
|
45
|
|
|
-t <title>, --title=<title> Title string to place in image |
46
|
|
|
-p --parentcnt Include parent count in each GO term |
47
|
|
|
-c --childcnt Include child count in each GO term |
48
|
|
|
--shorten Shorten the GO name on plots |
49
|
|
|
--mark_alt_id Add 'a' if GO ID is an alternate ID: GO:0007582a |
50
|
|
|
--draw-children Draw children. By default, they are not drawn. |
51
|
|
|
--go_aliases=<go_aliases.txt> ASCII file containing letter alias |
52
|
|
|
|
53
|
|
|
--norel Don't load relationship from the GO DAG |
54
|
|
|
""" |
55
|
|
|
|
56
|
|
|
from __future__ import print_function |
57
|
|
|
|
58
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved." |
59
|
|
|
__author__ = "DV Klopfenstein" |
60
|
|
|
|
61
|
|
|
|
62
|
|
|
import re |
63
|
|
|
import os |
64
|
|
|
import sys |
65
|
|
|
|
66
|
|
|
from goatools.obo_parser import GODag |
67
|
|
|
from goatools.associations import get_tcntobj |
68
|
|
|
from goatools.godag.obo_optional_attributes import OboOptionalAttrs |
69
|
|
|
|
70
|
|
|
from goatools.cli.docopt_parse import DocOptParse |
71
|
|
|
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot |
72
|
|
|
from goatools.gosubdag.plot.go2color import Go2Color |
73
|
|
|
from goatools.gosubdag.gosubdag import GoSubDag |
74
|
|
|
from goatools.gosubdag.go_tasks import get_go2obj_unique |
75
|
|
|
from goatools.gosubdag.go_tasks import get_leaf_children |
76
|
|
|
from goatools.gosubdag.rpt.wr_xlsx import read_d1_letter |
77
|
|
|
# COMING SOON: Plotting using GOATOOLS grouping: |
78
|
|
|
# from goatools.gosubdag.rpt.read_goids import read_sections |
79
|
|
|
# from goatools.grouper.grprdflts import GrouperDflts |
80
|
|
|
# from goatools.grouper.hdrgos import HdrgosSections |
81
|
|
|
# from goatools.grouper.grprobj import Grouper |
82
|
|
|
# from goatools.grouper.colors import GrouperColors |
83
|
|
|
# from goatools.grouper.grprplt import GrouperPlot |
84
|
|
|
|
85
|
|
|
|
86
|
|
|
# pylint: disable=too-few-public-methods |
87
|
|
|
class GetGOs(object): |
88
|
|
|
"""Return a list of GO IDs for plotting.""" |
89
|
|
|
|
90
|
|
|
exp_color_chars = set('ABCDEFabcdef0123456789') |
91
|
|
|
exp_kws_dct = set(['GO', 'go_file']) |
92
|
|
|
exp_kws_set = set(['draw-children']) |
93
|
|
|
max_gos = 200 # Maximum number of source GO IDs |
94
|
|
|
|
95
|
|
|
def __init__(self, go2obj): |
96
|
|
|
self.go2obj = go2obj |
97
|
|
|
self.re_goids = re.compile(r"(GO:\d{7})+?") |
98
|
|
|
self.re_color = re.compile(r"(#[0-9a-fA-F]{6})+?") |
99
|
|
|
|
100
|
|
|
def get_go_color(self, **kws): |
101
|
|
|
"""Return source GO IDs .""" |
102
|
|
|
ret = {'GOs':set(), 'go2color':{}} |
103
|
|
|
if 'GO' in kws: |
104
|
|
|
self._goargs(ret, kws['GO']) |
105
|
|
|
if 'go_file' in kws: |
106
|
|
|
self._rdtxt_gos(ret, kws['go_file']) |
107
|
|
|
if 'draw-children' in kws: |
108
|
|
|
ret['GOs'].update(get_leaf_children(ret['GOs'], self.go2obj)) |
109
|
|
|
# If there have been no GO IDs explicitly specified by the user |
110
|
|
|
if not ret['GOs']: |
111
|
|
|
# If the GO-DAG is sufficiently small, print all GO IDs |
112
|
|
|
if len(self.go2obj) < self.max_gos: |
113
|
|
|
main_gos = set(o.id for go, o in self.go2obj.items() if go != o.id) |
114
|
|
|
go_leafs = set(go for go, o in self.go2obj.items() if not o.children) |
115
|
|
|
ret['GOs'] = go_leafs.difference(main_gos) |
116
|
|
|
else: |
117
|
|
|
raise RuntimeError("GO IDs NEEDED") |
118
|
|
|
go2obj = {go:self.go2obj[go] for go in ret['GOs']} |
119
|
|
|
ret['GOs'] = set(get_go2obj_unique(go2obj)) |
120
|
|
|
return [ret['GOs'], ret['go2color']] |
121
|
|
|
|
122
|
|
View Code Duplication |
def _goargs(self, ret, go_args): |
|
|
|
|
123
|
|
|
"""Get GO IDs and colors for GO IDs from the GO ID runtime arguments.""" |
124
|
|
|
goids = set() |
125
|
|
|
go2color = {} |
126
|
|
|
# Match on "GO ID" or "GO ID and color" |
127
|
|
|
re_gocolor = re.compile(r'(GO:\d{7})((?:#[0-9a-fA-F]{6})?)') |
128
|
|
|
for go_arg in go_args: |
129
|
|
|
mtch = re_gocolor.match(go_arg) |
130
|
|
|
if mtch: |
131
|
|
|
goid, color = mtch.groups() |
132
|
|
|
goids.add(goid) |
133
|
|
|
if color: |
134
|
|
|
go2color[goid] = color |
135
|
|
|
else: |
136
|
|
|
print("WARNING: UNRECOGNIZED ARG({})".format(go_arg)) |
137
|
|
|
self._update_ret(ret, goids, go2color) |
138
|
|
|
|
139
|
|
|
def _rdtxt_gos(self, ret, go_file): |
140
|
|
|
"""Read GO IDs from a file.""" |
141
|
|
|
if not os.path.exists(go_file): |
142
|
|
|
raise RuntimeError("CAN NOT READ: {FILE}\n".format(FILE=go_file)) |
143
|
|
|
goids = set() |
144
|
|
|
go2color = {} |
145
|
|
|
with open(go_file) as ifstrm: |
146
|
|
|
for line in ifstrm: |
147
|
|
|
goids_found = self.re_goids.findall(line) |
148
|
|
|
if goids_found: |
149
|
|
|
goids.update(goids_found) |
150
|
|
|
colors = self.re_color.findall(line) |
151
|
|
|
if colors: |
152
|
|
|
if len(goids_found) == len(colors): |
153
|
|
|
for goid, color in zip(goids_found, colors): |
154
|
|
|
go2color[goid] = color |
155
|
|
|
else: |
156
|
|
|
print("IGNORING: {L}".format(L=line),) |
157
|
|
|
self._update_ret(ret, goids, go2color) |
158
|
|
|
|
159
|
|
|
@staticmethod |
160
|
|
|
def _update_ret(ret, goids, go2color): |
161
|
|
|
"""Update 'GOs' and 'go2color' in dict with goids and go2color.""" |
162
|
|
|
if goids: |
163
|
|
|
ret['GOs'].update(goids) |
164
|
|
|
if go2color: |
165
|
|
|
for goid, color in go2color.items(): |
166
|
|
|
ret['go2color'][goid] = color |
167
|
|
|
|
168
|
|
|
|
169
|
|
|
class PlotCli(object): |
170
|
|
|
"""Class for command-line interface for creating GO term diagrams""" |
171
|
|
|
|
172
|
|
|
kws_dict = set(['GO', 'outfile', 'go_file', 'sections', 'S', |
173
|
|
|
'gaf', 'gene2go', 'taxid', |
174
|
|
|
'title', |
175
|
|
|
'obo', |
176
|
|
|
'go_aliases']) |
177
|
|
|
kws_set = set(['relationship', |
178
|
|
|
'parentcnt', 'childcnt', 'mark_alt_id', 'shorten', |
179
|
|
|
'draw-children', |
180
|
|
|
'norel']) |
181
|
|
|
dflt_outfile = "go_plot.png" |
182
|
|
|
kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten']) |
183
|
|
|
|
184
|
|
|
def __init__(self, gosubdag=None): |
185
|
|
|
self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set) |
186
|
|
|
self.gosubdag = None if gosubdag is None else gosubdag |
187
|
|
|
|
188
|
|
|
def cli(self): |
189
|
|
|
"""Command-line interface for go_draw script.""" |
190
|
|
|
kws_all = self.get_docargs(prt=None) |
191
|
|
|
optional_attrs = self._get_optional_attrs(kws_all) |
192
|
|
|
go2obj = GODag(kws_all['obo'], optional_attrs) |
193
|
|
|
# GO kws_all: GO go_file draw-children |
194
|
|
|
goids, go2color = GetGOs(go2obj).get_go_color(**kws_all) |
195
|
|
|
relationships = 'relationship' in optional_attrs |
196
|
|
|
kws_dag = self._get_kwsdag(goids, go2obj, **kws_all) |
197
|
|
|
self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag) |
198
|
|
|
|
199
|
|
|
if 'sections' in kws_all: |
200
|
|
|
return self._plt_gogrouped(goids, go2color, **kws_all) |
201
|
|
|
else: |
202
|
|
|
return self._plt_gosubdag(goids, go2color, **kws_all) |
203
|
|
|
|
204
|
|
|
# pylint: disable=unused-argument,no-self-use |
205
|
|
|
def _plt_gogrouped(self, goids, go2color_usr, **kws): |
206
|
|
|
"""Plot grouped GO IDs.""" |
207
|
|
|
print("Plotting with GOATOOLS grouping coming soon...") |
208
|
|
|
# fout_img = self.get_outfile(kws['outfile'], goids) |
209
|
|
|
# sections = read_sections(kws['sections'], exclude_ungrouped=True) |
210
|
|
|
# # kws_plt = {k:v for k, v in kws.items if k in self.kws_plt} |
211
|
|
|
# grprobj_cur = self._get_grprobj(goids, sections) |
212
|
|
|
# # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only |
213
|
|
|
# # BORDER: Black=hdr Blu=hdr&usr |
214
|
|
|
# grpcolor = GrouperColors(grprobj_cur) # get_bordercolor get_go2color_users |
215
|
|
|
# grp_go2color = grpcolor.get_go2color_users() |
216
|
|
|
# grp_go2bordercolor = grpcolor.get_bordercolor() |
217
|
|
|
# for goid, color in go2color_usr.items(): |
218
|
|
|
# grp_go2color[goid] = color |
219
|
|
|
# objcolor = Go2Color(self.gosubdag, objgoea=None, |
220
|
|
|
# go2color=grp_go2color, go2bordercolor=grp_go2bordercolor) |
221
|
|
|
# go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor) |
222
|
|
|
# objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws) |
223
|
|
|
# objplt.prt_goids(sys.stdout) |
224
|
|
|
# objplt.plt_dag(fout_img) |
225
|
|
|
# sys.stdout.write("{N:>6} sections read\n".format( |
226
|
|
|
# N="NO" if sections is None else len(sections))) |
227
|
|
|
# return fout_img |
228
|
|
|
|
229
|
|
|
# def _get_grprobj(self, goids, sections): |
230
|
|
|
# """Get Grouper, given GO IDs and sections.""" |
231
|
|
|
# grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo") |
232
|
|
|
# hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections) |
233
|
|
|
# return Grouper("sections", goids, hdrobj, self.gosubdag) |
234
|
|
|
|
235
|
|
|
def _plt_gosubdag(self, goids, go2color, **kws): |
236
|
|
|
"""Plot GO IDs.""" |
237
|
|
|
fout_img = self.get_outfile(kws['outfile'], goids) |
238
|
|
|
objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color) |
239
|
|
|
objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws) |
240
|
|
|
objplt.prt_goids(sys.stdout) |
241
|
|
|
objplt.plt_dag(fout_img) |
242
|
|
|
return fout_img |
243
|
|
|
|
244
|
|
|
def _get_kwsdag(self, goids, go2obj, **kws_all): |
245
|
|
|
"""Get keyword args for a GoSubDag.""" |
246
|
|
|
kws_dag = {} |
247
|
|
|
# GO letters specified by the user |
248
|
|
|
if 'go_aliases' in kws_all: |
249
|
|
|
fin_go_aliases = kws_all['go_aliases'] |
250
|
|
|
if os.path.exists(fin_go_aliases): |
251
|
|
|
go2letter = read_d1_letter(fin_go_aliases) |
252
|
|
|
if go2letter: |
253
|
|
|
kws_dag['go2letter'] = go2letter |
254
|
|
|
return kws_dag |
255
|
|
|
|
256
|
|
|
@staticmethod |
257
|
|
|
def _get_tcntobj(goids, go2obj, **kws): |
258
|
|
|
"""Get a TermCounts object if the user provides an annotation file, otherwise None.""" |
259
|
|
|
# kws: gaf (gene2go taxid) |
260
|
|
|
if 'gaf' in kws or 'gene2go' in kws: |
261
|
|
|
return get_tcntobj(go2obj, **kws) # TermCounts |
262
|
|
|
|
263
|
|
|
def get_docargs(self, args=None, prt=None): |
264
|
|
|
"""Pare down docopt. Return a minimal dictionary and a set containing runtime arg values.""" |
265
|
|
|
# docargs = self.objdoc.get_docargs(args, exp_letters=set(['o', 't', 'p', 'c'])) |
266
|
|
|
docargs = self.objdoc.get_docargs(args, prt) |
267
|
|
|
self._chk_docopts(docargs) |
268
|
|
|
return docargs |
269
|
|
|
|
270
|
|
|
def _chk_docopts(self, kws): |
271
|
|
|
"""Check for common user command-line errors.""" |
272
|
|
|
# outfile should contain .png, .png, etc. |
273
|
|
|
outfile = kws['outfile'] |
274
|
|
|
if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \ |
275
|
|
|
kws['outfile'] == self.dflt_outfile: |
276
|
|
|
self._err("NO GO IDS SPECFIED", err=False) |
277
|
|
|
if 'obo' in outfile: |
278
|
|
|
self._err("BAD outfile({O})".format(O=outfile)) |
279
|
|
|
if 'gaf' in kws and 'gene2go' in kws: |
280
|
|
|
self._err("SPECIFY ANNOTAIONS FROM ONE FILE") |
281
|
|
|
if 'gene2go' in kws: |
282
|
|
|
if 'taxid' not in kws: |
283
|
|
|
self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE") |
284
|
|
|
|
285
|
|
|
def _err(self, msg, err=True): |
286
|
|
|
"""Print useage and error before exiting.""" |
287
|
|
|
severity = "FATAL" if err else "NOTE" |
288
|
|
|
txt = "".join([self.objdoc.doc, |
289
|
|
|
"User's command-line:\n\n", |
290
|
|
|
" % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])), |
291
|
|
|
"**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)]) |
292
|
|
|
if err: |
293
|
|
|
raise RuntimeError(txt) |
294
|
|
|
sys.stdout.write(txt) |
295
|
|
|
sys.exit(0) |
296
|
|
|
|
297
|
|
|
def get_outfile(self, outfile, goids=None): |
298
|
|
|
"""Return output file for GO Term plot.""" |
299
|
|
|
# 1. Use the user-specfied output filename for the GO Term plot |
300
|
|
|
if outfile != self.dflt_outfile: |
301
|
|
|
return outfile |
302
|
|
|
# 2. If only plotting 1 GO term, use GO is in plot name |
303
|
|
|
if goids is not None and len(goids) == 1: |
304
|
|
|
goid = next(iter(goids)) |
305
|
|
|
goobj = self.gosubdag.go2obj[goid] |
306
|
|
|
fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=goobj.name) |
307
|
|
|
return ".".join([re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), 'png']) |
308
|
|
|
# 3. Return default name |
309
|
|
|
return self.dflt_outfile |
310
|
|
|
|
311
|
|
|
@staticmethod |
312
|
|
|
def _get_optional_attrs(kws): |
313
|
|
|
"""Given keyword args, return optional_attributes to be loaded into the GODag.""" |
314
|
|
|
vals = OboOptionalAttrs.attributes.intersection(kws.keys()) |
315
|
|
|
if 'sections' in kws: |
316
|
|
|
vals.add('relationship') |
317
|
|
|
if 'norel' in kws: |
318
|
|
|
vals.discard('relationship') |
319
|
|
|
return vals |
320
|
|
|
|
321
|
|
|
|
322
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved. |
323
|
|
|
|