|
1
|
|
|
"""Command-line script to print a GO term's lower-level hierarchy. |
|
2
|
|
|
|
|
3
|
|
|
Usage: |
|
4
|
|
|
wr_hier.py [GO ...] [options] |
|
5
|
|
|
|
|
6
|
|
|
Options: |
|
7
|
|
|
-h --help show this help message and exit |
|
8
|
|
|
|
|
9
|
|
|
-i <gofile.txt> Read a file name containing a list of GO IDs |
|
10
|
|
|
--o=<outfile> Output file in ASCII text format |
|
11
|
|
|
-f Writes results to an ASCII file named after the GO term. e.g. hier_GO0002376.txt |
|
12
|
|
|
--up Write report from GO term up to root |
|
13
|
|
|
|
|
14
|
|
|
--dag=<dag_file> Ontologies in obo file [default: go-basic.obo]. |
|
15
|
|
|
|
|
16
|
|
|
--gaf=<file.gaf> Annotations from a gaf file |
|
17
|
|
|
--gene2go=<gene2go> Annotations from a gene2go file downloaded from NCBI |
|
18
|
|
|
|
|
19
|
|
|
--no_indent Do not indent GO terms |
|
20
|
|
|
--max_indent=<int> max indent depth for printing relative to GO Term |
|
21
|
|
|
--num_child=<int> Print count of total number of children for each GO |
|
22
|
|
|
--short If a branch has already been printed, do not re-print. |
|
23
|
|
|
Print '===' instead of dashes to note the point of compression |
|
24
|
|
|
-r --relationship Load and use the 'relationship' field |
|
25
|
|
|
""" |
|
26
|
|
|
|
|
27
|
|
|
from __future__ import print_function |
|
28
|
|
|
|
|
29
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved." |
|
30
|
|
|
__author__ = "DV Klopfenstein" |
|
31
|
|
|
|
|
32
|
|
|
import sys |
|
33
|
|
|
from goatools.base import get_godag |
|
34
|
|
|
from goatools.associations import get_tcntobj |
|
35
|
|
|
from goatools.godag.obo_optional_attributes import OboOptionalAttrs |
|
36
|
|
|
from goatools.cli.docopt_parse import DocOptParse |
|
37
|
|
|
from goatools.cli.gos_get import GetGOs |
|
38
|
|
|
from goatools.gosubdag.gosubdag import GoSubDag |
|
39
|
|
|
from goatools.gosubdag.rpt.write_hierarchy import WrHierGO |
|
40
|
|
|
|
|
41
|
|
|
|
|
42
|
|
|
def cli():
    """Entry point: report the hierarchy for the GO IDs given on the command line.

    One report is written per requested output file; when no output file
    was requested, the report is printed to stdout instead.
    """
    cli_obj = WrHierCli(sys.argv[1:])
    out_files = cli_obj.get_fouts()
    if not out_files:
        # No --o/-f output requested: send the report to the terminal
        cli_obj.prt_hier(sys.stdout)
    else:
        for out_file in out_files:
            cli_obj.wrtxt_hier(out_file)
    # NOTE(review): looks like a leftover debug print of the parsed options -- confirm it is wanted
    print(cli_obj.kws)
|
52
|
|
|
|
|
53
|
|
|
class WrHierCli(object):
    """Command-line driver which prints or writes the hierarchy of user-specified GO IDs.

    Construction parses the command-line arguments against this module's
    docstring, loads the GO DAG named by the 'dag' option, wraps it in a
    GoSubDag, and collects the GO IDs given as arguments and/or in the
    file named by the 'i' option.
    """

    # Option names handed to DocOptParse when parsing the command line
    kws_set_all = {'relationship', 'up', 'f'}
    kws_dct_all = {'GO', 'dag', 'i', 'o', 'max_indent', 'num_child', 'no_indent', 'short',
                   'gaf', 'gene2go'}
    # Not referenced by any method in this class.
    # NOTE(review): presumably the subset of options meant for the report writer -- confirm
    kws_dct_wr = {'max_indent', 'num_child', 'no_indent', 'short', 'relationship'}

    def __init__(self, args=None, prt=sys.stdout):
        """Parse the arguments, load the GO DAG, and collect the GO IDs to report.

        Args:
            args: Command-line arguments (e.g. sys.argv[1:]) given to the docopt parser.
            prt: Stream passed to get_godag and GoSubDag.
        """
        self.kws = DocOptParse(__doc__, self.kws_dct_all, self.kws_set_all).get_docargs(
            args, intvals={'max_indent', 'num_child'})
        # Load only the optional obo attributes that the user asked for
        opt_attrs = OboOptionalAttrs.attributes.intersection(self.kws.keys())
        godag = get_godag(self.kws['dag'], prt, optional_attrs=opt_attrs)
        self.gosubdag = GoSubDag(godag.keys(), godag,
                                 relationships='relationship' in opt_attrs,
                                 tcntobj=get_tcntobj(godag, **self.kws),
                                 children=True,
                                 prt=prt)
        # NOTE(review): sys.stdout is passed here rather than `prt` -- confirm this is intended
        self.goids = GetGOs().get_goids(self.kws.get('GO'), self.kws.get('i'), sys.stdout)

    def get_fouts(self):
        """Return the list of requested output filenames; empty if none were requested.

        The 'o' option contributes its value; the 'f' option contributes a
        filename derived from a GO ID (see _get_fout_go).
        """
        fouts_txt = []
        if 'o' in self.kws:
            fouts_txt.append(self.kws['o'])
        if 'f' in self.kws:
            fouts_txt.append(self._get_fout_go())
        return fouts_txt

    def _get_fout_go(self):
        """Return an output filename built from the first GO ID, e.g. hier_GO0002376.txt.

        '_up' is added before the extension when the 'up' option is present.

        Raises:
            AssertionError: If no GO IDs were collected.
        """
        assert self.goids, "NO VALID GO IDs WERE PROVIDED"
        # The colon in a GO ID (GO:0002376) is dropped to get a plain filename
        base = next(iter(self.goids)).replace(':', '')
        upstr = '_up' if 'up' in self.kws else ''
        return "hier_{BASE}{UP}.{EXT}".format(BASE=base, UP=upstr, EXT='txt')

    def wrtxt_hier(self, fout_txt):
        """Write the hierarchy for the specified GO IDs to an ASCII file.

        Args:
            fout_txt: Name of the text file to create or overwrite.
        """
        # Text mode, not binary: the report is written as str, and writing
        # str to a file opened with 'wb' raises TypeError on Python 3.
        with open(fout_txt, 'w') as prt:
            self.prt_hier(prt)
        print(" WROTE: {TXT}".format(TXT=fout_txt))

    def prt_hier(self, prt=sys.stdout):
        """Write the hierarchy for the specified GO IDs to a stream.

        The hierarchy below each GO ID is written unless 'up' is among the
        writer's user-set options, in which case the report is written
        upward from the GO IDs.

        Args:
            prt: Stream which receives the report.

        Raises:
            AssertionError: If no GO IDs were collected.
        """
        objwr = WrHierGO(self.gosubdag, **self.kws)
        assert self.goids, "NO VALID GO IDs WERE PROVIDED"
        if 'up' not in objwr.usrset:
            for goid in self.goids:
                objwr.prt_hier_down(goid, prt)
        else:
            objwr.prt_hier_up(self.goids, prt)
|
108
|
|
|
|
|
109
|
|
|
|
|
110
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved. |
|
111
|
|
|
|