Completed
Push — master ( 0f596f...821888 )
by
unknown
01:11
created

_WrHierPrt._str_dash()   A

Complexity

Conditions 2

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 7
rs 9.4285
1
"""Print a GO term's lower-level hierarchy."""
2
3
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import sys
7
import collections as cx
8
from goatools.godag.consts import Consts
9
from goatools.gosubdag.go_paths import GoPaths
10
11
12
class WrHierGO(object):
    """Write GO term hierarchies as indented text reports."""

    # Names of keyword options that carry a value
    kws_dct = {'max_indent', 'num_child'}
    # Names of keyword options that act as on/off flags
    kws_set = {'no_indent', 'short'}
    # NAMESPACE2GO on this object maps a namespace to its root GO ID
    consts = Consts()

    def __init__(self, gosubdag, **kws):
        """Store the GO sub-DAG and the recognized user options.

        Args:
            gosubdag: GoSubDag object; must have been built with children=True.
            **kws: Optional settings. Keys listed in ``kws_dct`` are kept with
                their values in ``self.usrdct``; keys listed in ``kws_set``
                whose value is truthy are kept in ``self.usrset``. Any other
                keyword is ignored.
        """
        self.gosubdag = gosubdag  # GoSubDag arg, children=True, must be used
        # Filter against the declared option names so that unrelated keywords
        # never end up in the value dict or in the flag set.
        self.usrdct = {k: v for k, v in kws.items() if k in self.kws_dct}
        self.usrset = {k for k, v in kws.items() if k in self.kws_set and v}
        # ' {NS} {dcnt:6,} L{level:02} D{depth:02} {D1:5} {GO_name}'

    def prt_hier_all(self, prt=sys.stdout):
        """Write hierarchy for all GO Terms in obo file."""
        # Print: [biological_process, molecular_function, and cellular_component]
        for goid in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
            self.prt_hier_down(goid, prt)

    def prt_hier_down(self, goid, prt=sys.stdout):
        """Write hierarchy for all GO IDs below GO ID in arg, goid."""
        obj = _WrHierPrt(self, prt)
        obj.prt_hier_rec(goid)

    def prt_hier_up(self, goids, prt=sys.stdout):
        """Write, per namespace, the hierarchy from the root to the GO IDs in arg, goids.

        For each namespace, the report starts at that namespace's root term and
        is restricted to the user GO IDs plus every GO ID found on the paths
        returned by GoPaths.get_paths_from_to (presumably the paths from each
        user term up to the root -- confirm against GoPaths). The user GO IDs
        are passed to the printer as the terms to mark.

        Args:
            goids: Iterable of GO IDs; each must be a key of gosubdag.go2obj.
            prt: Stream to write to.
        """
        go2goterm_all = {go: self.gosubdag.go2obj[go] for go in goids}
        objp = GoPaths()
        for namespace, go2term_ns in self._get_namespace2go2term(go2goterm_all).items():
            go_root = self.consts.NAMESPACE2GO[namespace]
            # Collect the user GO IDs and all GO IDs on their paths
            goids_all = set()
            for goid, goterm in go2term_ns.items():
                goids_all.add(goid)
                paths = objp.get_paths_from_to(goterm, goid_end=None, dn0_up1=True)
                goids_all.update(o.id for p in paths for o in p)
            obj = _WrHierPrt(self, prt, goids_all, set(go2term_ns))
            obj.prt_hier_rec(go_root)

    @staticmethod
    def _get_namespace2go2term(go2terms):
        """Group GO IDs by namespace.

        Args:
            go2terms: dict mapping GO ID to a GO term object with a
                ``namespace`` attribute.

        Returns:
            defaultdict mapping namespace to a dict of {GO ID: GO term}.
        """
        namespace2go2term = cx.defaultdict(dict)
        for goid, goterm in go2terms.items():
            namespace2go2term[goterm.namespace][goid] = goterm
        return namespace2go2term
57
58
59
class _WrHierPrt(object):
    """Recursive printer which writes one GO hierarchy to a stream."""

    def __init__(self, obj, prt=sys.stdout, include_only=None, go_marks=None):
        """Copy the settings from the owning writer and reset the print state.

        Args:
            obj: Object providing ``gosubdag``, ``usrdct`` and ``usrset``.
            prt: Stream to write to.
            include_only: If not None, only GO IDs in this collection are printed.
            go_marks: GO IDs to flag with '>' at the start of their line.
        """
        # Settings read from the owning writer object
        self.gosubdag = obj.gosubdag
        self.max_indent = obj.usrdct.get('max_indent')
        self.short_prt = 'short' in obj.usrset
        # Optional restrictions and markers supplied by the caller
        self.include_only = include_only
        self.go_marks = go_marks or set()
        # Output stream and running state
        self.prt = prt
        self.gos_printed = set()
        # The GO ID is written with the dashes, so drop it from the line format
        self.prtfmt = self.gosubdag.prt_attr['fmta'].replace('{GO}{alt:1} # ', '')

    def prt_hier_rec(self, goid, depth=1):
        """Write the line for one GO term, then recurse into its children.

        Args:
            goid: GO ID of the term to print.
            depth: Hierarchy depth of the term; sets the number of dashes.
        """
        nt_term = self.gosubdag.go2nt[goid]
        term = self.gosubdag.go2obj[goid]
        # When restricted to a sub-set of GO terms, skip everything else.
        if self.include_only is not None and goid not in self.include_only:
            return
        # In short mode, a term seen earlier is printed without its sub-tree
        seen_before = self.short_prt and goid in self.gos_printed
        write = self.prt.write
        if self.go_marks:
            mark = '>' if goid in self.go_marks else ' '
            write('{} '.format(mark))

        # '=' flags a term whose children were printed in detail earlier;
        # every other term gets the default '-'.
        use_dash = not (seen_before and term.children)
        write('{DASHGO:{N}}'.format(DASHGO=self._str_dashgoid(nt_term, depth, use_dash), N=17))

        write("{GO_INFO}\n".format(GO_INFO=self.prtfmt.format(**nt_term._asdict())))
        self.gos_printed.add(goid)
        # Do not print the hierarchy below this term if it was already printed
        if seen_before:
            return
        child_depth = depth + 1
        if self.max_indent is not None and child_depth > self.max_indent:
            return
        for child in term.children:
            self.prt_hier_rec(child.id, child_depth)

    @staticmethod
    def _str_dash(depth, single_or_double):
        """Return ``depth`` copies of '-' (single_or_double true) or '=' (false)."""
        return ('-' if single_or_double else '=') * depth

    def _str_dashgoid(self, ntgo, depth, single_or_double):
        """Return the dashes, a space, the GO ID and the alt field of ntgo."""
        return "{DASHES} {GO}{alt:1}".format(
            DASHES=self._str_dash(depth, single_or_double),
            GO=ntgo.GO,
            alt=ntgo.alt)
116
117
118
#### Examples:
119
####
120
#### Print the hierarchy below Term, GO:0030663
121
#### >>> python {SCR} GO:0030663
122
####
123
#### - GO:0030663	level-05	depth-07	COPI-coated vesicle membrane [cellular_component]
124
#### -- GO:0012508	level-05	depth-08	Golgi to ER transport vesicle membrane [cellular_component]
125
#### -- GO:0012509	level-05	depth-08	inter-Golgi transport vesicle membrane [cellular_component]
126
####
127
####
128
#### Write the hierarchy below Term, GO:0030663 into a file
129
#### >>> python {SCR} GO:0030663 --o=hier_GO_0030663.rpt
130
####
131
####   WROTE: hier_GO_0030663.rpt
132
####
133
#### Print the hierarchy for biological process, molecular_function, and cellular_component:
134
#### >>> python {SCR} --o=hier_BP_MF_CC.rpt
135
####
136
#### Print hierarchy for BP, MF, CC only printing the first 2 levels.
137
#### >>> python {SCR} --max_indent=2
138
#### >>> python {SCR} --max_indent=2 --dash_len=2 --num_child
139
####
140
####
141
#### Print a shortened version of the hierarchy for BP, MF, and CC.
142
#### This will only print a path to a leaf GO Term once.
143
#### If the path appears a second time, the term is printed again, but its path is not.
144
#### The presence of compressed (unprinted) paths is marked by using '=' instead of '-'.
145
####
146
####     $ wc -l hier_BP_MF_CC*.rpt
147
####
148
####           789583 hier_BP_MF_CC.rpt
149
####            70152 hier_BP_MF_CC_short.rpt
150
####
151
#### >>> python {SCR} --o=hier_BP_MF_CC_short.rpt --short
152
####
153
#### Print hierarchy
154
#### -  26894 GO:0008150	level-00	depth-00	biological_process [biological_process]
155
#### --    30 GO:0001906	level-01	depth-01	cell killing [biological_process]
156
#### --   555 GO:0002376	level-01	depth-01	immune system process [biological_process]
157
#### -- 11208 GO:0065007	level-01	depth-01	biological regulation [biological_process]
158
####
159
#### >>> python {SCR}
160
####
161
#### This program prints the hierarchy for all GO terms, if no argument is provided.
162
#### If a GO term is provided as an argument, then the hierarchy of all children
163
#### for that term is printed.
164
####
165
#### """.format(SCR='write_hierarchy')
166
167
168
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
169