_WrHierPrt.prt_hier_rec()   C
last analyzed

Complexity

Conditions 9

Size

Total Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
dl 0
loc 27
rs 6.6666
c 1
b 0
f 0
1
"""Print a GO term's lower-level hierarchy."""
2
3
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import sys
7
import collections as cx
8
from goatools.godag.consts import Consts
9
from goatools.gosubdag.go_paths import GoPaths
10
11
12
class WrHierGO(object):
    """Write a GO term hierarchy report.

    Holds a GoSubDag plus the user's keyword options and hands both to
    the private ``_WrHierPrt`` helper, which does the recursive printing.

    Keyword options read by the visible code:
        max_indent   -- deepest indent level to print (no limit if absent)
        dash_len     -- added to 12 to get the width of the dash/GO-ID column
        include_only -- set of GO IDs; other terms are skipped when non-empty
        go_marks     -- set of GO IDs flagged with '>' in the report
        no_indent    -- flag: truthy value omits the leading dashes
        concise      -- flag: truthy value prints each sub-hierarchy only once
    """

    # Option names, by kind. NOTE(review): these sets are declared but the
    # visible code does not filter the incoming keywords against them.
    kws_dct = set(['max_indent'])
    kws_set = set(['no_indent', 'concise'])
    consts = Consts()

    def __init__(self, gosubdag, **kws):
        """Store the GoSubDag and split the keyword args into values and flags."""
        self.gosubdag = gosubdag  # GoSubDag arg, children=True, must be used
        # Every keyword is kept. The original filter was 'k in kws', which is
        # always true; it is not tightened to kws_dct/kws_set here because
        # 'dash_len', 'include_only' and 'go_marks' are read from usrdct later.
        self.usrdct = dict(kws)
        # Names of all keywords whose value is truthy (the "flag" options)
        self.usrset = set(k for k, v in kws.items() if v)
        # ' {NS} {dcnt:6,} L{level:02} D{depth:02} {D1:5} {GO_name}'

    def prt_hier_all(self, prt=sys.stdout):
        """Write hierarchy for all GO Terms in obo file.

        Returns the set of GO IDs that were printed.
        """
        # Print: [biological_process, molecular_function, and cellular_component]
        gos_printed = set()
        for goid in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
            gos_printed.update(self.prt_hier_down(goid, prt))
        return gos_printed

    def prt_hier_down(self, goid, prt=sys.stdout):
        """Write hierarchy for all GO IDs below GO ID in arg, goid.

        Returns the set of GO IDs that were printed.
        """
        obj = _WrHierPrt(self, prt)
        obj.prt_hier_rec(goid)
        return obj.gos_printed

    def prt_hier_up(self, goids, prt=sys.stdout):
        """Write hierarchy from the namespace root down to each GO ID in arg, goids.

        Only GO IDs lying on a path between a user-specified GO ID and the
        root are printed, and the user-specified GO IDs are marked.
        Returns the set of GO IDs that were printed.

        The 'include_only' and 'go_marks' entries of self.usrdct are created
        if missing and updated in place, so they persist after this call.
        """
        go2goterm_all = {go: self.gosubdag.go2obj[go] for go in goids}
        objp = GoPaths()
        gos_printed = set()
        for namespace, go2term_ns in self._get_namespace2go2term(go2goterm_all).items():
            go_root = self.consts.NAMESPACE2GO[namespace]
            goids_all = set()  # GO IDs from user-specified GO to root
            for goid, goterm in go2term_ns.items():
                goids_all.add(goid)
                paths = objp.get_paths_from_to(goterm, goid_end=None, dn0_up1=True)
                goids_all.update(o.id for p in paths for o in p)
            # Only include GO IDs from user-specified GO to the root
            self.usrdct.setdefault('include_only', set()).update(goids_all)
            # Mark the user-specified GO term
            self.usrdct.setdefault('go_marks', set()).update(go2term_ns)
            obj = _WrHierPrt(self, prt)
            # Print first, then collect: obj.gos_printed is empty until
            # prt_hier_rec has run (the original collected before printing
            # and therefore always returned an empty set).
            obj.prt_hier_rec(go_root)
            gos_printed.update(obj.gos_printed)
        return gos_printed

    @staticmethod
    def _get_namespace2go2term(go2terms):
        """Group GO IDs by namespace: {namespace: {goid: goterm}}."""
        namespace2go2term = cx.defaultdict(dict)
        for goid, goterm in go2terms.items():
            namespace2go2term[goterm.namespace][goid] = goterm
        return namespace2go2term
71
72
73
# pylint: disable=too-many-instance-attributes,too-few-public-methods
74
class _WrHierPrt(object):
    """Print GO hierarchy.

    Reads its settings from the owning object's ``gosubdag``, ``usrdct``
    and ``usrset`` attributes and writes one report line per visited term
    to ``prt``. GO IDs written so far are collected in ``gos_printed``.
    """

    def __init__(self, obj, prt=sys.stdout):
        """Copy the print settings from obj and initialize the print state."""
        self.gosubdag = obj.gosubdag
        self.max_indent = obj.usrdct.get('max_indent')
        # None (or an empty set) means: no restriction on which terms print
        self.include_only = obj.usrdct.get('include_only')
        self.go_marks = obj.usrdct.get('go_marks', set())
        self.concise_prt = 'concise' in obj.usrset
        self.indent = 'no_indent' not in obj.usrset
        # vars
        self.prt = prt
        self.gos_printed = set()
        self.prtfmt = self._init_prtfmt()
        # Width of the left-justified "dashes + GO ID" column
        self.dash_len = obj.usrdct.get('dash_len', 6) + 12

    def prt_hier_rec(self, goid, depth=1):
        """Write hierarchy for a GO Term record and all GO IDs down to the leaf level.

        goid  -- GO ID of the term to print
        depth -- number of dashes printed in front of this term
        """
        # Shortens hierarchy report by only printing the hierarchy
        # for the sub-set of user-specified GO terms which are connected.
        # Checked before the lookups below so that a filtered-out term
        # is skipped without being looked up at all.
        if self.include_only and goid not in self.include_only:
            return
        ntgo = self.gosubdag.go2nt[goid]
        ntobj = self.gosubdag.go2obj[goid]
        # nrp: in concise mode, this term was already printed earlier
        nrp = self.concise_prt and goid in self.gos_printed
        if self.go_marks:
            self.prt.write('{} '.format('>' if goid in self.go_marks else ' '))

        # '-' is default character indicating hierarchy level
        # '=' is used to indicate a hierarchical path printed in detail previously.
        dashgo = self._str_dashgoid(ntgo, depth, not nrp or not ntobj.children)
        self.prt.write('{DASHGO:{N}}'.format(DASHGO=dashgo, N=self.dash_len))

        self.prt.write("{GO_INFO}\n".format(GO_INFO=self.prtfmt.format(**ntgo._asdict())))
        self.gos_printed.add(goid)
        # Do not print hierarchy below this term if it has already been printed
        if nrp:
            return
        depth += 1
        if self.max_indent is not None and depth > self.max_indent:
            return
        for child in ntobj.children:
            self.prt_hier_rec(child.id, depth)

    @staticmethod
    def _str_dash(depth, single_or_double):
        """Return a string of depth dashes: '-' if single_or_double else '='."""
        # '-' is default character indicating hierarchy level
        # '=' is used to indicate a hierarchical path printed in detail previously.
        letter = '-' if single_or_double else '='
        return letter * depth

    def _str_dashgoid(self, ntgo, depth, single_or_double):
        """Return a string containing dashes (optional) and GO ID."""
        dashes = self._str_dash(depth, single_or_double) if self.indent else ""
        return "{DASHES} {GO}{alt:1}".format(DASHES=dashes, GO=ntgo.GO, alt=ntgo.alt)

    def _init_prtfmt(self):
        """Initialize print format.

        Starts from the GoSubDag's 'fmt' pattern and drops the '{GO} # '
        and '{D1:5} ' fields from it.
        """
        prtfmt = self.gosubdag.prt_attr['fmt']
        prtfmt = prtfmt.replace('{GO} # ', '')
        prtfmt = prtfmt.replace('{D1:5} ', '')
        return prtfmt
137
138
#### Examples:
139
####
140
#### Print the hierarchy below Term, GO:0030663
141
#### >>> python {SCR} GO:0030663
142
####
143
#### - GO:0030663	level-05	depth-07	COPI-coated vesicle membrane [cellular_component]
144
#### -- GO:0012508	level-05	depth-08	Golgi to ER transport vesicle membrane [cellular_component]
145
#### -- GO:0012509	level-05	depth-08	inter-Golgi transport vesicle membrane [cellular_component]
146
####
147
####
148
#### Write the hierarchy below Term, GO:0030663 into a file
149
#### >>> python {SCR} GO:0030663 --o=hier_GO_0030663.rpt
150
####
151
####   WROTE: hier_GO_0030663.rpt
152
####
153
#### Print the hierarchy for biological process, molecular_function, and cellular_component:
154
#### >>> python {SCR} --o=hier_BP_MF_CC.rpt
155
####
156
#### Print hierarchy for BP, MF, CC only printing the first 2 levels.
157
#### >>> python {SCR} --max_indent=2
158
#### >>> python {SCR} --max_indent=2 --dash_len=2
159
####
160
####
161
#### Print a concise version of the hierarchy for BP, MF, and CC.
162
#### This will only print a path to a leaf GO Term once.
163
#### If the path appears a second time, the term is printed again, but its path is not.
164
#### The presence of compressed (unprinted) paths is marked by using '=' instead of '-'.
165
####
166
####     $ wc -l hier_BP_MF_CC*.rpt
167
####
168
####           789583 hier_BP_MF_CC.rpt
169
####            70152 hier_BP_MF_CC_concise.rpt
170
####
171
#### >>> python {SCR} --o=hier_BP_MF_CC_concise.rpt --concise
172
####
173
#### Print hierarchy
174
#### -  26894 GO:0008150	level-00	depth-00	biological_process [biological_process]
175
#### --    30 GO:0001906	level-01	depth-01	cell killing [biological_process]
176
#### --   555 GO:0002376	level-01	depth-01	immune system process [biological_process]
177
#### -- 11208 GO:0065007	level-01	depth-01	biological regulation [biological_process]
178
####
179
#### >>> python {SCR}
180
####
181
#### This program prints the hierarchy for all GO terms, if no argument is provided.
182
#### If a GO term is provided as an argument, then the hierarchy of all children
183
#### for that term is printed.
184
####
185
#### """.format(SCR='write_hierarchy')
186
187
188
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
189