| 1 |  |  | """Print a GO term's lower-level hierarchy.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | __copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | __author__ = "DV Klopfenstein" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import collections as cx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from goatools.godag.consts import Consts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from goatools.gosubdag.go_paths import GoPaths | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class WrHierGO(object):
                    """Write GO term hierarchies as text.

                    Constructor keyword arguments are split into two groups:
                      * ``kws_dct`` -- options that carry a value; kept in ``self.usrdct``.
                      * ``kws_set`` -- on/off flags; the names of the truthy ones are kept
                        in ``self.usrset``.
                    Keywords that appear in neither group are ignored.
                    """

                    kws_dct = {'max_indent', 'num_child'}
                    kws_set = {'no_indent', 'short'}
                    consts = Consts()

                    def __init__(self, gosubdag, **kws):
                        """Store the GO sub-DAG and the recognised user options.

                        Args:
                            gosubdag: GoSubDag arg, children=True, must be used.
                            **kws: optional settings; see ``kws_dct`` and ``kws_set``.
                        """
                        self.gosubdag = gosubdag
                        # Filter against the class-level option names.  The previous test,
                        # ``k in kws``, was always true, so every keyword landed in both
                        # containers regardless of which group it belonged to.
                        self.usrdct = {k: v for k, v in kws.items() if k in self.kws_dct}
                        self.usrset = {k for k, v in kws.items() if k in self.kws_set and v}
                        # ' {NS} {dcnt:6,} L{level:02} D{depth:02} {D1:5} {GO_name}'

                    def prt_hier_all(self, prt=sys.stdout):
                        """Write the hierarchy below each of the three root GO terms."""
                        # Roots: biological_process, molecular_function, cellular_component
                        for goid in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
                            self.prt_hier_down(goid, prt)

                    def prt_hier_down(self, goid, prt=sys.stdout):
                        """Write the hierarchy for GO ID ``goid`` and all GO IDs below it."""
                        obj = _WrHierPrt(self, prt)
                        obj.prt_hier_rec(goid)

                    def prt_hier_up(self, goids, prt=sys.stdout):
                        """Write, per namespace, the hierarchy connecting ``goids`` to the root.

                        For each namespace the report starts at that namespace's root term
                        and is restricted to the given GO IDs plus every GO ID found on the
                        paths returned by ``GoPaths.get_paths_from_to`` for them.  The GO IDs
                        passed in by the caller are handed to the printer as marked terms.

                        Args:
                            goids: iterable of GO IDs; each must be a key of
                                ``self.gosubdag.go2obj``.
                            prt: stream with a ``write`` method.
                        """
                        go2goterm_all = {go: self.gosubdag.go2obj[go] for go in goids}
                        objp = GoPaths()
                        for namespace, go2term_ns in self._get_namespace2go2term(go2goterm_all).items():
                            go_root = self.consts.NAMESPACE2GO[namespace]
                            goids_all = set()
                            for goid, goterm in go2term_ns.items():
                                goids_all.add(goid)
                                # presumably dn0_up1=True walks upward toward the root -- confirm
                                paths = objp.get_paths_from_to(goterm, goid_end=None, dn0_up1=True)
                                goids_all.update(o.id for p in paths for o in p)
                            obj = _WrHierPrt(self, prt, goids_all, set(go2term_ns))
                            obj.prt_hier_rec(go_root)

                    @staticmethod
                    def _get_namespace2go2term(go2terms):
                        """Group a {GO ID: GO term} mapping by each term's ``namespace``.

                        Returns:
                            defaultdict mapping namespace -> {GO ID: GO term}.
                        """
                        namespace2go2term = cx.defaultdict(dict)
                        for goid, goterm in go2terms.items():
                            namespace2go2term[goterm.namespace][goid] = goterm
                        return namespace2go2term
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  | class _WrHierPrt(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     """Print GO hierarchy.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |     def __init__(self, obj, prt=sys.stdout, include_only=None, go_marks=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         self.gosubdag = obj.gosubdag | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         self.max_indent = obj.usrdct.get('max_indent') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         # self.num_child = num_child | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         self.include_only = include_only  # if include_only else set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         self.go_marks = go_marks if go_marks else set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |         self.short_prt = 'short' in obj.usrset | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         # vars | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         self.prt = prt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         self.gos_printed = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         self.prtfmt = self.gosubdag.prt_attr['fmta'].replace('{GO}{alt:1} # ', '') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |     def prt_hier_rec(self, goid, depth=1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         """Write hierarchy for a GO Term record and all GO IDs down to the leaf level.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |         ntgo = self.gosubdag.go2nt[goid] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |         ntobj = self.gosubdag.go2obj[goid] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |         # Shortens hierarchy report by only printing the hierarchy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         # for the sub-set of user-specified GO terms which are connected. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         if self.include_only is not None and goid not in self.include_only: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         nrp = self.short_prt and goid in self.gos_printed | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |         if self.go_marks: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |             self.prt.write('{} '.format('>' if goid in self.go_marks else ' ')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         # '-' is default character indicating hierarchy level | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         # '=' is used to indicate a hierarchical path printed in detail previously. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         dashgo = self._str_dashgoid(ntgo, depth, not nrp or not ntobj.children) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         self.prt.write('{DASHGO:{N}}'.format(DASHGO=dashgo, N=17)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         # if num_child is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         #     self.prt.write('{N:>5} '.format(N=len(self.get_all_children()))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         self.prt.write("{GO_INFO}\n".format(GO_INFO=self.prtfmt.format(**ntgo._asdict()))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         self.gos_printed.add(goid) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         # Do not print hierarchy below this turn if it has already been printed | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         if nrp: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         depth += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         if self.max_indent is not None and depth > self.max_indent: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         for child in ntobj.children: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |             self.prt_hier_rec(child.id, depth) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |     def _str_dash(depth, single_or_double): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |         """Return a string containing dashes (optional) and GO ID.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |         # '-' is default character indicating hierarchy level | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |         # '=' is used to indicate a hierarchical path printed in detail previously. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |         letter = '-' if single_or_double else '=' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |         return ''.join([letter]*depth) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |     def _str_dashgoid(self, ntgo, depth, single_or_double): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |         """Return a string containing dashes (optional) and GO ID.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         dashes = self._str_dash(depth, single_or_double) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         return "{DASHES} {GO}{alt:1}".format(DASHES=dashes, GO=ntgo.GO, alt=ntgo.alt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  | #### Examples: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  | #### Print the hierarchy below Term, GO:0030663 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  | #### >>> python {SCR} GO:0030663 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  | #### - GO:0030663	level-05	depth-07	COPI-coated vesicle membrane [cellular_component] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  | #### -- GO:0012508	level-05	depth-08	Golgi to ER transport vesicle membrane [cellular_component] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  | #### -- GO:0012509	level-05	depth-08	inter-Golgi transport vesicle membrane [cellular_component] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  | #### Write the hierarchy below Term, GO:0030663 into a file | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  | #### >>> python {SCR} GO:0030663 --o=hier_GO_0030663.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  | ####   WROTE: hier_GO_0030663.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  | #### Print the hierarchy for biological process, molecular_function, and cellular_component: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  | #### >>> python {SCR} --o=hier_BP_MF_CC.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  | #### Print hierarchy for BP, MF, CC only printing the first 2 levels. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  | #### >>> python {SCR} --max_indent=2 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  | #### >>> python {SCR} --max_indent=2 --dash_len=2 --num_child | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  | #### Print a shortened version of the hierarchy for BP, MF, and CC. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  | #### This will only print a path to a leaf GO Term once. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  | #### If the path appears a second time, the term is printed again, but its path is not. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  | #### The presence of compressed (unprinted) paths is marked by using '=' instead of '-'. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  | ####     $ wc -l hier_BP_MF_CC*.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  | ####           789583 hier_BP_MF_CC.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  | ####            70152 hier_BP_MF_CC_short.rpt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  | #### >>> python {SCR} --o=hier_BP_MF_CC_short.rpt --short | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  | #### Print hierarchy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  | #### -  26894 GO:0008150	level-00	depth-00	biological_process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  | #### --    30 GO:0001906	level-01	depth-01	cell killing [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  | #### --   555 GO:0002376	level-01	depth-01	immune system process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  | #### -- 11208 GO:0065007	level-01	depth-01	biological regulation [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  | #### >>> python {SCR} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  | #### This program prints the hierarchy for all GO terms, if no argument is provided. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  | #### If a GO term is provided as an argument, then the hierarchy of all children | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  | #### for that term is printed. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  | #### | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  | #### """.format(SCR='write_hierarchy') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 168 |  |  | # Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved. | 
            
                                                        
            
                                    
            
            
                | 169 |  |  |  |