| 1 |  |  | """Used to find all genes or gene products annotated w/GO terms that match a regex.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from goatools.obo_parser import GODag | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | __copyright__ = "Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | __author__ = "DV Klopfenstein" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class GoSearch(object):
                    """Find GO terms whose text fields match a regex and the items annotated to them.

                    Attributes:
                        log: Stream that receives search messages (sys.stdout by default).
                        bstdout: True when no log stream was supplied, i.e. output goes to stdout.
                        goa_srch_hdrs: Names of the GO-term attributes that are searched.
                        obo_dag: GODag built from the obo file; maps GO id -> GO term object.
                        go2items: Mapping of GO id -> iterable of genes/gene products.
                    """

                    def __init__(self, fin_go_basic_obo, go2items, log=None):
                        """Load the GO DAG and remember the GO-to-items annotation.

                        Args:
                            fin_go_basic_obo: obo file passed to GODag.
                            go2items: Mapping of GO id -> iterable of annotated items.
                            log: Writable stream for messages; sys.stdout is used if None.
                        """
                        self.log = sys.stdout if log is None else log
                        # Boolean flag: the original stored the log object itself here when a
                        # log was given, which made the flag truthy in both cases.
                        self.bstdout = log is None
                        # Some obo fields often used in searching. Many are optional to load when reading obo
                        self.goa_srch_hdrs = ['defn', 'comment', 'name', 'is_a', 'relationship', 'synonym', 'xref']
                        self.obo_dag = GODag(fin_go_basic_obo, optional_attrs=self.goa_srch_hdrs)
                        self.go2items = go2items

                    def get_matching_gos(self, compiled_pattern, **kws):
                        """Return the set of GO ids having a searched field that matches the regex.

                        Every match, a warning for each GO id missing from the obo DAG, and a
                        final summary line are written to the output stream.

                        Args:
                            compiled_pattern: Compiled regex (its `search` and `pattern` are used).
                            **kws:
                                prt: Stream to write to instead of self.log.
                                gos: Iterable of GO ids to search instead of all annotated GOs.

                        Returns:
                            Set of the GO ids (as given) for which at least one field matched.
                        """
                        prt = kws['prt'] if 'prt' in kws else self.log
                        prt.write('\nPATTERN SEARCH: "{P}"\n'.format(P=compiled_pattern.pattern))
                        # Only look through GOs in annotation or user-specified GOs.
                        # Materialize once so one-shot iterables (e.g. generators) can be both
                        # iterated here and counted in the summary.
                        srchgos = list(kws['gos'] if 'gos' in kws else self.go2items.keys())
                        obo_dag = self.obo_dag
                        matching_gos = set()
                        for go_id in srchgos:
                            go_obj = obo_dag.get(go_id, None)
                            if go_obj is None:
                                prt.write("**WARNING: {GO} found in annotation is not found in obo\n".format(
                                    GO=go_id))
                                continue
                            for hdr in self.goa_srch_hdrs:
                                # Optional fields exist only on terms for which they were loaded
                                if hdr not in go_obj.__dict__:
                                    continue
                                matches = self._search_vals(compiled_pattern, getattr(go_obj, hdr))
                                for mtch in matches:
                                    prt.write("MATCH {go_id}({NAME}) {FLD}: {M}\n".format(
                                        FLD=hdr, go_id=go_obj.id, NAME=go_obj.name, M=mtch))
                                if matches:
                                    matching_gos.add(go_id)
                        # Print summary message
                        self._summary_matching_gos(prt, compiled_pattern.pattern, matching_gos, srchgos)
                        return matching_gos

                    @staticmethod
                    def _summary_matching_gos(prt, pattern, matching_gos, all_gos):
                        """Write a one-line count of matching GOs versus all searched GOs to prt."""
                        msg = 'Found {N} GO(s) out of {M} matching pattern("{P}")\n'
                        prt.write(msg.format(N=len(matching_gos), M=len(all_gos), P=pattern))

                    def _search_vals(self, compiled_pattern, fld_val):
                        """Search for user-regex in scalar or iterable data values.

                        Args:
                            compiled_pattern: Compiled regex.
                            fld_val: A str, or a set/frozenset/list/tuple of values.

                        Returns:
                            List of the values in which the pattern was found. Values of any
                            other type yield an empty list.
                        """
                        matches = []
                        if isinstance(fld_val, str):
                            self._search_val(matches, compiled_pattern, fld_val)
                        elif isinstance(fld_val, (set, frozenset, list, tuple)):
                            for val in fld_val:
                                try:
                                    self._search_val(matches, compiled_pattern, val)
                                except TypeError:
                                    # Element is not text this pattern can search; skip it
                                    continue
                        return matches

                    @staticmethod
                    def _search_val(matches, compiled_pattern, fld_val):
                        """Append fld_val to matches if the regex is found anywhere in it."""
                        if compiled_pattern.search(fld_val):
                            matches.append(fld_val)

                    def add_children_gos(self, gos):
                        """Return children of input gos plus input gos.

                        Args:
                            gos: Iterable of GO ids; each must be a key of self.obo_dag.

                        Returns:
                            Set holding, for each input GO, the id stored on its term object and
                            everything returned by the term's get_all_children().

                        Raises:
                            KeyError: If a GO id is not in self.obo_dag.
                        """
                        obo_dag = self.obo_dag
                        gos_all = set()
                        for go_id in gos:
                            go_obj = obo_dag[go_id]
                            gos_all.update(go_obj.get_all_children())
                            # Add the id recorded on the term itself, not the lookup key
                            gos_all.add(go_obj.id)
                        return gos_all

                    def get_items(self, gos):
                        """Given GO terms, return genes or gene products for the GOs.

                        GO ids absent from self.go2items contribute nothing.

                        Returns:
                            Set of all items annotated to any of the given GO ids.
                        """
                        items = set()
                        for go_id in gos:
                            items.update(self.go2items.get(go_id, []))
                        return items
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 91 |  |  | # Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved. | 
            
                                                        
            
                                    
            
            
                | 92 |  |  |  |