1
|
|
|
"""Used to find all genes or gene products annotated w/GO terms that match a regex.""" |
2
|
|
|
|
3
|
|
|
import sys |
4
|
|
|
from goatools.obo_parser import GODag |
5
|
|
|
|
6
|
|
|
__copyright__ = "Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved." |
7
|
|
|
__author__ = "DV Klopfenstein" |
8
|
|
|
|
9
|
|
|
class GoSearch(object):
    """Find GO terms whose text fields match a regex, plus the items annotated to them.

    Attributes:
        log: Default stream that search messages are written to.
        bstdout: True when no log stream was supplied (messages go to sys.stdout).
        goa_srch_hdrs: Names of the GO-term attributes that are searched.
        obo_dag: GO DAG loaded from the obo file; looked up by GO ID.
        go2items: Mapping of GO ID to the genes/gene products annotated to it.
    """

    def __init__(self, fin_go_basic_obo, go2items, log=None):
        """Load the GO DAG, requesting the fields that are used when searching.

        Args:
            fin_go_basic_obo: obo file handed to GODag.
            go2items: Mapping of GO ID to an iterable of annotated items.
            log: Writable stream for messages; sys.stdout is used when None.
        """
        self.log = sys.stdout if log is None else log
        # A real boolean: previously this held the log stream itself whenever
        # one was supplied, so the flag was truthy for any truthy stream.
        self.bstdout = log is None
        # Some obo fields often used in searching. Many are optional to load when reading obo
        self.goa_srch_hdrs = ['defn', 'comment', 'name', 'is_a', 'relationship', 'synonym', 'xref']
        self.obo_dag = GODag(fin_go_basic_obo, optional_attrs=self.goa_srch_hdrs)
        self.go2items = go2items

    def get_matching_gos(self, compiled_pattern, **kws):
        """Return the set of GO IDs having a searched field that matches the regex.

        Args:
            compiled_pattern: Compiled regular expression (must provide
                ``.pattern`` and ``.search``).
            **kws:
                prt: Stream to write messages to (default: self.log).
                gos: GO IDs to search (default: the keys of self.go2items).
                    Must support ``len()``, which the summary line uses.

        Returns:
            set of the GO IDs, as given in the searched collection, that matched.
        """
        prt = kws.get('prt', self.log)
        prt.write('\nPATTERN SEARCH: "{P}"\n'.format(P=compiled_pattern.pattern))
        # Only look through GOs in annotation or user-specified GOs.
        # Kept as a conditional so go2items is not touched when 'gos' is given.
        srchgos = kws['gos'] if 'gos' in kws else self.go2items.keys()
        matching_gos = set()
        for go_id in srchgos:
            go_obj = self.obo_dag.get(go_id, None)
            if go_obj is None:
                prt.write("**WARNING: {GO} found in annotation is not found in obo\n".format(
                    GO=go_id))
                continue
            for hdr in self.goa_srch_hdrs:
                # Only fields actually stored on this term instance are searched
                if hdr not in go_obj.__dict__:
                    continue
                matches = self._search_vals(compiled_pattern, getattr(go_obj, hdr))
                for mtch in matches:
                    prt.write("MATCH {go_id}({NAME}) {FLD}: {M}\n".format(
                        FLD=hdr, go_id=go_obj.id, NAME=go_obj.name, M=mtch))
                if matches:
                    matching_gos.add(go_id)
        # Print summary message
        self._summary_matching_gos(prt, compiled_pattern.pattern, matching_gos, srchgos)
        return matching_gos

    @staticmethod
    def _summary_matching_gos(prt, pattern, matching_gos, all_gos):
        """Write a one-line count of matching GOs versus all searched GOs to prt."""
        msg = 'Found {N} GO(s) out of {M} matching pattern("{P}")\n'
        prt.write(msg.format(N=len(matching_gos), M=len(all_gos), P=pattern))

    def _search_vals(self, compiled_pattern, fld_val):
        """Return the string value(s) in fld_val that the regex matches.

        fld_val may be a single string or a set, frozenset, list or tuple.
        Only string elements of a container are searched; other elements are
        skipped rather than raising TypeError. Any other type of fld_val
        yields an empty list.
        """
        if isinstance(fld_val, str):
            candidates = [fld_val]
        elif isinstance(fld_val, (set, frozenset, list, tuple)):
            candidates = [val for val in fld_val if isinstance(val, str)]
        else:
            candidates = []
        matches = []
        for val in candidates:
            self._search_val(matches, compiled_pattern, val)
        return matches

    @staticmethod
    def _search_val(matches, compiled_pattern, fld_val):
        """Append fld_val to matches if the regex is found anywhere in it."""
        if compiled_pattern.search(fld_val):
            matches.append(fld_val)

    def add_children_gos(self, gos):
        """Return the set of input GO IDs together with all of their children.

        Each input is reported by the ``id`` of the term found in the DAG.

        Raises:
            KeyError: if a GO ID in gos is not present in the DAG.
        """
        gos_all = set()
        for go_id in gos:
            go_obj = self.obo_dag[go_id]
            gos_all.update(go_obj.get_all_children())
            gos_all.add(go_obj.id)
        return gos_all

    def get_items(self, gos):
        """Given GO terms, return the set of genes or gene products for the GOs.

        GO IDs that have no entry in go2items contribute nothing.
        """
        items = set()
        for go_id in gos:
            items.update(self.go2items.get(go_id, []))
        return items
90
|
|
|
|
91
|
|
|
# Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved. |
92
|
|
|
|