|
1
|
|
|
"""Used to find all genes or gene products annotated w/GO terms that match a regex.""" |
|
2
|
|
|
|
|
3
|
|
|
import sys |
|
4
|
|
|
from goatools.obo_parser import GODag |
|
5
|
|
|
|
|
6
|
|
|
__copyright__ = "Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved." |
|
7
|
|
|
__author__ = "DV Klopfenstein" |
|
8
|
|
|
|
|
9
|
|
|
class GoSearch(object):
    """Find GO terms whose text fields match a regex and fetch their annotated items.

    The object holds a GO DAG (loaded from an obo file) and a mapping from
    GO IDs to the items (e.g. genes or gene products) annotated with them.
    """

    # Container types whose elements are searched one by one in _search_vals.
    _ITERABLE_FLD_TYPES = (set, frozenset, list, tuple)

    def __init__(self, fin_go_basic_obo, go2items, log=None):
        """Load the GO DAG and remember the GO-to-items association.

        Args:
            fin_go_basic_obo: obo file passed to GODag.
            go2items: mapping of GO ID -> iterable of annotated items.
            log: writable stream for messages; sys.stdout is used when None.
        """
        self.log = sys.stdout if log is None else log
        # NOTE(review): despite its name this is True only when no log was
        # given; otherwise it holds the log object itself. Kept as-is so the
        # attribute callers may read is unchanged -- confirm intended meaning.
        self.bstdout = True if log is None else log
        # Some obo fields often used in searching. Many are optional to load when reading obo
        self.goa_srch_hdrs = ['defn', 'comment', 'name', 'is_a', 'relationship', 'synonym', 'xref']
        self.obo_dag = GODag(fin_go_basic_obo, optional_attrs=self.goa_srch_hdrs)
        self.go2items = go2items

    def get_matching_gos(self, compiled_pattern, **kws):
        """Return the set of GO IDs having at least one field matching the pattern.

        Args:
            compiled_pattern: compiled regex; its ``search`` and ``pattern``
                members are used.
            **kws:
                prt: stream for messages (default: the log given at init).
                gos: GO IDs to search (default: all keys of go2items).

        Returns:
            set of the GO IDs (as given in the searched collection) that matched.

        Side effects:
            Writes a header, one line per matching field value, a warning for
            every GO ID missing from the DAG, and a summary line to ``prt``.
        """
        prt = kws.get('prt', self.log)
        prt.write('\nPATTERN SEARCH: "{P}"\n'.format(P=compiled_pattern.pattern))
        # Only look through GOs in annotation or user-specified GOs
        srchgos = kws['gos'] if 'gos' in kws else self.go2items.keys()
        matching_gos = set()
        for go_id in srchgos:
            go_obj = self.obo_dag.get(go_id, None)
            if go_obj is None:
                prt.write("**WARNING: {GO} found in annotation is not found in obo\n".format(
                    GO=go_id))
                continue
            for hdr in self.goa_srch_hdrs:
                # Only fields actually stored on this GO term are searched
                if hdr not in go_obj.__dict__:
                    continue
                matches = self._search_vals(compiled_pattern, getattr(go_obj, hdr))
                for mtch in matches:
                    prt.write("MATCH {go_id}({NAME}) {FLD}: {M}\n".format(
                        FLD=hdr, go_id=go_obj.id, NAME=go_obj.name, M=mtch))
                if matches:
                    matching_gos.add(go_id)
        # Print summary message
        self._summary_matching_gos(prt, compiled_pattern.pattern, matching_gos, srchgos)
        return matching_gos

    @staticmethod
    def _summary_matching_gos(prt, pattern, matching_gos, all_gos):
        """Write the 'Found N GO(s) out of M' summary line for get_matching_gos."""
        msg = 'Found {N} GO(s) out of {M} matching pattern("{P}")\n'
        prt.write(msg.format(N=len(matching_gos), M=len(all_gos), P=pattern))

    def _search_vals(self, compiled_pattern, fld_val):
        """Return the string values in a field that match the pattern.

        A plain string is searched directly. For a set, frozenset, list or
        tuple, each string element is searched; non-string elements are
        skipped because the regex can only be applied to strings. Any other
        field type yields no matches.
        """
        matches = []
        if isinstance(fld_val, str):
            self._search_val(matches, compiled_pattern, fld_val)
        elif isinstance(fld_val, self._ITERABLE_FLD_TYPES):
            for val in fld_val:
                if isinstance(val, str):
                    self._search_val(matches, compiled_pattern, val)
        return matches

    @staticmethod
    def _search_val(matches, compiled_pattern, fld_val):
        """Append fld_val to matches (in place) if the pattern is found in it."""
        if compiled_pattern.search(fld_val):
            matches.append(fld_val)

    def add_children_gos(self, gos):
        """Return the set of the input GOs' IDs plus all of their children.

        Each input ID is looked up in the DAG; the ID stored on the term
        object (``go_obj.id``) is what gets added, together with everything
        returned by its ``get_all_children()``.

        Raises:
            KeyError: if an input GO ID is not present in the DAG.
        """
        gos_all = set()
        for go_id in gos:
            go_obj = self.obo_dag[go_id]
            gos_all.update(go_obj.get_all_children())
            gos_all.add(go_obj.id)
        return gos_all

    def get_items(self, gos):
        """Given GO terms, return the set of genes or gene products for the GOs.

        GO IDs that have no entry in go2items contribute nothing.
        """
        items = set()
        for go_id in gos:
            items.update(self.go2items.get(go_id, []))
        return items
|
90
|
|
|
|
|
91
|
|
|
# Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved. |
|
92
|
|
|
|