Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

GoSearch._search_val()   A

Complexity

Conditions 2

Size

Total Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 6
rs 10
c 0
b 0
f 0
1
"""Used to find all genes or gene products annotated w/GO terms that match a regex."""
2
3
import sys
4
from goatools.obo_parser import GODag
5
6
__copyright__ = "Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved."
7
__author__ = "DV Klopfenstein"
8
9
class GoSearch(object):
    """Find GO terms whose text fields match a regex, plus the items annotated to them.

    Attributes set by the constructor:
        log: stream that receives search messages when no ``prt`` is given.
        bstdout: ``True`` when no log stream was supplied, otherwise the log stream.
        goa_srch_hdrs: names of the GO-term attributes examined by the search.
        obo_dag: ``GODag`` loaded from the obo file, used as a GO ID -> term mapping.
        go2items: mapping of GO ID -> items (genes or gene products) annotated to it.
    """

    def __init__(self, fin_go_basic_obo, go2items, log=None):
        """Load the obo file and remember the GO-to-items annotation.

        Args:
            fin_go_basic_obo: obo file passed straight to ``GODag``.
            go2items: mapping of GO ID -> iterable of annotated items.
            log: writable stream for messages; ``sys.stdout`` when ``None``.
        """
        self.log = sys.stdout if log is None else log
        # NOTE(review): when a log is supplied this stores the stream itself rather than
        # False; kept as-is because other code may read this attribute -- confirm intent.
        self.bstdout = True if log is None else log
        # Some obo fields often used in searching. Many are optional to load when reading obo
        self.goa_srch_hdrs = ['defn', 'comment', 'name', 'is_a', 'relationship', 'synonym', 'xref']
        self.obo_dag = GODag(fin_go_basic_obo, optional_attrs=self.goa_srch_hdrs)
        self.go2items = go2items

    def get_matching_gos(self, compiled_pattern, **kws):
        """Return the set of GO IDs having at least one searched field matching the pattern.

        Every match, a warning for each GO ID missing from the obo DAG, and a final
        summary line are written to the output stream.

        Args:
            compiled_pattern: compiled regex; its ``search`` method and ``pattern``
                attribute are used.
            **kws:
                prt: stream to write messages to (default: ``self.log``).
                gos: GO IDs to search (default: all keys of ``self.go2items``).

        Returns:
            set of matching GO IDs.
        """
        obo_dag = self.obo_dag
        prt = kws.get('prt', self.log)
        prt.write('\nPATTERN SEARCH: "{P}"\n'.format(P=compiled_pattern.pattern))
        # Only look through GOs in annotation or user-specified GOs
        srchgos = kws['gos'] if 'gos' in kws else self.go2items.keys()
        # A set is used directly: one GO may match in several fields.
        matching_gos = set()
        for go_id in srchgos:
            go_obj = obo_dag.get(go_id)
            if go_obj is None:
                prt.write("**WARNING: {GO} found in annotation is not found in obo\n".format(
                    GO=go_id))
                continue
            for hdr in self.goa_srch_hdrs:
                # Only attributes actually stored on the term instance are searched
                if hdr not in go_obj.__dict__:
                    continue
                matches = self._search_vals(compiled_pattern, getattr(go_obj, hdr))
                for mtch in matches:
                    prt.write("MATCH {go_id}({NAME}) {FLD}: {M}\n".format(
                        FLD=hdr, go_id=go_obj.id, NAME=go_obj.name, M=mtch))
                if matches:
                    matching_gos.add(go_id)
        # Print summary message
        self._summary_matching_gos(prt, compiled_pattern.pattern, matching_gos, srchgos)
        return matching_gos

    @staticmethod
    def _summary_matching_gos(prt, pattern, matching_gos, all_gos):
        """Write a one-line summary: number of matching GOs out of all searched GOs.

        Args:
            prt: stream to write to.
            pattern: regex pattern text shown in the message.
            matching_gos: sized collection of GO IDs that matched.
            all_gos: sized collection of GO IDs that were searched.
        """
        msg = 'Found {N} GO(s) out of {M} matching pattern("{P}")\n'
        prt.write(msg.format(N=len(matching_gos), M=len(all_gos), P=pattern))

    def _search_vals(self, compiled_pattern, fld_val):
        """Return the list of values in a field which match the user regex.

        A ``set`` has each of its elements searched and a ``str`` is searched itself;
        a field value of any other type yields no matches.
        """
        matches = []
        if isinstance(fld_val, set):
            for val in fld_val:
                self._search_val(matches, compiled_pattern, val)
        elif isinstance(fld_val, str):
            self._search_val(matches, compiled_pattern, fld_val)
        return matches

    @staticmethod
    def _search_val(matches, compiled_pattern, fld_val):
        """Append ``fld_val`` to ``matches`` (in place) if the regex is found in it."""
        if compiled_pattern.search(fld_val):
            matches.append(fld_val)

    def add_children_gos(self, gos):
        """Return the input GO IDs plus everything their terms' ``get_all_children()`` return.

        Args:
            gos: iterable of GO IDs; each must be a key of ``self.obo_dag``.

        Returns:
            set containing each term's ``id`` and all of its children.

        Raises:
            KeyError: if a GO ID is not present in ``self.obo_dag``.
        """
        obo_dag = self.obo_dag
        gos_all = set()
        for go_id in gos:
            go_obj = obo_dag[go_id]
            gos_all.update(go_obj.get_all_children())
            gos_all.add(go_obj.id)
        return gos_all

    def get_items(self, gos):
        """Given GO terms, return the set of genes or gene products for the GOs.

        GO IDs absent from ``self.go2items`` contribute nothing.
        """
        items = set()
        for go_id in gos:
            items.update(self.go2items.get(go_id, []))
        return items
90
91
# Copyright (C) 2010-2018, DV Klopfenstein, H Tang, All rights reserved.
92