1
|
|
|
"""Manage GOATOOLS GOEA namedtuples.""" |
2
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved." |
4
|
|
|
__author__ = "DV Klopfenstein" |
5
|
|
|
|
6
|
|
|
import collections as cx |
7
|
|
|
from goatools.rpt.nts_xfrm import MgrNts |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
def get_study_items(goea_results):
    """Return every study item (e.g., geneids) found across GOATOOLS GOEA results."""
    # Delegate to the manager class, which unions the study items of each result.
    mgr = MgrNtGOEAs(goea_results)
    return mgr.get_study_items()
13
|
|
|
|
14
|
|
|
def get_goea_nts_prt(goea_results, **kws):
    """Build print-oriented namedtuples from GOATOOLS GOEA results.

    All keyword arguments are forwarded unchanged to
    MgrNtGOEAs.get_goea_nts_prt, which supplies defaults for any that
    the caller leaves out.
    """
    mgr = MgrNtGOEAs(goea_results)
    return mgr.get_goea_nts_prt(**kws)
17
|
|
|
|
18
|
|
|
|
19
|
|
|
class MgrNtGOEAs(object):
    """Manage GOATOOLS GOEA namedtuples.

    Holds a list of GOEA results, each either a GOEnrichmentRecord
    (recognized by its ``_fldsdefprt`` attribute) or a namedtuple
    (recognized by its ``_fields`` attribute), and converts them into
    lists of namedtuples.
    """

    def __init__(self, goea_results):
        """Store the GOEA results as a list.

        Copying into a list allows one-shot iterables to be passed in while
        still supporting the repeated iteration and indexing done below.
        """
        self.goea_results = list(goea_results)

    def get_study_items(self):
        """Get all study items (e.g., geneids).

        Returns:
            A set holding the union of ``study_items`` over all GOEA results.
        """
        study_items = set()
        for rec in self.goea_results:
            study_items.update(rec.study_items)
        return study_items

    def get_nts_strpval(self, fmt="{:8.2e}"):
        """Given GOEA namedtuples, return nts w/P-value in string format.

        For each field of the first result whose name starts with ``p_``,
        a companion field named ``s_<rest of name>`` is requested from
        ``MgrNts.add_f2str`` using the format string ``fmt``.

        Returns:
            The result of ``MgrNts.mknts``; an empty list if there are no
            GOEA results.
        """
        if not self.goea_results:
            # No first record to read field names from. Return an empty list,
            # as get_goea_nts_all does, rather than failing on the lookup.
            return []
        objntmgr = MgrNts(self.goea_results)
        dcts = objntmgr.init_dicts()
        fieldnames = self._get_fieldnames(self.goea_results[0])
        pval_flds = set(k for k in fieldnames if k.startswith('p_'))
        for fld_float in pval_flds:
            fld_str = "s_" + fld_float[2:]
            objntmgr.add_f2str(dcts, fld_float, fld_str, fmt)
        return objntmgr.mknts(dcts)

    def get_goea_nts_prt(self, fldnames=None, **usr_kws):
        """Return list of namedtuples removing fields which are redundant or verbose.

        Defaults applied only when the caller did not supply the keyword:
            not_fldnames: ['goterm', 'parents', 'children', 'id']
            rpt_fmt:      True
        Everything else is forwarded unchanged to get_goea_nts_all.
        """
        # Work on a copy so the defaults never leak into the caller's dict
        kws = usr_kws.copy()
        kws.setdefault('not_fldnames', ['goterm', 'parents', 'children', 'id'])
        kws.setdefault('rpt_fmt', True)
        return self.get_goea_nts_all(fldnames, **kws)

    def get_goea_nts_all(self, fldnames=None, **kws):
        """Get namedtuples containing user-specified (or default) data from GOEA results.

        Reformats data from GOEnrichmentRecord objects into lists of
        namedtuples so the generic table writers may be used.

        Args:
            fldnames: Field (column) names to report. Defaults to the
                field names of the first GOEA result.
            kws:
                keep_if: Callable taking a GOEA result; only results for
                    which it returns a true value are reported.
                rpt_fmt: Forwarded to GOEnrichmentRecord.get_field_values.
                indent: If true, the GO field value is prefixed with the
                    result's get_indent_dots(). Ignored if "GO" is not
                    among the reported fields.
                not_fldnames: Field names to exclude from the namedtuple.
                itemid2name: Forwarded to GOEnrichmentRecord.get_field_values.

        Returns:
            A list of "NtGoeaResults" namedtuples; empty if there are no
            GOEA results.
        """
        data_nts = []  # A list of namedtuples containing GOEA results
        if not self.goea_results:
            return data_nts
        keep_if = kws.get('keep_if')
        rpt_fmt = kws.get('rpt_fmt', False)
        indent = kws.get('indent', False)
        itemid2name = kws.get('itemid2name')
        # I. FIELD (column) NAMES
        not_fldnames = kws.get('not_fldnames')
        if fldnames is None:
            fldnames = self._get_fieldnames(self.goea_results[0])
        # Ia. Explicitly exclude specific fields from named tuple
        if not_fldnames is not None:
            fldnames = [f for f in fldnames if f not in not_fldnames]
        else:
            fldnames = list(fldnames)
        nttyp = cx.namedtuple("NtGoeaResults", " ".join(fldnames))
        # Indenting only makes sense if the GO field is actually reported
        goid_idx = fldnames.index("GO") if indent and "GO" in fldnames else None
        # II. Loop through GOEA results stored in a GOEnrichmentRecord object
        for goerec in self.goea_results:
            # Filter first so no values are built for records that are dropped
            if keep_if is not None and not keep_if(goerec):
                continue
            vals = self._get_field_values(goerec, fldnames, rpt_fmt, itemid2name)
            if goid_idx is not None:
                vals[goid_idx] = "".join([goerec.get_indent_dots(), vals[goid_idx]])
            data_nts.append(nttyp._make(vals))
        return data_nts

    @staticmethod
    def _get_field_values(item, fldnames, rpt_fmt=None, itemid2name=None):
        """Return the values of fldnames from a namedtuple or GOEnrichmentRecord.

        ``rpt_fmt`` and ``itemid2name`` are only used for GOEnrichmentRecord
        items, where they are forwarded to ``item.get_field_values``.

        Raises:
            TypeError: If item is neither a GOEnrichmentRecord nor a namedtuple.
        """
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_field_values(fldnames, rpt_fmt, itemid2name)
        if hasattr(item, "_fields"):  # Is a namedtuple
            return [getattr(item, f) for f in fldnames]
        raise TypeError(
            "EXPECTED GOEnrichmentRecord OR namedtuple; GOT {T}".format(T=type(item).__name__))

    @staticmethod
    def _get_fieldnames(item):
        """Return fieldnames of either a namedtuple or GOEnrichmentRecord.

        Raises:
            TypeError: If item is neither a GOEnrichmentRecord nor a namedtuple.
        """
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_prtflds_all()
        if hasattr(item, "_fields"):  # Is a namedtuple
            return item._fields
        raise TypeError(
            "EXPECTED GOEnrichmentRecord OR namedtuple; GOT {T}".format(T=type(item).__name__))
98
|
|
|
|
99
|
|
|
# Copyright (C) 2010-2018, H Tang et al., All rights reserved. |
100
|
|
|
|