|
1
|
|
|
"""Manage GOATOOLS GOEA namedtuples.""" |
|
2
|
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved." |
|
4
|
|
|
__author__ = "DV Klopfenstein" |
|
5
|
|
|
|
|
6
|
|
|
import collections as cx |
|
7
|
|
|
|
|
8
|
|
|
|
|
9
|
|
|
def get_study_items(goea_results):
    """Return the union of study items (e.g., geneids) across all given GOEA results."""
    manager = MgrNtGOEAs(goea_results)
    return manager.get_study_items()
|
12
|
|
|
|
|
13
|
|
|
def get_goea_nts_prt(goea_results, **kws):
    """Convert GOEA results to print-ready namedtuples; keyword args are forwarded unchanged."""
    manager = MgrNtGOEAs(goea_results)
    return manager.get_goea_nts_prt(**kws)
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
class MgrNtGOEAs(object):
    """Manage GOATOOLS GOEA namedtuples.

    Holds a list of GOEA results and converts them into lists of namedtuples.
    A result is treated as a GOEnrichmentRecord if it has a ``_fldsdefprt``
    attribute and as a namedtuple if it has a ``_fields`` attribute.
    """

    def __init__(self, goea_results):
        # Copy into a list so the results can be indexed and traversed repeatedly
        self.goea_results = list(goea_results)

    def get_study_items(self):
        """Get all study items (e.g., geneids) as one set."""
        study_items = set()
        for rec in self.goea_results:
            # update() accepts any iterable, not only sets
            study_items.update(rec.study_items)
        return study_items

    def get_goea_nts_prt(self, fldnames=None, **usr_kws):
        """Return list of namedtuples removing fields which are redundant or verbose.

        Same as get_goea_nts_all, except that these defaults are applied
        when the caller does not provide them:
            not_fldnames: ['goterm', 'parents', 'children', 'id']
            rpt_fmt:      True
        """
        kws = usr_kws.copy()  # Do not modify the caller's keyword dict
        kws.setdefault('not_fldnames', ['goterm', 'parents', 'children', 'id'])
        kws.setdefault('rpt_fmt', True)
        return self.get_goea_nts_all(fldnames, **kws)

    def get_goea_nts_all(self, fldnames=None, **kws):
        """Get namedtuples containing user-specified (or default) data from GOEA results.

        Reformats data from GOEnrichmentRecord objects into lists of
        namedtuples so the generic table writers may be used.

        Args:
            fldnames: Field names for the namedtuples. If None, the field
                names are taken from the first GOEA result.
            kws:
                keep_if:      Function(goea_result) -> bool; keep only results returning True
                rpt_fmt:      Passed to the record's get_field_values (default: False)
                indent:       If True and a 'GO' field is present, prefix the GO
                              value with the record's get_indent_dots()
                not_fldnames: Field names to exclude from the namedtuples
                itemid2name:  Passed to the record's get_field_values

        Returns:
            A list of "NtGoeaResults" namedtuples; empty if there are no results.

        Raises:
            TypeError: A GOEA result is neither a GOEnrichmentRecord nor a namedtuple.
        """
        data_nts = []  # A list of namedtuples containing GOEA results
        if not self.goea_results:
            return data_nts
        keep_if = kws.get('keep_if', None)
        rpt_fmt = kws.get('rpt_fmt', False)
        indent = kws.get('indent', False)
        itemid2name = kws.get('itemid2name', None)
        # I. FIELD (column) NAMES
        not_fldnames = kws.get('not_fldnames', None)
        if fldnames is None:
            fldnames = self._get_fieldnames(self.goea_results[0])
            if fldnames is None:
                raise TypeError("CANNOT GET FIELD NAMES FROM GOEA RESULT OF TYPE {T}".format(
                    T=type(self.goea_results[0]).__name__))
        # Ia. Explicitly exclude specific fields from named tuple
        if not_fldnames is not None:
            fldnames = [f for f in fldnames if f not in not_fldnames]
        else:
            fldnames = list(fldnames)
        nttyp = cx.namedtuple("NtGoeaResults", " ".join(fldnames))
        goid_idx = fldnames.index("GO") if 'GO' in fldnames else None
        # Indenting is only possible if there is a GO field to indent
        do_indent = indent and goid_idx is not None
        # II. Loop through GOEA results stored in a GOEnrichmentRecord object
        for goerec in self.goea_results:
            # Filter first to avoid formatting results which will be discarded
            if keep_if is not None and not keep_if(goerec):
                continue
            vals = self._get_field_values(goerec, fldnames, rpt_fmt, itemid2name)
            if vals is None:
                raise TypeError("CANNOT GET FIELD VALUES FROM GOEA RESULT OF TYPE {T}".format(
                    T=type(goerec).__name__))
            vals = list(vals)  # Ensure values are mutable for the indent step
            if do_indent:
                vals[goid_idx] = "".join([goerec.get_indent_dots(), vals[goid_idx]])
            data_nts.append(nttyp._make(vals))
        return data_nts

    def mknts(self, add_dct):
        """Add information from add_dct to a new copy of namedtuples.

        Args:
            add_dct: A list of dicts, one per GOEA result and in the same order.
                The keys of the first dict define the fields which are added;
                a key matching an existing field overrides that field's value.

        Returns:
            A list of "ntgoea" namedtuples having the original fields plus the new fields.

        Raises:
            AssertionError: add_dct and the GOEA results differ in length.
        """
        self._chk_same_len(add_dct)
        nts = []
        if not self.goea_results:
            return nts
        flds_cur = self._get_attr_names(self.goea_results[0])
        flds = list(flds_cur)
        # dict views cannot be concatenated with '+' in Python 3; merge w/o duplicates
        for key in next(iter(add_dct)):
            if key not in flds:
                flds.append(key)
        ntobj = cx.namedtuple("ntgoea", " ".join(flds))
        for dct_new, ntgoea in zip(add_dct, self.goea_results):
            # getattr works for both namedtuples and plain objects
            dct_curr = dict((fld, getattr(ntgoea, fld)) for fld in flds_cur)
            dct_curr.update(dct_new)
            nts.append(ntobj(**dct_curr))
        return nts

    def add_f2str(self, dcts, srcfld, dstfld, dstfmt):
        """Add a namedtuple field of type string generated from an existing namedtuple field.

        Fills each dict in dcts in place (dcts[i][dstfld] = formatted value); returns None.

        Args:
            dcts:   A list of dicts, one per GOEA result and in the same order
            srcfld: Name of the existing field to read from each GOEA result
            dstfld: Key under which the formatted string is stored
            dstfmt: A str.format pattern applied to the source value

        Raises:
            AssertionError: dcts and the GOEA results differ in length.
        """
        # Example: objntmgr.add_f2str(dcts, "p_fdr_bh", "s_fdr_bh", "{:8.2e}")
        self._chk_same_len(dcts)
        for dct, ntgoea in zip(dcts, self.goea_results):
            dct[dstfld] = dstfmt.format(getattr(ntgoea, srcfld))

    def get_ntobj(self):
        """Create namedtuple object with GOEA fields. Returns None if there are no results."""
        if not self.goea_results:
            return None
        return cx.namedtuple("ntgoea", " ".join(self._get_attr_names(self.goea_results[0])))

    def init_dicts(self):
        """Return a list of empty dicts to be filled with new data for revised namedtuples."""
        return [{} for _ in self.goea_results]

    def _chk_same_len(self, dcts):
        """Raise AssertionError unless there is exactly one dict per GOEA result."""
        # Raised explicitly: a bare assert is removed by "python -O",
        # after which zip() would silently drop the unmatched items.
        if len(dcts) != len(self.goea_results):
            raise AssertionError("EXPECTED {E} DICTS, ONE PER GOEA RESULT; GOT {A}".format(
                E=len(self.goea_results), A=len(dcts)))

    @staticmethod
    def _get_attr_names(item):
        """Return names usable as namedtuple fields for a namedtuple or a plain object."""
        if hasattr(item, "_fields"):  # Is a namedtuple
            return list(item._fields)
        # Namedtuple field names cannot start with an underscore
        return [name for name in vars(item) if not name.startswith('_')]

    @staticmethod
    def _get_field_values(item, fldnames, rpt_fmt=None, itemid2name=None):
        """Return field values of either a namedtuple or GOEnrichmentRecord; None otherwise."""
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_field_values(fldnames, rpt_fmt, itemid2name)
        if hasattr(item, "_fields"):  # Is a namedtuple
            return [getattr(item, f) for f in fldnames]
        return None

    @staticmethod
    def _get_fieldnames(item):
        """Return fieldnames of either a namedtuple or GOEnrichmentRecord; None otherwise."""
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_prtflds_all()
        if hasattr(item, "_fields"):  # Is a namedtuple
            return item._fields
        return None
|
120
|
|
|
|
|
121
|
|
|
# Copyright (C) 2010-2018, H Tang et al., All rights reserved. |
|
122
|
|
|
|