Completed
Push — master ( b01e15...0fc7aa )
by
unknown
03:08
created

MgrNtGOEAs.get_goea_nts_prt()   A

Complexity

Conditions 3

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 8
rs 9.4285
1
"""Manage GOATOOLS GOEA namedtuples."""
2
3
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import collections as cx
7
8
9
def get_study_items(goea_results):
    """Collect every study item (e.g., geneids) seen in GOATOOLS GOEA results."""
    # Delegate to the manager class, which owns the aggregation logic
    mgr = MgrNtGOEAs(goea_results)
    return mgr.get_study_items()
12
13
def get_goea_nts_prt(goea_results, **kws):
    """Build print-ready namedtuples (user-specified or default fields) from GOEA results."""
    # All keyword arguments are forwarded unchanged to the manager method
    mgr = MgrNtGOEAs(goea_results)
    return mgr.get_goea_nts_prt(**kws)
16
17
18
class MgrNtGOEAs(object):
    """Manage GOATOOLS GOEA namedtuples.

    Holds a list of GOEA results, each of which is either a namedtuple
    (detected by its ``_fields`` attribute) or a GOEnrichmentRecord-like
    object (detected by its ``_fldsdefprt`` attribute).
    """

    def __init__(self, goea_results):
        # Materialize once so the results can be indexed and iterated repeatedly
        self.goea_results = list(goea_results)

    def get_study_items(self):
        """Get the union of all study items (e.g., geneids) over all results."""
        study_items = set()
        for rec in self.goea_results:
            # update() accepts any iterable, not only sets
            study_items.update(rec.study_items)
        return study_items

    def get_goea_nts_prt(self, fldnames=None, **usr_kws):
        """Return list of namedtuples removing fields which are redundant or verbose.

        Same as get_goea_nts_all, except these keyword defaults are applied
        when the caller does not provide them:
            not_fldnames: ['goterm', 'parents', 'children', 'id']
            rpt_fmt:      True
        """
        # Copy so the defaults are never written into the caller's dict
        kws = usr_kws.copy()
        kws.setdefault('not_fldnames', ['goterm', 'parents', 'children', 'id'])
        kws.setdefault('rpt_fmt', True)
        return self.get_goea_nts_all(fldnames, **kws)

    def get_goea_nts_all(self, fldnames=None, **kws):
        """Get namedtuples containing user-specified (or default) data from GOEA results.

            Reformats data from GOEnrichmentRecord objects into lists of
            namedtuples so the generic table writers may be used.

            fldnames: field names to keep; taken from the first result if None.
            kws read here:
                keep_if      callable(record) -> bool; falsy records are skipped
                rpt_fmt      passed to the record's get_field_values
                indent       if true, prefix the GO field with get_indent_dots()
                not_fldnames field names to exclude from the namedtuple
                itemid2name  passed to the record's get_field_values
            Returns a list of "NtGoeaResults" namedtuples (empty if no results).
        """
        data_nts = []  # A list of namedtuples containing GOEA results
        if not self.goea_results:
            return data_nts
        keep_if = kws.get('keep_if', None)
        rpt_fmt = kws.get('rpt_fmt', False)
        indent = kws.get('indent', False)
        itemid2name = kws.get('itemid2name', None)
        # I. FIELD (column) NAMES
        not_fldnames = kws.get('not_fldnames', None)
        if fldnames is None:
            fldnames = self._get_fieldnames(self.goea_results[0])
        # Ia. Explicitly exclude specific fields from named tuple
        if not_fldnames is not None:
            fldnames = [f for f in fldnames if f not in not_fldnames]
        nttyp = cx.namedtuple("NtGoeaResults", " ".join(fldnames))
        goid_idx = fldnames.index("GO") if 'GO' in fldnames else None
        # Indentation is only possible when the GO column is present;
        # otherwise there is no value to prefix (avoids indexing with None).
        do_indent = indent and goid_idx is not None
        # II. Loop through GOEA results stored in a GOEnrichmentRecord object
        for goerec in self.goea_results:
            # Filter first so no work is done for records that are dropped
            if keep_if is not None and not keep_if(goerec):
                continue
            vals = self._get_field_values(goerec, fldnames, rpt_fmt, itemid2name)
            if do_indent:
                vals[goid_idx] = "".join([goerec.get_indent_dots(), vals[goid_idx]])
            data_nts.append(nttyp._make(vals))
        return data_nts

    def mknts(self, add_dct):
        """Add information from add_dct to a new copy of namedtuples.

        add_dct: sequence of dicts, one per GOEA result and in the same order.
            Field names are taken from the first result plus any keys of the
            first dict that are not already fields; a key matching an existing
            field overrides that field's value.
        Returns a list of "ntgoea" namedtuples (empty if there are no results).
        """
        nts = []
        assert len(add_dct) == len(self.goea_results)
        if not self.goea_results:
            return nts
        base = self._get_item_dict(self.goea_results[0])
        flds = list(base)
        # Only append genuinely new names: namedtuple rejects duplicate fields
        flds.extend(key for key in next(iter(add_dct)) if key not in base)
        ntobj = cx.namedtuple("ntgoea", " ".join(flds))
        for dct_new, ntgoea in zip(add_dct, self.goea_results):
            dct_curr = self._get_item_dict(ntgoea)
            dct_curr.update(dct_new)
            nts.append(ntobj(**dct_curr))
        return nts

    def add_f2str(self, dcts, srcfld, dstfld, dstfmt):
        """Add a namedtuple field of type string generated from an existing namedtuple field.

        For each result, formats attribute srcfld with dstfmt.format() and
        stores the string under key dstfld in the matching dict of dcts.
        The dicts are modified in place; nothing is returned.
        """
        # Example: f2str = objntmgr.add_f2str(dcts, "p_fdr_bh", "s_fdr_bh", "{:8.2e}")
        assert len(dcts) == len(self.goea_results)
        for dct, ntgoea in zip(dcts, self.goea_results):
            dct[dstfld] = dstfmt.format(getattr(ntgoea, srcfld))

    def get_ntobj(self):
        """Create namedtuple object with GOEA fields; None if there are no results."""
        if not self.goea_results:
            return None
        fldnames = self._get_item_dict(self.goea_results[0])
        return cx.namedtuple("ntgoea", " ".join(fldnames))

    def init_dicts(self):
        """Return a list of empty dicts to be filled with new data for revised namedtuples."""
        return [{} for _ in self.goea_results]

    @staticmethod
    def _get_item_dict(item):
        """Return a new ordered field->value dict for a namedtuple or a plain object."""
        if hasattr(item, "_asdict"):  # Is a namedtuple (no __dict__ on Python 3)
            return cx.OrderedDict(item._asdict())
        return cx.OrderedDict(vars(item))

    @staticmethod
    def _get_field_values(item, fldnames, rpt_fmt=None, itemid2name=None):
        """Return field values of either a namedtuple or GOEnrichmentRecord.

        Returns None if item is neither kind.
        """
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_field_values(fldnames, rpt_fmt, itemid2name)
        if hasattr(item, "_fields"):  # Is a namedtuple
            return [getattr(item, f) for f in fldnames]
        return None

    @staticmethod
    def _get_fieldnames(item):
        """Return fieldnames of either a namedtuple or GOEnrichmentRecord.

        Returns None if item is neither kind.
        """
        if hasattr(item, "_fldsdefprt"):  # Is a GOEnrichmentRecord
            return item.get_prtflds_all()
        if hasattr(item, "_fields"):  # Is a namedtuple
            return item._fields
        return None
120
121
# Copyright (C) 2010-2018, H Tang et al., All rights reserved.
122