|
1
|
|
|
"""Manages GO Term fill colors and bordercolors.""" |
|
2
|
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved." |
|
4
|
|
|
__author__ = "DV Klopfenstein" |
|
5
|
|
|
|
|
6
|
|
|
import sys |
|
7
|
|
|
import collections as cx |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Wraps a non-empty sequence of GOEA result records and provides the text
    and the P-value based fill color used when a GO term is drawn.

    Each record must expose a ``GO`` attribute (used as the dict key). Other
    attributes read by the methods below are ``study_items``, ``study_count``
    and the P-value attribute named by ``self.pval_name``.

    Keyword arguments recognized by ``__init__`` (see ``kws_set``):
        id2symbol    -- dict mapping a study item ID to a printable symbol
        study_items  -- int limiting the number of study items printed per
                        GO term; None or True means "no limit"
        items_p_line -- number of study items printed on each text line
        pval_name    -- name of the record attribute holding the P-value
    """

    # Names of the keyword arguments understood by __init__.
    # NOTE: 'items_p_line' previously carried a trailing space and so could
    # never match the real keyword name.
    kws_set = set(['id2symbol', 'study_items', 'items_p_line', 'pval_name'])

    dflt_items_p_line = 5  # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # P-value thresholds mapped to fill colors. The entries MUST stay in
    # ascending alpha order: set_goid2color_pval uses the first threshold
    # that the P-value does not exceed.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results keyed by GO ID and read the plotting options.

        Args:
            goea_results: non-empty, indexable sequence of result records.
            **kws: optional settings; see the class docstring.

        Raises:
            AssertionError: if goea_results is empty.
        """
        # kws: goea_results or go2nt
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        self.go2res = {r.GO: r for r in goea_results}
        # Records carrying a '_fldsdefprt' attribute are treated as result
        # objects; anything else is treated as a namedtuple.
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns the formatted study items when the record has them, the
        "<count> genes" text when it does not, and None when goid has no
        GOEA result.
        """
        res = self.go2res.get(goid)
        if res is None:
            return None
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        goid2color is updated in place. GO IDs already present are never
        overwritten. A GO ID is left uncolored when no P-value name is
        known, when its record has no such P-value, or when its
        study_count is 0.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        for goid, res in self.go2res.items():
            if goid in goid2color:
                continue  # Caller-supplied colors take precedence
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in self.alpha2col.items():
                if pval <= alpha:
                    if res.study_count != 0:
                        goid2color[goid] = color
                    # Thresholds are ascending: first match is the tightest
                    break

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value.

        Every GO ID with a GOEA result gets a color; those not colored by
        P-value receive the color stored under alpha 1.000.
        """
        go2color = {}
        self.set_goid2color_pval(go2color)
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            go2color.setdefault(goid, color_dflt)
        return go2color

    @staticmethod
    def _join_items(items, items_p_line):
        """Return items comma-joined, items_p_line per newline-separated row."""
        rows = [items[i:i + items_p_line] for i in range(0, len(items), items_p_line)]
        return "\n".join(", ".join(str(e) for e in row) for row in rows)

    def _get_item_str(self, res):
        """Return the sorted study items of res as text.

        The format depends on self.study_items_max:
          1. No limit (None):        7) Ptprc, Mif, Cd81, Bcl2, Sash3, ...
          2. Count within the limit: Ptprc, Mif, Cd81, Bcl2, Sash3
          3. Count above the limit:  7 genes; Ptprc, Mif, Cd81...
        Items are printed as IDs or as symbols (see _get_genestr) and are
        wrapped at self.items_p_line items per line.
        """
        ipl = self.items_p_line
        prt_items = sorted(self._get_genestr(itemid) for itemid in res.study_items)
        num_items = len(prt_items)
        max_items = self.study_items_max
        if max_items is None:
            return "{N}) {GENES}".format(
                N=num_items, GENES=self._join_items(prt_items, ipl))
        if num_items <= max_items:
            return self._join_items(prt_items, ipl)
        # Too many items: print the total count and only the first max_items
        short_str = self._join_items(prt_items[:max_items], ipl)
        return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        symbol = self.id2symbol.get(itemid)
        if symbol is not None:
            return symbol
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        Uses kws['pval_name'] when given. Otherwise the name is derived from
        the first stored result: "p_<method>" for result objects, or the
        first namedtuple field starting with "p_" other than
        "p_uncorrected". Returns None when no such field exists.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        res = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=res.get_method_name())
        # If go2res contains GO namedtuples
        for fld in res._fields:
            if fld.startswith('p_') and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns the integer limit, or None (no limit) when study_items is
        None, True, or not an int.
        """
        study_items = self.study_items
        # True is checked by identity first because bool is a subclass of int
        if study_items is None or study_items is True:
            return None
        if isinstance(study_items, int):
            return study_items
        return None
|
135
|
|
|
|
|
136
|
|
|
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved. |
|
137
|
|
|
|