1
|
|
|
"""Manages GO Term fill colors and bordercolors.""" |
2
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved." |
4
|
|
|
__author__ = "DV Klopfenstein" |
5
|
|
|
|
6
|
|
|
import sys |
7
|
|
|
import collections as cx |
8
|
|
|
|
9
|
|
|
|
10
|
|
|
class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Wraps a collection of GOEA result records (objects or namedtuples that
    expose ``GO``, ``study_count``, ``study_items`` and a P-value attribute)
    and provides, per GO ID, the study text and a P-value-based fill color.
    """

    # Keyword arguments read by __init__ and _init_pval_name
    kws_set = {'id2symbol', 'study_items', 'items_p_line', 'pval_name'}

    dflt_items_p_line = 5  # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # P-value thresholds mapped to fill colors. Kept in ascending order:
    # the first threshold that a P-value satisfies determines the color.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results keyed by GO ID and read the plotting options.

        Args:
            goea_results: Non-empty, indexable sequence of GOEA result
                records; each record must have a ``GO`` attribute.
            **kws: Optional settings:
                id2symbol: Mapping from study item ID to display symbol.
                study_items: None or True prints every study item;
                    an int caps the number of study items printed.
                items_p_line: Study items per text line
                    (default: ``dflt_items_p_line``).
                pval_name: Name of the record attribute holding the P-value.

        Raises:
            AssertionError: If ``goea_results`` is empty.
        """
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        self.go2res = {r.GO: r for r in goea_results}
        # Records carrying "_fldsdefprt" are treated as result objects;
        # anything else is treated as a namedtuple (see _init_pval_name).
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns the formatted study items when the result for ``goid`` has
        them, otherwise the study count text (``fmtres``). Returns None when
        ``goid`` is not among the stored GOEA results.
        """
        if goid not in self.go2res:
            return None
        res = self.go2res[goid]
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        ``goid2color`` is updated in place. GO IDs already present are left
        untouched, as are results without a P-value or with a study count
        of zero.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        for goid, res in self.go2res.items():
            if goid in goid2color:
                continue  # Never override a color that is already set
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in self.alpha2col.items():
                if pval <= alpha and res.study_count != 0:
                    goid2color[goid] = color
                    break  # Thresholds ascend: first match is the tightest

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value.

        Every stored GO ID gets a color; those not colored by P-value get
        the color of the 1.000 threshold.
        """
        go2color = {}
        self.set_goid2color_pval(go2color)
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            go2color.setdefault(goid, color_dflt)
        return go2color

    def _get_item_str(self, res):
        """Return the sorted study items of a result as display text.

        Items are rendered with _get_genestr, sorted, and wrapped at
        ``items_p_line`` items per line. The text has one of these forms:
          1. No item limit:        "7) Bcl2, Cd81, Cdkn1a, Mif, Ptprc, ..."
          2. Limit not exceeded:   "Bcl2, Cd81, Cdkn1a, Mif, Ptprc"
          3. Limit exceeded:       "7 genes; Bcl2, Cd81, Cdkn1a..."
        """
        prt_items = sorted([self._get_genestr(itemid) for itemid in res.study_items])
        num_items = len(prt_items)
        items_max = self.study_items_max
        if items_max is None:
            return "{N}) {GENES}".format(N=num_items, GENES=self._join_item_lines(prt_items))
        if num_items <= items_max:
            return self._join_item_lines(prt_items)
        short_str = self._join_item_lines(prt_items[:items_max])
        return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])

    def _join_item_lines(self, items):
        """Join items with ", ", starting a new line every items_p_line items."""
        ipl = self.items_p_line
        chunks = [items[i:i+ipl] for i in range(0, len(items), ipl)]
        return "\n".join([", ".join(str(e) for e in chunk) for chunk in chunks])

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        if itemid in self.id2symbol:
            symbol = self.id2symbol[itemid]
            if symbol is not None:
                return symbol
        # No usable symbol: fall back to the ID itself (ints become strings)
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        Order of precedence: the ``pval_name`` keyword; for result objects,
        "p_" plus the record's method name; for namedtuples, the first field
        starting with "p_" other than "p_uncorrected". Returns None if no
        such field exists.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        first_res = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=first_res.get_method_name())
        # If go2res contains GO namedtuples
        for fld in first_res._fields:
            if fld[:2] == 'p_' and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns the ``study_items`` setting when it is an int other than
        True, otherwise None (no limit).
        """
        items = self.study_items
        # True is an int in Python, so it must be excluded explicitly
        if items is None or items is True:
            return None
        if isinstance(items, int):
            return items
        return None
135
|
|
|
|
136
|
|
|
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved. |
137
|
|
|
|