|
1
|
|
|
"""Test Gene Ontology Enrichement Analysis.""" |
|
2
|
|
|
|
|
3
|
|
|
import sys |
|
4
|
|
|
import os |
|
5
|
|
|
from goatools.go_enrichment import GOEnrichmentStudy |
|
6
|
|
|
from goatools.associations import read_associations |
|
7
|
|
|
from goatools.base import get_godag |
|
8
|
|
|
|
|
9
|
|
|
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved." |
|
10
|
|
|
|
|
11
|
|
|
REPO = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") |
|
12
|
|
|
|
|
13
|
|
|
def test_unknown_gos(): |
|
14
|
|
|
"""Ensure that a study with only unknown GO Terms will run gracefully.""" |
|
15
|
|
|
#pylint: disable=bad-whitespace |
|
16
|
|
|
os.system("python {SCR} --alpha=0.05 {STUDY} {POP} {ASSN} --obo={OBO}".format( |
|
17
|
|
|
SCR ="{REPO}/scripts/find_enrichment.py".format(REPO=REPO), |
|
18
|
|
|
OBO ="{REPO}/go-basic.obo".format(REPO=REPO), |
|
19
|
|
|
STUDY="{REPO}/tests/data/study_unknown".format(REPO=REPO), |
|
20
|
|
|
POP ="{REPO}/tests/data/small_population".format(REPO=REPO), |
|
21
|
|
|
ASSN ="{REPO}/tests/data/small_association".format(REPO=REPO))) |
|
22
|
|
|
|
|
23
|
|
|
def test_goea_fdr_dflt(): |
|
24
|
|
|
"""Test GOEA with method, fdr. Print original summary""" |
|
25
|
|
|
goeaobj = get_goeaobj() |
|
26
|
|
|
study_fin = "{REPO}/tests/data/small_study".format(REPO=REPO) |
|
27
|
|
|
study_ids = [line.rstrip() for line in open(study_fin)] |
|
28
|
|
|
goea_results = goeaobj.run_study(study_ids) |
|
29
|
|
|
goeaobj.print_summary(goea_results) |
|
30
|
|
|
|
|
31
|
|
|
def test_goea_local(log=sys.stdout): |
|
32
|
|
|
"""Test GOEA with local multipletest correction methods for local.""" |
|
33
|
|
|
goeaobj = get_goeaobj() |
|
34
|
|
|
study_fin = "{REPO}/tests/data/small_study".format(REPO=REPO) |
|
35
|
|
|
study_ids = [line.rstrip() for line in open(study_fin)] |
|
36
|
|
|
# prt_if = lambda nt: nt.p_uncorrected < 0.00005 |
|
37
|
|
|
prt_if = None |
|
38
|
|
|
for method in ("fdr", "bonferroni", "sidak", "holm"): |
|
39
|
|
|
goea_results = goeaobj.run_study(study_ids, methods=[method]) |
|
40
|
|
|
# Make format_string. Examples: |
|
41
|
|
|
# "{NS} {p_uncorrected:5.3e} {p_fdr:5.3e} {name} ({study_count} gene(s))\n" |
|
42
|
|
|
# "{NS} {p_uncorrected:5.3e} {p_bonferroni:5.3e} {name} ({study_count} gene(s))\n" |
|
43
|
|
|
# "{NS} {p_uncorrected:5.3e} {p_sidak:5.3e} {name} ({study_count} gene(s))\n" |
|
44
|
|
|
fmtstr = "".join(["{NS} {p_uncorrected:5.3e} {", |
|
45
|
|
|
"p_{M}:5.3e".format(M=method), |
|
46
|
|
|
"} {name} ({study_count} gene(s))\n"]) |
|
47
|
|
|
goeaobj.prt_txt(log, goea_results, fmtstr, prt_if=prt_if) |
|
48
|
|
|
|
|
49
|
|
|
def test_goea_bonferroni(): |
|
50
|
|
|
"""Test GOEA with method, bonferroni.""" |
|
51
|
|
|
goeaobj = get_goeaobj(['bonferroni']) |
|
52
|
|
|
study_fin = "{REPO}/tests/data/small_study".format(REPO=REPO) |
|
53
|
|
|
study_ids = [line.rstrip() for line in open(study_fin)] |
|
54
|
|
|
|
|
55
|
|
|
fout_xlsx = "{REPO}/goea_bonferroni_usrflds.xlsx".format(REPO=REPO) |
|
56
|
|
|
goea_results = goeaobj.run_study(study_ids) |
|
57
|
|
|
prt_flds = ["GO", "NS", "enrichment", "name"] |
|
58
|
|
|
# Counts, ratios |
|
59
|
|
|
prt_flds.extend(["ratio_in_study", "ratio_in_pop"]) |
|
60
|
|
|
# These fields have the same info as: ratio_in_study ratio_in_pop |
|
61
|
|
|
prt_flds.extend(["study_count", "study_n", "pop_count", "pop_n"]) |
|
62
|
|
|
prt_flds.extend(["p_uncorrected", "depth", "p_bonferroni", "study_items"]) |
|
63
|
|
|
goeaobj.wr_xlsx(fout_xlsx, goea_results, prt_flds=prt_flds) |
|
64
|
|
|
|
|
65
|
|
|
# Only print if bonferonni value < 0.05 |
|
66
|
|
|
# prt_if = lambda nt: nt.p_bonferroni < 0.05 |
|
67
|
|
|
prt_if = None |
|
68
|
|
|
# Print to tab-separated table and Excel spreadsheet |
|
69
|
|
|
goeaobj.wr_tsv("{REPO}/goea_bonferroni.tsv".format(REPO=REPO), goea_results, prt_if=prt_if) |
|
70
|
|
|
# Print level in addition to all the regular fields |
|
71
|
|
|
# User can control which fields are printed and the order that they appear in the table |
|
72
|
|
|
prt_flds = "NS level GO name ratio_in_study ratio_in_pop p_uncorrected p_bonferroni".split() |
|
73
|
|
|
fout_xlsx = "{REPO}/goea_bonferroni_lev.xlsx".format(REPO=REPO) |
|
74
|
|
|
goeaobj.wr_xlsx(fout_xlsx, goea_results, prt_if=prt_if, prt_flds=prt_flds) |
|
75
|
|
|
|
|
76
|
|
|
def get_goeaobj(methods=None): |
|
77
|
|
|
"""Test GOEA with method, fdr.""" |
|
78
|
|
|
obo_fin = os.path.join(REPO, "go-basic.obo") |
|
79
|
|
|
obo_dag = get_godag(obo_fin, loading_bar=None) |
|
80
|
|
|
assoc = read_associations("{REPO}/tests/data/small_association".format(REPO=REPO), no_top=True) |
|
81
|
|
|
popul_fin = "{REPO}/tests/data/small_population".format(REPO=REPO) |
|
82
|
|
|
popul_ids = [line.rstrip() for line in open(popul_fin)] |
|
83
|
|
|
goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods) |
|
84
|
|
|
return goeaobj |
|
85
|
|
|
|
|
86
|
|
|
def run_all(): |
|
87
|
|
|
"""Run all local multiple tests.""" |
|
88
|
|
|
test_unknown_gos() |
|
89
|
|
|
test_goea_fdr_dflt() |
|
90
|
|
|
test_goea_local() |
|
91
|
|
|
test_goea_bonferroni() |
|
92
|
|
|
|
|
93
|
|
|
if __name__ == '__main__': |
|
94
|
|
|
run_all() |
|
95
|
|
|
|
|
96
|
|
|
# Copyright (C) 2010-2018, H Tang et al., All rights reserved. |
|
97
|
|
|
|