|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
"""Test that two different but equivalent fishers functions give the similar results.""" |
|
3
|
|
|
|
|
4
|
|
|
import sys |
|
5
|
|
|
import os |
|
6
|
|
|
from itertools import combinations |
|
7
|
|
|
import collections as cx |
|
8
|
|
|
import xlrd |
|
9
|
|
|
|
|
10
|
|
|
from goatools.go_enrichment import GOEnrichmentStudy |
|
11
|
|
|
from goatools.base import get_godag |
|
12
|
|
|
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus |
|
13
|
|
|
from goatools.test_data.nature3102_goea import get_geneid2symbol |
|
14
|
|
|
from goatools.associations import get_assoc_ncbi_taxids |
|
15
|
|
|
|
|
16
|
|
|
def test_pvalcalc(prt=None): |
|
17
|
|
|
"""Test P-value calculations.""" |
|
18
|
|
|
pvalfnc_names = ['fisher', 'fisher_scipy_stats'] |
|
19
|
|
|
fisher2pvals = _get_pvals(pvalfnc_names) |
|
20
|
|
|
_chk_pvals(fisher2pvals, prt) |
|
21
|
|
|
|
|
22
|
|
|
def _chk_pvals(fisher2pvals, prt): |
|
23
|
|
|
for fish1, fish2 in combinations(fisher2pvals.keys(), 2): |
|
24
|
|
|
ctr = cx.Counter() |
|
25
|
|
|
pvals1 = cx.OrderedDict(sorted([(r.GO, r.p_uncorrected) for r in fisher2pvals[fish1]])) |
|
26
|
|
|
pvals2 = cx.OrderedDict(sorted([(r.GO, r.p_uncorrected) for r in fisher2pvals[fish2]])) |
|
27
|
|
|
assert len(pvals1) == len(pvals2) |
|
28
|
|
|
for go_id, pval1 in pvals1.items(): |
|
29
|
|
|
pval2 = pvals2[go_id] |
|
30
|
|
|
ctr[pval1 == pval2] += 1 |
|
31
|
|
|
if prt is not None: |
|
32
|
|
|
# Are values from 'fisher' and scipy stats 'fisher_exact' equivalent? |
|
33
|
|
|
if abs(pval1 - pval2) > 0.00001: |
|
34
|
|
|
prt.write("{GO} {N1}({P1:4.2f}) {N2}({P2:4.2f}) {N1}({p1}) {N2}({p2})\n".format( |
|
35
|
|
|
GO=go_id, N1=fish1, N2=fish2, P1=pval1, P2=pval2, p1=pval1, p2=pval2)) |
|
36
|
|
|
if prt is not None: |
|
37
|
|
|
pat = "{N:>5,} {RES:5}: {N1} == {N2}\n" |
|
38
|
|
|
for val, cnt in ctr.most_common(): |
|
39
|
|
|
prt.write(pat.format(N=cnt, RES=val, N1=fish1, N2=fish2)) |
|
40
|
|
|
|
|
41
|
|
|
def _get_pvals(pvalfnc_names, prt=sys.stdout): |
|
42
|
|
|
fisher2pvals = {} |
|
43
|
|
|
taxid = 10090 # Mouse study |
|
44
|
|
|
file_obo = os.path.join(os.getcwd(), "go-basic.obo") |
|
45
|
|
|
obo_dag = get_godag(file_obo, prt, loading_bar=None) |
|
46
|
|
|
geneids_pop = GeneID2nt_mus.keys() |
|
47
|
|
|
assoc_geneid2gos = get_assoc_ncbi_taxids([taxid], loading_bar=None) |
|
48
|
|
|
geneids_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx") |
|
49
|
|
|
for fisher in pvalfnc_names: |
|
50
|
|
|
goeaobj = GOEnrichmentStudy( |
|
51
|
|
|
geneids_pop, |
|
52
|
|
|
assoc_geneid2gos, |
|
53
|
|
|
obo_dag, |
|
54
|
|
|
propagate_counts=False, |
|
55
|
|
|
alpha=0.05, |
|
56
|
|
|
methods=None, |
|
57
|
|
|
pvalcalc=fisher) |
|
58
|
|
|
fisher2pvals[fisher] = goeaobj.get_pval_uncorr(geneids_study, prt) |
|
59
|
|
|
return fisher2pvals |
|
60
|
|
|
|
|
61
|
|
|
|
|
62
|
|
|
if __name__ == '__main__': |
|
63
|
|
|
test_pvalcalc(sys.stdout) |
|
64
|
|
|
|