Completed
Push — master ( 2119d7...1db109 )
by Haibao
35s
created

_get_geneid2symbol()   A

Complexity

Conditions 4

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
c 0
b 0
f 0
dl 0
loc 12
rs 9.2
1
#!/usr/bin/env python
2
"""Test that two different but equivalent fishers functions give the similar results."""
3
4
import sys
5
import os
6
from itertools import combinations
7
import collections as cx
8
import xlrd
9
10
from goatools.go_enrichment import GOEnrichmentStudy
11
from goatools.base import get_godag
12
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
13
from goatools.test_data.nature3102_goea import get_geneid2symbol
14
from goatools.associations import get_assoc_ncbi_taxids
15
16
def test_pvalcalc(prt=None):
17
    """Test P-value calculations."""
18
    pvalfnc_names = ['fisher', 'fisher_scipy_stats']
19
    fisher2pvals = _get_pvals(pvalfnc_names)
20
    _chk_pvals(fisher2pvals, prt)
21
22
def _chk_pvals(fisher2pvals, prt):
23
    for fish1, fish2 in combinations(fisher2pvals.keys(), 2):
24
        ctr = cx.Counter()
25
        pvals1 = cx.OrderedDict(sorted([(r.GO, r.p_uncorrected) for r in fisher2pvals[fish1]]))
26
        pvals2 = cx.OrderedDict(sorted([(r.GO, r.p_uncorrected) for r in fisher2pvals[fish2]]))
27
        assert len(pvals1) == len(pvals2)
28
        for go_id, pval1 in pvals1.items():
29
            pval2 = pvals2[go_id]
30
            ctr[pval1 == pval2] += 1
31
            if prt is not None:
32
                # Are values from 'fisher' and scipy stats 'fisher_exact' equivalent?
33
                if abs(pval1 - pval2) > 0.00001:
34
                    prt.write("{GO} {N1}({P1:4.2f}) {N2}({P2:4.2f}) {N1}({p1}) {N2}({p2})\n".format(
35
                        GO=go_id, N1=fish1, N2=fish2, P1=pval1, P2=pval2, p1=pval1, p2=pval2))
36
        if prt is not None:
37
            pat = "{N:>5,} {RES:5}: {N1} == {N2}\n"
38
            for val, cnt in ctr.most_common():
39
                prt.write(pat.format(N=cnt, RES=val, N1=fish1, N2=fish2))
40
41
def _get_pvals(pvalfnc_names, prt=sys.stdout):
42
    fisher2pvals = {}
43
    taxid = 10090 # Mouse study
44
    file_obo = os.path.join(os.getcwd(), "go-basic.obo")
45
    obo_dag = get_godag(file_obo, prt, loading_bar=None)
46
    geneids_pop = GeneID2nt_mus.keys()
47
    assoc_geneid2gos = get_assoc_ncbi_taxids([taxid], loading_bar=None)
48
    geneids_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
49
    for fisher in pvalfnc_names:
50
        goeaobj = GOEnrichmentStudy(
51
            geneids_pop,
52
            assoc_geneid2gos,
53
            obo_dag,
54
            propagate_counts=False,
55
            alpha=0.05,
56
            methods=None,
57
            pvalcalc=fisher)
58
        fisher2pvals[fisher] = goeaobj.get_pval_uncorr(geneids_study, prt)
59
    return fisher2pvals
60
61
62
if __name__ == '__main__':
63
    test_pvalcalc(sys.stdout)
64