|
1
|
|
|
"""Options for calculating uncorrected p-values.""" |
|
2
|
|
|
|
|
3
|
|
|
from __future__ import print_function |
|
4
|
|
|
|
|
5
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved." |
|
6
|
|
|
__author__ = "DV Klopfenstein" |
|
7
|
|
|
|
|
8
|
|
|
import collections as cx |
|
9
|
|
|
import sys |
|
10
|
|
|
|
|
11
|
|
|
class PvalCalcBase(object):
    """Base class for initial (uncorrected) p-value calculations.

    Holds the calculator's name, the underlying p-value function and the
    log stream. Derived classes override calc_pvalue to do the real work.
    """

    def __init__(self, name, pval_fnc, log):
        """Store the calculator name, the p-value function and the log stream."""
        self.log = log
        self.name = name
        self.pval_fnc = pval_fnc

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Placeholder: p-values are calculated in derived classes.

        Raises:
            NotImplementedError: always. The message echoes the call and the
                stored p-value function. NotImplementedError derives from
                Exception, so callers catching Exception are unaffected.
        """
        # All four arguments are comma-separated so the message reads as a real call.
        fnc_call = "calc_pvalue({SCNT}, {STOT}, {PCNT}, {PTOT})".format(
            SCNT=study_count, STOT=study_n, PCNT=pop_count, PTOT=pop_n)
        raise NotImplementedError("NOT IMPLEMENTED: {FNC_CALL} using {FNC}.".format(
            FNC_CALL=fnc_call, FNC=self.pval_fnc))
|
25
|
|
|
|
|
26
|
|
|
|
|
27
|
|
|
class FisherClass(PvalCalcBase):
    """P-value calculator that delegates to pvalue_population of the 'fisher' package."""

    def __init__(self, name, log):
        """Bind fisher.pvalue_population as this calculator's p-value function."""
        # Imported here rather than at module level, so a missing 'fisher'
        # package raises ImportError only when this class is instantiated.
        import fisher
        population_fnc = fisher.pvalue_population
        super(FisherClass, self).__init__(name, population_fnc, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Return the two-tailed uncorrected p-value.

        Argument order matches the wrapped function:
            def pvalue_population(int k, int n, int K, int N): ...
        where
            k, n = study_true, study_tot
            K, N = population_true, population_tot
        """
        pvalues = self.pval_fnc(study_count, study_n, pop_count, pop_n)
        return pvalues.two_tail
|
40
|
|
|
|
|
41
|
|
|
|
|
42
|
|
|
class FisherScipyStats(PvalCalcBase):
    """From the scipy stats package, use function, fisher_exact."""

    # Diagnostic message used when the computed 2x2 table is invalid.
    fmterr = "STUDY={A}/{B} POP={C}/{D} scnt({scnt}) stot({stot}) pcnt({pcnt}) ptot({ptot})"

    def __init__(self, name, log):
        """Bind scipy.stats.fisher_exact as this calculator's p-value function."""
        # Imported here so scipy is only required when this calculator is chosen.
        from scipy import stats
        super(FisherScipyStats, self).__init__(name, stats.fisher_exact, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Calculate the uncorrected p-value from a 2x2 contingency table.

        The four counts are rearranged into [[a, b], [c, d]] and passed to
        the stored p-value function (scipy.stats.fisher_exact), which returns
        (oddsratio, pvalue); only the p-value is returned here.

        Raises:
            AssertionError: if pop_count is smaller than study_count.
        """
        # http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.fisher_exact.html
        #
        #         Atlantic  Indian                              YES       NO
        # whales     8        2    | 10 whales    study_genes    8 scnt   2    | 10 = study_n
        # sharks     1        5    |  6 sharks    not s_genes    1        5    |  6
        #         --------  ------                            --------   -----
        #            9        7      16 = pop_n     pop_genes    9 pcnt   7      16 = pop_n
        #
        # We use the preceding table to find the p-value for whales/sharks:
        #
        # >>> import scipy.stats as stats
        # >>> oddsratio, pvalue = stats.fisher_exact([[8, 2], [1, 5]])
        #                                              a  b    c  d
        avar = study_count
        bvar = study_n - study_count
        cvar = pop_count - study_count
        dvar = pop_n - pop_count - bvar
        # Raised explicitly instead of via an assert statement so the check is
        # not stripped when Python runs with -O; the exception type is unchanged.
        if cvar < 0:
            raise AssertionError(self.fmterr.format(
                A=avar, B=bvar, C=cvar, D=dvar,
                scnt=study_count, stot=study_n, pcnt=pop_count, ptot=pop_n))
        # stats.fisher_exact returns oddsratio, pval_uncorrected
        _, p_uncorrected = self.pval_fnc([[avar, bvar], [cvar, dvar]])
        return p_uncorrected
|
75
|
|
|
|
|
76
|
|
|
|
|
77
|
|
|
class FisherFactory(object):
    """Factory that selects and instantiates a Fisher p-value calculator."""

    # Calculator name -> calculator class, in the order shown by __str__.
    options = cx.OrderedDict([
        ('fisher', FisherClass),
        ('fisher_scipy_stats', FisherScipyStats),
    ])

    def __init__(self, **kws):
        """Read the optional 'log' and 'pvalcalc' keywords and build the calculator.

        'log' defaults to sys.stdout and 'pvalcalc' defaults to "fisher".
        """
        self.log = kws.get('log', sys.stdout)
        self.pval_fnc_name = kws.get("pvalcalc", "fisher")
        self.pval_obj = self._init_pval_obj()

    def _init_pval_obj(self):
        """Return a calculator object for the requested p-value function name.

        Raises Exception if the name is not a key of options. If building the
        requested calculator raises ImportError, a notice is printed and the
        'fisher_scipy_stats' calculator is built instead.
        """
        requested = self.pval_fnc_name
        if requested not in self.options:
            raise Exception("PVALUE FUNCTION({FNC}) NOT FOUND".format(FNC=requested))
        try:
            return self.options[requested](requested, self.log)
        except ImportError:
            print("fisher module not installed. Falling back on scipy.stats.fisher_exact")
            fallback = 'fisher_scipy_stats'
            return self.options[fallback](fallback, self.log)

    def __str__(self):
        """Return the available calculator names, separated by single spaces."""
        return " ".join(self.options)
|
105
|
|
|
|
|
106
|
|
|
|
|
107
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved. |
|
108
|
|
|
|