1
|
|
|
"""Options for calculating uncorrected p-values.""" |
2
|
|
|
|
3
|
|
|
from __future__ import print_function |
4
|
|
|
|
5
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved." |
6
|
|
|
__author__ = "DV Klopfenstein" |
7
|
|
|
|
8
|
|
|
import collections as cx |
9
|
|
|
import sys |
10
|
|
|
|
11
|
|
|
class PvalCalcBase(object):
    """Base class for initial (uncorrected) p-value calculations.

    Holds the calculator's name, the underlying p-value function, and the
    log object. Derived classes override calc_pvalue to do the actual work.
    """

    def __init__(self, name, pval_fnc, log):
        """Store the log object, calculator name, and p-value function."""
        self.log = log
        self.name = name
        self.pval_fnc = pval_fnc

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Placeholder: p-values are calculated in derived classes.

        Raises:
            NotImplementedError: always. The message shows the attempted
                call and the p-value function stored on this object.
        """
        fnc_call = "calc_pvalue({SCNT}, {STOT}, {PCNT}, {PTOT})".format(
            SCNT=study_count, STOT=study_n, PCNT=pop_count, PTOT=pop_n)
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception continue to work.
        raise NotImplementedError("NOT IMPLEMENTED: {FNC_CALL} using {FNC}.".format(
            FNC_CALL=fnc_call, FNC=self.pval_fnc))
25
|
|
|
|
26
|
|
|
|
27
|
|
|
class FisherClass(PvalCalcBase):
    """P-value calculator that uses pvalue_population from the 'fisher' package."""

    def __init__(self, name, log):
        """Import the 'fisher' package and register its pvalue_population function."""
        # The import lives inside __init__, so a missing package raises
        # ImportError when an object is constructed, not at module import.
        import fisher as fisher_pkg
        super(FisherClass, self).__init__(name, fisher_pkg.pvalue_population, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Return the two-tailed uncorrected p-value."""
        # Signature of the wrapped function:
        #     def pvalue_population(int k, int n, int K, int N): ...
        # where
        #     k, n = study_true, study_tot
        #     K, N = population_true, population_tot
        result = self.pval_fnc(study_count, study_n, pop_count, pop_n)
        return result.two_tail
40
|
|
|
|
41
|
|
|
|
42
|
|
|
class FisherScipyStats(PvalCalcBase):
    """P-value calculator that uses fisher_exact from the scipy stats package."""

    fmterr = "STUDY={A}/{B} POP={C}/{D} scnt({scnt}) stot({stot}) pcnt({pcnt}) ptot({ptot})"

    def __init__(self, name, log):
        """Import scipy.stats and register its fisher_exact function."""
        from scipy import stats as scipy_stats
        super(FisherScipyStats, self).__init__(name, scipy_stats.fisher_exact, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Return the uncorrected p-value from a 2x2 contingency table."""
        # http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.fisher_exact.html
        #
        # The scipy example table (left) mapped onto this method's arguments (right):
        #
        #         Atlantic  Indian                              YES       NO
        # whales     8        2    | 10 whales    study_genes    8 scnt   2    | 10 = study_n
        # sharks     1        5    |  6 sharks    not s_genes    1        5    |  6
        #         --------  ------                            --------   -----
        #            9        7      16 = pop_n     pop_genes    9 pcnt   7      16 = pop_n
        #
        # The p-value for the whales/sharks table is found with:
        #
        #     >>> import scipy.stats as stats
        #     >>> oddsratio, pvalue = stats.fisher_exact([[8, 2], [1, 5]])
        #                                                  a  b    c  d
        study_yes = study_count
        study_no = study_n - study_count
        rest_yes = pop_count - study_count
        rest_no = pop_n - pop_count - study_no
        assert rest_yes >= 0, self.fmterr.format(
            A=study_yes, B=study_no, C=rest_yes, D=rest_no,
            scnt=study_count, stot=study_n, pcnt=pop_count, ptot=pop_n)
        table = [[study_yes, study_no], [rest_yes, rest_no]]
        # stats.fisher_exact returns (oddsratio, pval_uncorrected); only the p-value is kept
        _oddsratio, pval_uncorrected = self.pval_fnc(table)
        return pval_uncorrected
75
|
|
|
|
76
|
|
|
|
77
|
|
|
class FisherFactory(object):
    """Factory that picks the object used to calculate Fisher's exact test p-values."""

    # Calculator names, in display order, mapped to their implementing classes
    options = cx.OrderedDict([
        ('fisher', FisherClass),
        ('fisher_scipy_stats', FisherScipyStats),
    ])

    def __init__(self, **kws):
        """Read the optional 'log' and 'pvalcalc' keywords, then build the calculator."""
        self.log = kws.get('log', sys.stdout)
        self.pval_fnc_name = kws.get("pvalcalc", "fisher")
        self.pval_obj = self._init_pval_obj()

    def _init_pval_obj(self):
        """Return a p-value calculator object for the user-requested name."""
        requested = self.pval_fnc_name
        # Guard clause: reject names that are not registered in options
        if requested not in self.options:
            raise Exception("PVALUE FUNCTION({FNC}) NOT FOUND".format(FNC=requested))
        calc_class = self.options[requested]
        try:
            return calc_class(requested, self.log)
        except ImportError:
            # The requested calculator's import failed; use the scipy-based one
            print("fisher module not installed.  Falling back on scipy.stats.fisher_exact")
            fallback = 'fisher_scipy_stats'
            return self.options[fallback](fallback, self.log)

    def __str__(self):
        """Return the available calculator names, separated by spaces."""
        return " ".join(self.options)
105
|
|
|
|
106
|
|
|
|
107
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved. |
108
|
|
|
|