Completed
Push — master ( 907c98...6ddfe6 )
by
unknown
56s
created

FisherScipyStats.calc_pvalue()   B

Complexity

Conditions 2

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
dl 0
loc 24
rs 8.9713
c 1
b 0
f 0
1
"""Options for calculating uncorrected p-values."""
2
3
from __future__ import print_function
4
5
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved."
6
__author__ = "DV Klopfenstein"
7
8
import collections as cx
9
import sys
10
11
class PvalCalcBase(object):
    """Base class for initial p-value calculations.

    Holds the calculator name, the underlying p-value function, and the log
    stream. Derived classes override calc_pvalue.
    """

    def __init__(self, name, pval_fnc, log):
        """Store the calculator's name, its p-value function, and the log stream."""
        self.log = log
        self.name = name
        self.pval_fnc = pval_fnc

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """pvalues are calculated in derived classes.

        Raises:
            NotImplementedError: always; the message shows the attempted call
                and the p-value function held by this object.
        """
        fnc_call = "calc_pvalue({SCNT}, {STOT}, {PCNT}, {PTOT})".format(
            SCNT=study_count, STOT=study_n, PCNT=pop_count, PTOT=pop_n)
        # NotImplementedError is a subclass of Exception, so callers that
        # catch Exception continue to work.
        raise NotImplementedError("NOT IMPLEMENTED: {FNC_CALL} using {FNC}.".format(
            FNC_CALL=fnc_call, FNC=self.pval_fnc))
25
26
27
class FisherClass(PvalCalcBase):
    """P-value calculator that uses pvalue_population from the 'fisher' package."""

    def __init__(self, name, log):
        """Import 'fisher' lazily and register its pvalue_population function."""
        # Imported here, not at module level, so the module can be loaded
        # even when the 'fisher' package is absent.
        import fisher
        pval_fnc = fisher.pvalue_population
        super(FisherClass, self).__init__(name, pval_fnc, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Return the two-tailed uncorrected p-value for the given counts."""
        # Signature of the wrapped function:
        #     def pvalue_population(int k, int n, int K, int N): ...
        # where k, n = study_true, study_tot
        #       K, N = population_true, population_tot
        pvalues = self.pval_fnc(study_count, study_n, pop_count, pop_n)
        return pvalues.two_tail
40
41
42
class FisherScipyStats(PvalCalcBase):
    """From the scipy stats package, use function, fisher_exact."""

    # Template for the error raised when the counts are inconsistent.
    fmterr = "STUDY={A}/{B} POP={C}/{D} scnt({scnt}) stot({stot}) pcnt({pcnt}) ptot({ptot})"

    def __init__(self, name, log):
        """Import scipy.stats lazily and register its fisher_exact function."""
        from scipy import stats
        super(FisherScipyStats, self).__init__(name, stats.fisher_exact, log)

    def calc_pvalue(self, study_count, study_n, pop_count, pop_n):
        """Calculate uncorrected p-values.

        Builds the 2x2 contingency table [[a, b], [c, d]] from the study and
        population counts and returns the p-value reported by the stored
        p-value function.

        Raises:
            AssertionError: if pop_count is smaller than study_count.
        """
        # http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.fisher_exact.html
        #
        #         Atlantic  Indian                              YES       NO
        # whales     8        2    | 10 whales    study_genes    8 scnt   2    | 10 = study_n
        # sharks     1        5    |  6 sharks    not s_genes    1        5    |  6
        #         --------  ------                            --------   -----
        #            9        7      16 = pop_n     pop_genes    9 pcnt   7      16 = pop_n
        #
        # We use the preceding table to find the p-value for whales/sharks:
        #
        # >>> import scipy.stats as stats
        # >>> oddsratio, pvalue = stats.fisher_exact([[8, 2], [1, 5]])
        #                                              a  b    c  d
        avar = study_count
        bvar = study_n - study_count
        cvar = pop_count - study_count
        dvar = pop_n - pop_count - bvar
        # Raise explicitly instead of using an `assert` statement so the check
        # is not stripped when Python runs with -O; the exception type and
        # message are the same as before.
        if cvar < 0:
            raise AssertionError(self.fmterr.format(
                A=avar, B=bvar, C=cvar, D=dvar,
                scnt=study_count, stot=study_n, pcnt=pop_count, ptot=pop_n))
        # stats.fisher_exact returns oddsratio, pval_uncorrected
        _, p_uncorrected = self.pval_fnc([[avar, bvar], [cvar, dvar]])
        return p_uncorrected
75
76
77
class FisherFactory(object):
    """Factory for choosing a fisher function."""

    # Supported calculators, keyed by the name accepted via 'pvalcalc'.
    options = cx.OrderedDict([
        ('fisher', FisherClass),
        ('fisher_scipy_stats', FisherScipyStats),
    ])

    def __init__(self, **kws):
        """Read the optional 'log' and 'pvalcalc' keywords and build the calculator.

        'log' defaults to sys.stdout and 'pvalcalc' defaults to "fisher".
        """
        self.log = kws.get('log', sys.stdout)
        self.pval_fnc_name = kws.get("pvalcalc", "fisher")
        self.pval_obj = self._init_pval_obj()

    def _init_pval_obj(self):
        """Returns a Fisher object based on user-input.

        Raises:
            Exception: if the requested name is not a key of `options`.
            ImportError: if 'fisher_scipy_stats' is needed but cannot be imported.
        """
        requested = self.pval_fnc_name
        if requested not in self.options:
            raise Exception("PVALUE FUNCTION({FNC}) NOT FOUND".format(FNC=requested))
        fallback = 'fisher_scipy_stats'
        try:
            return self.options[requested](requested, self.log)
        except ImportError:
            # When the fallback itself was requested there is nothing left to
            # fall back on: re-raise rather than print a misleading message
            # about the 'fisher' module and retry the same failing import.
            if requested == fallback:
                raise
            print("fisher module not installed.  Falling back on scipy.stats.fisher_exact")
            # self.pval_fnc_name keeps the originally requested name; the
            # returned object's own name is the fallback's.
            return self.options[fallback](fallback, self.log)

    def __str__(self):
        """Return the names of the available calculators, separated by spaces."""
        return " ".join(self.options)
105
106
107
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved.
108