| 1 |  |  | """Options for calculating uncorrected p-values.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | from __future__ import print_function | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | __copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | __author__ = "DV Klopfenstein" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | import collections as cx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | class PvalCalcBase(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     """Base class for initial p-value calculations.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     def __init__(self, name, pval_fnc, log): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |         self.log = log | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |         self.name = name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |         self.pval_fnc = pval_fnc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |     def calc_pvalue(self, study_count, study_n, pop_count, pop_n): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |         """pvalues are calculated in derived classes.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |         fnc_call = "calc_pvalue({SCNT}, {STOT}, {PCNT} {PTOT})".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |             SCNT=study_count, STOT=study_n, PCNT=pop_count, PTOT=pop_n) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |         raise Exception("NOT IMPLEMENTED: {FNC_CALL} using {FNC}.".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |             FNC_CALL=fnc_call, FNC=self.pval_fnc)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  | class FisherClass(PvalCalcBase): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     """From the 'fisher' package, use function, pvalue_population.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |     def __init__(self, name, log): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |         import fisher | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |         super(FisherClass, self).__init__(name, fisher.pvalue_population, log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     def calc_pvalue(self, study_count, study_n, pop_count, pop_n): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |         """Calculate uncorrected p-values.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |         # k, n = study_true, study_tot, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |         # K, N = population_true, population_tot | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |         # def pvalue_population(int k, int n, int K, int N): ... | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |         return self.pval_fnc(study_count, study_n, pop_count, pop_n).two_tail | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  | class FisherScipyStats(PvalCalcBase): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     """From the scipy stats package, use function, fisher_exact.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     fmterr = "STUDY={A}/{B} POP={C}/{D} scnt({scnt}) stot({stot}) pcnt({pcnt}) ptot({ptot})" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |     def __init__(self, name, log): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         from scipy import stats | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         super(FisherScipyStats, self).__init__(name, stats.fisher_exact, log) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 50 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 51 |  |  |     def calc_pvalue(self, study_count, study_n, pop_count, pop_n): | 
            
                                                                        
                            
            
                                    
            
            
                | 52 |  |  |         """Calculate uncorrected p-values.""" | 
            
                                                                        
                            
            
                                    
            
            
                | 53 |  |  |         # http://docs.scipy.org/doc/scipy-0.17.0/reference/generated/scipy.stats.fisher_exact.html | 
            
                                                                        
                            
            
                                    
            
            
                | 54 |  |  |         # | 
            
                                                                        
                            
            
                                    
            
            
                | 55 |  |  |         #         Atlantic  Indian                              YES       NO | 
            
                                                                        
                            
            
                                    
            
            
                | 56 |  |  |         # whales     8        2    | 10 whales    study_genes    8 scnt   2    | 10 = study_n | 
            
                                                                        
                            
            
                                    
            
            
                | 57 |  |  |         # sharks     1        5    |  6 sharks    not s_genes    1        5    |  6 | 
            
                                                                        
                            
            
                                    
            
            
                | 58 |  |  |         #         --------  ------                            --------   ----- | 
            
                                                                        
                            
            
                                    
            
            
                | 59 |  |  |         #            9        7      16 = pop_n     pop_genes    9 pcnt   7      16 = pop_n | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |         # | 
            
                                                                        
                            
            
                                    
            
            
                | 61 |  |  |         # We use the preceeding table to find the p-value for whales/sharks: | 
            
                                                                        
                            
            
                                    
            
            
                | 62 |  |  |         # | 
            
                                                                        
                            
            
                                    
            
            
                | 63 |  |  |         # >>> import scipy.stats as stats | 
            
                                                                        
                            
            
                                    
            
            
                | 64 |  |  |         # >>> oddsratio, pvalue = stats.fisher_exact([[8, 2], [1, 5]]) | 
            
                                                                        
                            
            
                                    
            
            
                | 65 |  |  |         #                                              a  b    c  d | 
            
                                                                        
                            
            
                                    
            
            
                | 66 |  |  |         avar = study_count | 
            
                                                                        
                            
            
                                    
            
            
                | 67 |  |  |         bvar = study_n - study_count | 
            
                                                                        
                            
            
                                    
            
            
                | 68 |  |  |         cvar = pop_count - study_count | 
            
                                                                        
                            
            
                                    
            
            
                | 69 |  |  |         dvar = pop_n - pop_count - bvar | 
            
                                                                        
                            
            
                                    
            
            
                | 70 |  |  |         assert cvar >= 0, self.fmterr.format( | 
            
                                                                        
                            
            
                                    
            
            
                | 71 |  |  |             A=avar, B=bvar, C=cvar, D=dvar, scnt=study_count, stot=study_n, pcnt=pop_count, ptot=pop_n) | 
            
                                                                        
                            
            
                                    
            
            
                | 72 |  |  |         # stats.fisher_exact returns oddsratio, pval_uncorrected | 
            
                                                                        
                            
            
                                    
            
            
                | 73 |  |  |         _, p_uncorrected = self.pval_fnc([[avar, bvar], [cvar, dvar]]) | 
            
                                                                        
                            
            
                                    
            
            
                | 74 |  |  |         return p_uncorrected | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  | class FisherFactory(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     """Factory for choosing a fisher function.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |     options = cx.OrderedDict([ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |         ('fisher', FisherClass), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         ('fisher_scipy_stats', FisherScipyStats), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |     ]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |     def __init__(self, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         self.log = kws['log'] if 'log' in kws else sys.stdout | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         self.pval_fnc_name = kws["pvalcalc"] if "pvalcalc" in kws else "fisher" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         self.pval_obj = self._init_pval_obj() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |     def _init_pval_obj(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         """Returns a Fisher object based on user-input.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         if self.pval_fnc_name in self.options.keys(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |             try: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |                 fisher_obj = self.options[self.pval_fnc_name](self.pval_fnc_name, self.log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |             except ImportError: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |                 print("fisher module not installed.  Falling back on scipy.stats.fisher_exact") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |                 fisher_obj = self.options['fisher_scipy_stats']('fisher_scipy_stats', self.log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |             return fisher_obj | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |         raise Exception("PVALUE FUNCTION({FNC}) NOT FOUND".format(FNC=self.pval_fnc_name)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |     def __str__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |         return " ".join(self.options.keys()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 107 |  |  | # Copyright (C) 2016-2018, DV Klopfenstein, H Tang et al., All rights reserved. | 
            
                                                        
            
                                    
            
            
                | 108 |  |  |  |