| 1 |  |  | #!/usr/bin/env python | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | # -*- coding: UTF-8 -*- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | python %prog study.file population.file gene-association.file | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | This program returns P-values for functional enrichment in a cluster of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | study genes using Fisher's exact test, and corrected for multiple testing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | (including Bonferroni, Holm, Sidak, and false discovery rate) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | from __future__ import print_function | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | from __future__ import absolute_import | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | __copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | __author__ = "various" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | import collections as cx | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  | import datetime | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  | from goatools.multiple_testing import Methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  | from goatools.multiple_testing import Bonferroni | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  | from goatools.multiple_testing import Sidak | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  | from goatools.multiple_testing import HolmBonferroni | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  | from goatools.multiple_testing import FDR | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  | from goatools.multiple_testing import calc_qval | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  | from goatools.ratio import get_terms, count_terms, is_ratio_different | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  | import goatools.wr_tbl as RPT | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | from goatools.pvalcalc import FisherFactory | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  | from goatools.rpt.goea_nt_xfrm import MgrNtGOEAs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  | class GOEnrichmentRecord(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |     """Represents one result (from a single GOTerm) in the GOEnrichmentStudy | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |     namespace2NS = cx.OrderedDict([ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |         ('biological_process', 'BP'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |         ('molecular_function', 'MF'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |         ('cellular_component', 'CC')]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |     # Fields seen in every enrichment result | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |     _fldsdefprt = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |         "GO", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         "NS", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         "enrichment", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         "name", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         "ratio_in_study", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |         "ratio_in_pop", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         "p_uncorrected", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         "depth", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |         "study_count", | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         "study_items"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |     _fldsdeffmt = ["%s"]*3 + ["%-30s"] + ["%d/%d"] * 2 + ["%.3g"] + ["%d"] * 2 + ["%15s"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |     _flds = set(_fldsdefprt).intersection( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |         set(['study_items', 'study_count', 'study_n', 'pop_items', 'pop_count', 'pop_n'])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |     def __init__(self, **kwargs): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |         # Methods seen in current enrichment result | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         self.method_flds = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         for key, val in kwargs.items(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |             setattr(self, key, val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |             if key == 'ratio_in_study': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |                 setattr(self, 'study_count', val[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |                 setattr(self, 'study_n', val[1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |             if key == 'ratio_in_pop': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |                 setattr(self, 'pop_count', val[0]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |                 setattr(self, 'pop_n', val[1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         self._init_enrichment() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |         self.goterm = None  # the reference to the GOTerm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |     def get_method_name(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         """Return name of first method in the method_flds list.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |         return self.method_flds[0].fieldname | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |     def get_pvalue(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |         """Returns pval for 1st method, if it exists. Else returns uncorrected pval.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |         if self.method_flds: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |             return getattr(self, "p_{m}".format(m=self.get_method_name())) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |         return getattr(self, "p_uncorrected") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |     def set_corrected_pval(self, nt_method, pvalue): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         """Add object attribute based on method name.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         self.method_flds.append(nt_method) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         fieldname = "".join(["p_", nt_method.fieldname]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         setattr(self, fieldname, pvalue) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |     def __str__(self, indent=False): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |         field_data = [getattr(self, f, "n.a.") for f in self._fldsdefprt[:-1]] + \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |                      [getattr(self, "p_{}".format(m.fieldname)) for m in self.method_flds] + \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |                      [", ".join(sorted(getattr(self, self._fldsdefprt[-1], set())))] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |         fldsdeffmt = self._fldsdeffmt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         field_formatter = fldsdeffmt[:-1] + ["%.3g"]*len(self.method_flds) + [fldsdeffmt[-1]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |         self._chk_fields(field_data, field_formatter) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |         # default formatting only works for non-"n.a" data | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |         for idx, fld in enumerate(field_data): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |             if fld == "n.a.": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |                 field_formatter[idx] = "%s" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |         # print dots to show the level of the term | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |         dots = self.get_indent_dots() if indent else "" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |         prtdata = "\t".join(a % b for (a, b) in zip(field_formatter, field_data)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |         return "".join([dots, prtdata]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |     def get_indent_dots(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |         """Get a string of dots ("....") representing the level of the GO term.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |         return "." * self.goterm.level if self.goterm is not None else "" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |     def _chk_fields(field_data, field_formatter): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |         """Check that expected fields are present.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         if len(field_data) == len(field_formatter): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |         len_dat = len(field_data) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |         len_fmt = len(field_formatter) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |         msg = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |             "FIELD DATA({d}) != FORMATTER({f})".format(d=len_dat, f=len_fmt), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |             "DAT({N}): {D}".format(N=len_dat, D=field_data), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |             "FMT({N}): {F}".format(N=len_fmt, F=field_formatter)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |         raise Exception("\n".join(msg)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |     def __repr__(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |         return "GOEnrichmentRecord({GO})".format(GO=self.GO) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |     def set_goterm(self, go2obj): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |         """Set goterm and copy GOTerm's name and namespace.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |         self.goterm = go2obj.get(self.GO, None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |         present = self.goterm is not None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |         self.name = self.goterm.name if present else "n.a." | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |         self.NS = self.namespace2NS[self.goterm.namespace] if present else "XX" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     def _init_enrichment(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |         """Mark as 'enriched' or 'purified'.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |         self.enrichment = 'e' if ((1.0 * self.study_count / self.study_n) > | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |                                   (1.0 * self.pop_count / self.pop_n)) else 'p' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |     def update_remaining_fldsdefprt(self, min_ratio=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |         """Finish updating self (GOEnrichmentRecord) field, is_ratio_different.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |         self.is_ratio_different = is_ratio_different(min_ratio, self.study_count, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |                                                      self.study_n, self.pop_count, self.pop_n) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |     # ------------------------------------------------------------------------------------- | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |     # Methods for getting flat namedtuple values from GOEnrichmentRecord object | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |     def get_prtflds_default(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |         """Get default fields.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |         return self._fldsdefprt[:-1] + \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |                ["p_{M}".format(M=m.fieldname) for m in self.method_flds] + \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |                [self._fldsdefprt[-1]] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |     def get_prtflds_all(self): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |         """When converting to a namedtuple, get all possible fields in their original order.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |         flds = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |         dont_add = set(['_parents', 'method_flds', 'relationship_rev', 'relationship']) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |         # Fields: GO NS enrichment name ratio_in_study ratio_in_pop p_uncorrected | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |         #         depth study_count p_sm_bonferroni p_fdr_bh study_items | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |         self._flds_append(flds, self.get_prtflds_default(), dont_add) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |         # Fields: GO NS goterm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 162 |  |  |         #         ratio_in_pop pop_n pop_count pop_items name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 163 |  |  |         #         ratio_in_study study_n study_count study_items | 
            
                                                                                                            
                            
            
                                    
            
            
                | 164 |  |  |         #         method_flds enrichment p_uncorrected p_sm_bonferroni p_fdr_bh | 
            
                                                                                                            
                            
            
                                    
            
            
                | 165 |  |  |         self._flds_append(flds, vars(self).keys(), dont_add) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 166 |  |  |         # Fields: name level is_obsolete namespace id depth parents children _parents alt_ids | 
            
                                                                                                            
                            
            
                                    
            
            
                | 167 |  |  |         self._flds_append(flds, vars(self.goterm).keys(), dont_add) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 168 |  |  |         return flds | 
            
                                                                                                            
                            
            
                                    
            
            
                | 169 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 170 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 171 |  |  |     def _flds_append(flds, addthese, dont_add): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 172 |  |  |         """Retain order of fields as we add them once to the list.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 173 |  |  |         for fld in addthese: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 174 |  |  |             if fld not in flds and fld not in dont_add: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 175 |  |  |                 flds.append(fld) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 176 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 177 |  |  |     def get_field_values(self, fldnames, rpt_fmt=True, itemid2name=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 178 |  |  |         """Get flat namedtuple fields for one GOEnrichmentRecord.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 179 |  |  |         row = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 180 |  |  |         # Loop through each user field desired | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |         for fld in fldnames: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |             # 1. Check the GOEnrichmentRecord's attributes | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |             val = getattr(self, fld, None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |             if val is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |                 if rpt_fmt: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |                     val = self._get_rpt_fmt(fld, val, itemid2name) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |                 row.append(val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |             else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |                 # 2. Check the GO object for the field | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |                 val = getattr(self.goterm, fld, None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |                 if rpt_fmt: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |                     val = self._get_rpt_fmt(fld, val, itemid2name) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |                 if val is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |                     row.append(val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |                 else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |                     # 3. Field not found, raise Exception | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |                     self._err_fld(fld, fldnames) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |             if rpt_fmt: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |                 assert not isinstance(val, list), \ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |                    "UNEXPECTED LIST: FIELD({F}) VALUE({V}) FMT({P})".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |                        P=rpt_fmt, F=fld, V=val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |         return row | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |     def _get_rpt_fmt(fld, val, itemid2name=None): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 206 |  |  |         """Return values in a format amenable to printing in a table.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 207 |  |  |         if fld.startswith("ratio_"): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 208 |  |  |             return "{N}/{TOT}".format(N=val[0], TOT=val[1]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 209 |  |  |         elif fld in set(['study_items', 'pop_items', 'alt_ids']): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 210 |  |  |             if itemid2name is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 211 |  |  |                 val = [itemid2name.get(v, v) for v in val] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 212 |  |  |             return ", ".join([str(v) for v in sorted(val)]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 213 |  |  |         return val | 
            
                                                                                                            
                            
            
                                    
            
            
                | 214 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 215 |  |  |     def _err_fld(self, fld, fldnames): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 216 |  |  |         """Unrecognized field. Print detailed Failure message.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 217 |  |  |         msg = ['ERROR. UNRECOGNIZED FIELD({F})'.format(F=fld)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 218 |  |  |         actual_flds = set(self.get_prtflds_default() + self.goterm.__dict__.keys()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 219 |  |  |         bad_flds = set(fldnames).difference(set(actual_flds)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 220 |  |  |         if bad_flds: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 221 |  |  |             msg.append("\nGOEA RESULT FIELDS: {}".format(" ".join(self._fldsdefprt))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 222 |  |  |             msg.append("GO FIELDS: {}".format(" ".join(self.goterm.__dict__.keys()))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 223 |  |  |             msg.append("\nFATAL: {N} UNEXPECTED FIELDS({F})\n".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 224 |  |  |                 N=len(bad_flds), F=" ".join(bad_flds))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 225 |  |  |             msg.append("  {N} User-provided fields:".format(N=len(fldnames))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 226 |  |  |             for idx, fld in enumerate(fldnames, 1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 227 |  |  |                 mrk = "ERROR -->" if fld in bad_flds else "" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 228 |  |  |                 msg.append("  {M:>9} {I:>2}) {F}".format(M=mrk, I=idx, F=fld)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 229 |  |  |         raise Exception("\n".join(msg)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 230 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 231 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 232 |  |  | class GOEnrichmentStudy(object): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 233 |  |  |     """Runs Fisher's exact test, as well as multiple corrections | 
            
                                                                                                            
                            
            
                                    
            
            
                | 234 |  |  |     """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 235 |  |  |     # Default Excel table column widths for GOEA results | 
            
                                                                                                            
                            
            
                                    
            
            
                | 236 |  |  |     default_fld2col_widths = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 237 |  |  |         'NS'        :  3, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 238 |  |  |         'GO'        : 12, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 239 |  |  |         'alt'       :  2, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 240 |  |  |         'level'     :  3, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 241 |  |  |         'depth'     :  3, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 242 |  |  |         'enrichment':  1, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 243 |  |  |         'name'      : 60, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 244 |  |  |         'ratio_in_study':  8, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 245 |  |  |         'ratio_in_pop'  : 12, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 246 |  |  |         'study_items'   : 15, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 247 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 248 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 249 |  |  |     def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 250 |  |  |         self.log = kws['log'] if 'log' in kws else sys.stdout | 
            
                                                                                                            
                            
            
                                    
            
            
                | 251 |  |  |         self._run_multitest = { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 252 |  |  |             'local':lambda iargs: self._run_multitest_local(iargs), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 253 |  |  |             'statsmodels':lambda iargs: self._run_multitest_statsmodels(iargs)} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 254 |  |  |         self.pop = set(pop) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 255 |  |  |         self.pop_n = len(pop) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 256 |  |  |         self.assoc = assoc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 257 |  |  |         self.obo_dag = obo_dag | 
            
                                                                                                            
                            
            
                                    
            
            
                | 258 |  |  |         self.alpha = alpha | 
            
                                                                                                            
                            
            
                                    
            
            
                | 259 |  |  |         if methods is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 260 |  |  |             methods = ["bonferroni", "sidak", "holm"] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 261 |  |  |         self.methods = Methods(methods) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 262 |  |  |         self.pval_obj = FisherFactory(**kws).pval_obj | 
            
                                                                                                            
                            
            
                                    
            
            
                | 263 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 264 |  |  |         if propagate_counts: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 265 |  |  |             sys.stderr.write("Propagating term counts to parents ..\n") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 266 |  |  |             obo_dag.update_association(assoc) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 267 |  |  |         self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 268 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 269 |  |  |     def run_study(self, study, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 270 |  |  |         """Run Gene Ontology Enrichment Study (GOEA) on study ids.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 271 |  |  |         # Key-word arguments: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 272 |  |  |         methods = Methods(kws['methods']) if 'methods' in kws else self.methods | 
            
                                                                                                            
                            
            
                                    
            
            
                | 273 |  |  |         alpha = kws['alpha'] if 'alpha' in kws else self.alpha | 
            
                                                                                                            
                            
            
                                    
            
            
                | 274 |  |  |         log = kws['log'] if 'log' in kws else self.log | 
            
                                                                                                            
                            
            
                                    
            
            
                | 275 |  |  |         # Calculate uncorrected pvalues | 
            
                                                                                                            
                            
            
                                    
            
            
                | 276 |  |  |         results = self.get_pval_uncorr(study, log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 277 |  |  |         if not results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 278 |  |  |             return [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 279 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 280 |  |  |         if log is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 281 |  |  |             log.write("  {MSG}\n".format(MSG="\n  ".join(self.get_results_msg(results, study)))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 282 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 283 |  |  |         # Do multipletest corrections on uncorrected pvalues and update results | 
            
                                                                                                            
                            
            
                                    
            
            
                | 284 |  |  |         self._run_multitest_corr(results, methods, alpha, study, log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 285 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 286 |  |  |         for rec in results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 287 |  |  |             # get go term for name and level | 
            
                                                                                                            
                            
            
                                    
            
            
                | 288 |  |  |             rec.set_goterm(self.obo_dag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 289 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 290 |  |  |         # 'keep_if' can be used to keep only significant GO terms. Example: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 291 |  |  |         #     >>> keep_if = lambda nt: nt.p_fdr_bh < 0.05 # if results are significant | 
            
                                                                                                            
                            
            
                                    
            
            
                | 292 |  |  |         #     >>> goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 293 |  |  |         if 'keep_if' in kws: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 294 |  |  |             keep_if = kws['keep_if'] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 295 |  |  |             results = [r for r in results if keep_if(r)] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 296 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 297 |  |  |         # Default sort order: First, sort by BP, MF, CC. Second, sort by pval | 
            
                                                                                                            
                            
            
                                    
            
            
                | 298 |  |  |         results.sort(key=lambda r: [r.NS, r.enrichment, r.p_uncorrected]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 299 |  |  |         return results # list of GOEnrichmentRecord objects | 
            
                                                                                                            
                            
            
                                    
            
            
                | 300 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 301 |  |  |     def run_study_nts(self, study, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 302 |  |  |         """Run GOEA on study ids. Return results as a list of namedtuples.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 303 |  |  |         goea_results = self.run_study(study, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 304 |  |  |         return MgrNtGOEAs(goea_results).get_goea_nts_all() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 305 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 306 |  |  |     def get_results_msg(self, results, study): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 307 |  |  |         """Return summary for GOEA results.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 308 |  |  |         # To convert msg list to string: "\n".join(msg) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 309 |  |  |         msg = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 310 |  |  |         if results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 311 |  |  |             fmt = "{M:6,} GO terms are associated with {N:6,} of {NT:6,}" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 312 |  |  |             stu_items, num_gos_stu = self.get_item_cnt(results, "study_items") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 313 |  |  |             pop_items, num_gos_pop = self.get_item_cnt(results, "pop_items") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 314 |  |  |             stu_txt = fmt.format(N=len(stu_items), M=num_gos_stu, NT=len(set(study))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 315 |  |  |             pop_txt = fmt.format(N=len(pop_items), M=num_gos_pop, NT=self.pop_n) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 316 |  |  |             msg.append("{POP} population items".format(POP=pop_txt)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 317 |  |  |             msg.append("{STU} study items".format(STU=stu_txt)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 318 |  |  |         return msg | 
            
                                                                                                            
                            
            
                                    
            
            
                | 319 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 320 |  |  |     def get_pval_uncorr(self, study, log=sys.stdout): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 321 |  |  |         """Calculate the uncorrected pvalues for study items.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 322 |  |  |         results = [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 323 |  |  |         study_in_pop = self.pop.intersection(study) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 324 |  |  |         # " 99%    378 of    382 study items found in population" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 325 |  |  |         go2studyitems = get_terms("study", study_in_pop, self.assoc, self.obo_dag, log) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 326 |  |  |         pop_n, study_n = self.pop_n, len(study_in_pop) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 327 |  |  |         allterms = set(go2studyitems).union(set(self.go2popitems)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 328 |  |  |         if log is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 329 |  |  |             study_n_orig = len(study) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 330 |  |  |             log.write("{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 331 |  |  |                 N=study_n, M=study_n_orig, P=pop_n, R=100.0*study_n/study_n_orig)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 332 |  |  |             log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 333 |  |  |                 N=len(allterms), PFNC=self.pval_obj.name)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 334 |  |  |         calc_pvalue = self.pval_obj.calc_pvalue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 335 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 336 |  |  |         for goid in allterms: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 337 |  |  |             study_items = go2studyitems.get(goid, set()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 338 |  |  |             study_count = len(study_items) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 339 |  |  |             pop_items = self.go2popitems.get(goid, set()) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 340 |  |  |             pop_count = len(pop_items) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 341 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 342 |  |  |             one_record = GOEnrichmentRecord( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 343 |  |  |                 GO=goid, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 344 |  |  |                 p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 345 |  |  |                 study_items=study_items, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 346 |  |  |                 pop_items=pop_items, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 347 |  |  |                 ratio_in_study=(study_count, study_n), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 348 |  |  |                 ratio_in_pop=(pop_count, pop_n)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 349 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 350 |  |  |             results.append(one_record) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 351 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 352 |  |  |         return results | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 353 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 354 |  |  |     def _run_multitest_corr(self, results, usrmethod_flds, alpha, study, log): | 
            
                                                                        
                            
            
                                    
            
            
                | 355 |  |  |         """Do multiple-test corrections on uncorrected pvalues.""" | 
            
                                                                        
                            
            
                                    
            
            
                | 356 |  |  |         assert 0 < alpha < 1, "Test-wise alpha must fall between (0, 1)" | 
            
                                                                        
                            
            
                                    
            
            
                | 357 |  |  |         pvals = [r.p_uncorrected for r in results] | 
            
                                                                        
                            
            
                                    
            
            
                | 358 |  |  |         ntobj = cx.namedtuple("ntobj", "results pvals alpha nt_method study") | 
            
                                                                        
                            
            
                                    
            
            
                | 359 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 360 |  |  |         for nt_method in usrmethod_flds: | 
            
                                                                        
                            
            
                                    
            
            
                | 361 |  |  |             ntmt = ntobj(results, pvals, alpha, nt_method, study) | 
            
                                                                        
                            
            
                                    
            
            
                | 362 |  |  |             self._run_multitest[nt_method.source](ntmt) | 
            
                                                                        
                            
            
                                    
            
            
                | 363 |  |  |             if log is not None: | 
            
                                                                        
                            
            
                                    
            
            
                | 364 |  |  |                 self._log_multitest_corr(log, results, ntmt, alpha) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 365 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 366 |  |  |     def _log_multitest_corr(self, log, results, ntmt, alpha): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 367 |  |  |         """Print information regarding multitest correction results.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 368 |  |  |         ntm = ntmt.nt_method | 
            
                                                                                                            
                            
            
                                    
            
            
                | 369 |  |  |         attr_mult = "p_{M}".format(M=self.methods.get_fieldname(ntm.source, ntm.method)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 370 |  |  |         sig_cnt = sum(1 for r in results if getattr(r, attr_mult) < alpha) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 371 |  |  |         log.write("{N:8,} GO terms found significant (< {A}=alpha) after ".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 372 |  |  |             N=sig_cnt, A=alpha)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 373 |  |  |         log.write("multitest correction: ") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 374 |  |  |         log.write("{MSRC} {METHOD}\n".format(MSRC=ntm.source, METHOD=ntm.method)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 375 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 376 |  |  |     def _run_multitest_statsmodels(self, ntmt): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 377 |  |  |         """Use multitest mthods that have been implemented in statsmodels.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 378 |  |  |         # Only load statsmodels if it is used | 
            
                                                                                                            
                            
            
                                    
            
            
                | 379 |  |  |         multipletests = self.methods.get_statsmodels_multipletests() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 380 |  |  |         results = multipletests(ntmt.pvals, ntmt.alpha, ntmt.nt_method.method) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 381 |  |  |         pvals_corrected = results[1] # reject_lst, pvals_corrected, alphacSidak, alphacBonf | 
            
                                                                                                            
                            
            
                                    
            
            
                | 382 |  |  |         self._update_pvalcorr(ntmt, pvals_corrected) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 383 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 384 |  |  |     def _run_multitest_local(self, ntmt): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 385 |  |  |         """Use multitest mthods that have been implemented locally.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 386 |  |  |         corrected_pvals = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 387 |  |  |         method = ntmt.nt_method.method | 
            
                                                                                                            
                            
            
                                    
            
            
                | 388 |  |  |         if method == "bonferroni": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 389 |  |  |             corrected_pvals = Bonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 390 |  |  |         elif method == "sidak": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 391 |  |  |             corrected_pvals = Sidak(ntmt.pvals, ntmt.alpha).corrected_pvals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 392 |  |  |         elif method == "holm": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 393 |  |  |             corrected_pvals = HolmBonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 394 |  |  |         elif method == "fdr": | 
            
                                                                                                            
                            
            
                                    
            
            
                | 395 |  |  |             # get the empirical p-value distributions for FDR | 
            
                                                                                                            
                            
            
                                    
            
            
                | 396 |  |  |             term_pop = getattr(self, 'term_pop', None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 397 |  |  |             if term_pop is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 398 |  |  |                 term_pop = count_terms(self.pop, self.assoc, self.obo_dag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 399 |  |  |             p_val_distribution = calc_qval(len(ntmt.study), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 400 |  |  |                                            self.pop_n, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 401 |  |  |                                            self.pop, self.assoc, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 402 |  |  |                                            term_pop, self.obo_dag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 403 |  |  |             corrected_pvals = FDR(p_val_distribution, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 404 |  |  |                                   ntmt.results, ntmt.alpha).corrected_pvals | 
            
                                                                                                            
                            
            
                                    
            
            
                | 405 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 406 |  |  |         self._update_pvalcorr(ntmt, corrected_pvals) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 407 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 408 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 409 |  |  |     def _update_pvalcorr(ntmt, corrected_pvals): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 410 |  |  |         """Add data members to store multiple test corrections.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 411 |  |  |         if corrected_pvals is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 412 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 413 |  |  |         for rec, val in zip(ntmt.results, corrected_pvals): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 414 |  |  |             rec.set_corrected_pval(ntmt.nt_method, val) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 415 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 416 |  |  |     # Methods for writing results into tables: text, tab-separated, Excel spreadsheets | 
            
                                                                                                            
                            
            
                                    
            
            
                | 417 |  |  |     def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 418 |  |  |         """Print GOEA results to text file.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 419 |  |  |         if not goea_results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 420 |  |  |             sys.stdout.write("      0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 421 |  |  |             return | 
            
                                                                                                            
                            
            
                                    
            
            
                | 422 |  |  |         with open(fout_txt, 'w') as prt: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 423 |  |  |             if 'title' in kws: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 424 |  |  |                 prt.write("{TITLE}\n".format(TITLE=kws['title'])) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 425 |  |  |             data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 426 |  |  |             log = self.log if self.log is not None else sys.stdout | 
            
                                                                                                            
                            
            
                                    
            
            
                | 427 |  |  |             log.write("  {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 428 |  |  |                 N=len(data_nts), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 429 |  |  |                 CUR=len(MgrNtGOEAs(goea_results).get_study_items()), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 430 |  |  |                 F=fout_txt)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 431 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 432 |  |  |     def prt_txt(self, prt, goea_results, prtfmt=None, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 433 |  |  |         """Print GOEA results in text format.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 434 |  |  |         if prtfmt is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 435 |  |  |             prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} " | 
            
                                                                                                            
                            
            
                                    
            
            
                | 436 |  |  |                       "{depth:02} {name:40} {study_items}\n") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 437 |  |  |         prtfmt = self.adjust_prtfmt(prtfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 438 |  |  |         prt_flds = RPT.get_fmtflds(prtfmt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 439 |  |  |         data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 440 |  |  |         RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 441 |  |  |         return data_nts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 442 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 443 |  |  |     def wr_xlsx(self, fout_xlsx, goea_results, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 444 |  |  |         """Write a xlsx file.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 445 |  |  |         # kws: prt_if indent itemid2name(study_items) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 446 |  |  |         prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 447 |  |  |         xlsx_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 448 |  |  |         if 'fld2col_widths' not in kws: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 449 |  |  |             kws['fld2col_widths'] = {f:self.default_fld2col_widths.get(f, 8) for f in prt_flds} | 
            
                                                                                                            
                            
            
                                    
            
            
                | 450 |  |  |         RPT.wr_xlsx(fout_xlsx, xlsx_data, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 451 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 452 |  |  |     def wr_tsv(self, fout_tsv, goea_results, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 453 |  |  |         """Write tab-separated table data to file""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 454 |  |  |         prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 455 |  |  |         tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 456 |  |  |         RPT.wr_tsv(fout_tsv, tsv_data, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 457 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 458 |  |  |     def prt_tsv(self, prt, goea_results, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 459 |  |  |         """Write tab-separated table data""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 460 |  |  |         prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 461 |  |  |         tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 462 |  |  |         RPT.prt_tsv(prt, tsv_data, prt_flds, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 463 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 464 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 465 |  |  |     def adjust_prtfmt(prtfmt): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 466 |  |  |         """Adjust format_strings for legal values.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 467 |  |  |         prtfmt = prtfmt.replace("{p_holm-sidak", "{p_holm_sidak") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 468 |  |  |         prtfmt = prtfmt.replace("{p_simes-hochberg", "{p_simes_hochberg") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 469 |  |  |         return prtfmt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 470 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 471 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 472 |  |  |     def get_ns2nts(results, fldnames=None, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 473 |  |  |         """Get namedtuples of GOEA results, split into BP, MF, CC.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 474 |  |  |         ns2nts = cx.defaultdict(list) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 475 |  |  |         nts = MgrNtGOEAs(results).get_goea_nts_all(fldnames, **kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 476 |  |  |         for ntgoea in nts: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 477 |  |  |             ns2nts[ntgoea.NS].append(ntgoea) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 478 |  |  |         return ns2nts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 479 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 480 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 481 |  |  |     def get_item_cnt(results, attrname="study_items"): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 482 |  |  |         """Get all study or population items (e.g., geneids).""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 483 |  |  |         items = set() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 484 |  |  |         go_cnt = 0 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 485 |  |  |         for rec in results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 486 |  |  |             if hasattr(rec, attrname): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 487 |  |  |                 items_cur = getattr(rec, attrname) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 488 |  |  |                 # Only count GO term if there are items in the set. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 489 |  |  |                 if len(items_cur) != 0: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 490 |  |  |                     items |= items_cur | 
            
                                                                                                            
                            
            
                                    
            
            
                | 491 |  |  |                     go_cnt += 1 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 492 |  |  |         return items, go_cnt | 
            
                                                                                                            
                            
            
                                    
            
            
                | 493 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 494 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 495 |  |  |     def get_prtflds_default(results): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 496 |  |  |         """Get default fields names. Used in printing GOEA results. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 497 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 498 |  |  |            Researchers can control which fields they want to print in the GOEA results | 
            
                                                                                                            
                            
            
                                    
            
            
                | 499 |  |  |            or they can use the default fields. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 500 |  |  |         """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 501 |  |  |         if results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 502 |  |  |             return results[0].get_prtflds_default() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 503 |  |  |         return [] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 504 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 505 |  |  |     @staticmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 506 |  |  |     def print_summary(results, min_ratio=None, indent=False, pval=0.05): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 507 |  |  |         """Print summary.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 508 |  |  |         import goatools | 
            
                                                                                                            
                            
            
                                    
            
            
                | 509 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 510 |  |  |         # Header contains provenance and parameters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 511 |  |  |         date = datetime.date.today() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 512 |  |  |         print("# Generated by GOATOOLS v{0} ({1})".format(goatools.__version__, date)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 513 |  |  |         print("# min_ratio={0} pval={1}".format(min_ratio, pval)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 514 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 515 |  |  |         # field names for output | 
            
                                                                                                            
                            
            
                                    
            
            
                | 516 |  |  |         if results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 517 |  |  |             print("\t".join(GOEnrichmentStudy.get_prtflds_default(results))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 518 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 519 |  |  |         for rec in results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 520 |  |  |             # calculate some additional statistics | 
            
                                                                                                            
                            
            
                                    
            
            
                | 521 |  |  |             # (over_under, is_ratio_different) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 522 |  |  |             rec.update_remaining_fldsdefprt(min_ratio=min_ratio) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 523 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 524 |  |  |             if pval is not None and rec.p_uncorrected >= pval: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 525 |  |  |                 continue | 
            
                                                                                                            
                            
            
                                    
            
            
                | 526 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 527 |  |  |             if rec.is_ratio_different: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 528 |  |  |                 print(rec.__str__(indent=indent)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 529 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 530 |  |  |     def wr_py_goea_results(self, fout_py, goea_results, **kws): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 531 |  |  |         """Save GOEA results into Python package containing list of namedtuples.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 532 |  |  |         var_name = kws.get("var_name", "goea_results") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 533 |  |  |         docstring = kws.get("docstring", "") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 534 |  |  |         sortby = kws.get("sortby", None) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 535 |  |  |         if goea_results: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 536 |  |  |             from goatools.nt_utils import wr_py_nts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 537 |  |  |             nts_goea = goea_results | 
            
                                                                                                            
                            
            
                                    
            
            
                | 538 |  |  |             # If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 539 |  |  |             if hasattr(goea_results[0], "_fldsdefprt") or hasattr(goea_results[0], 'goterm'): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 540 |  |  |                 # Exclude some attributes from the namedtuple when saving results | 
            
                                                                                                            
                            
            
                                    
            
            
                | 541 |  |  |                 # to a Python file because the information is redundant or verbose. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 542 |  |  |                 nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 543 |  |  |             docstring = "\n".join([docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)]) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 544 |  |  |             assert hasattr(nts_goea[0], '_fields') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 545 |  |  |             if sortby is None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 546 |  |  |                 sortby = lambda nt: [getattr(nt, 'namespace'), getattr(nt, 'enrichment'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 547 |  |  |                                      getattr(nt, 'p_uncorrected'), getattr(nt, 'depth'), | 
            
                                                                                                            
                            
            
                                    
            
            
                | 548 |  |  |                                      getattr(nt, 'GO')] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 549 |  |  |             nts_goea = sorted(nts_goea, key=sortby) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 550 |  |  |             wr_py_nts(fout_py, nts_goea, docstring, var_name) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 551 |  |  |  | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 552 |  |  | # Copyright (C) 2010-2018, H Tang et al., All rights reserved. | 
            
                                                        
            
                                    
            
            
                | 553 |  |  |  |