Completed
Push — master ( 0b9f50...907c98 )
by
unknown
01:04
created

GOEnrichmentStudy.__init__()   B

Complexity

Conditions 6

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
dl 0
loc 19
rs 8
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
"""
5
python %prog study.file population.file gene-association.file
6
7
This program returns P-values for functional enrichment in a cluster of
8
study genes using Fisher's exact test, and corrected for multiple testing
9
(including Bonferroni, Holm, Sidak, and false discovery rate)
10
"""
11
12
from __future__ import print_function
13
from __future__ import absolute_import
14
15
16
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved."
17
__author__ = "various"
18
19
import sys
20
import collections as cx
21
import datetime
22
23
from goatools.multiple_testing import Methods
24
from goatools.multiple_testing import Bonferroni
25
from goatools.multiple_testing import Sidak
26
from goatools.multiple_testing import HolmBonferroni
27
from goatools.multiple_testing import FDR
28
from goatools.multiple_testing import calc_qval
29
from goatools.ratio import get_terms, count_terms, is_ratio_different
30
import goatools.wr_tbl as RPT
31
from goatools.pvalcalc import FisherFactory
32
33
from goatools.goea.goea_nt_xfrm import MgrNtGOEAs
34
35
class GOEnrichmentRecord(object):
36
    """Represents one result (from a single GOTerm) in the GOEnrichmentStudy
37
    """
38
    namespace2NS = cx.OrderedDict([
39
        ('biological_process', 'BP'),
40
        ('molecular_function', 'MF'),
41
        ('cellular_component', 'CC')])
42
43
    # Fields seen in every enrichment result
44
    _fldsdefprt = [
45
        "GO",
46
        "NS",
47
        "enrichment",
48
        "name",
49
        "ratio_in_study",
50
        "ratio_in_pop",
51
        "p_uncorrected",
52
        "depth",
53
        "study_count",
54
        "study_items"]
55
    _fldsdeffmt = ["%s"]*3 + ["%-30s"] + ["%d/%d"] * 2 + ["%.3g"] + ["%d"] * 2 + ["%15s"]
56
57
    _flds = set(_fldsdefprt).intersection(
58
        set(['study_items', 'study_count', 'study_n', 'pop_items', 'pop_count', 'pop_n']))
59
60
    def __init__(self, **kwargs):
61
        # Methods seen in current enrichment result
62
        self._methods = []
63
        for key, val in kwargs.items():
64
            setattr(self, key, val)
65
            if key == 'ratio_in_study':
66
                setattr(self, 'study_count', val[0])
67
                setattr(self, 'study_n', val[1])
68
            if key == 'ratio_in_pop':
69
                setattr(self, 'pop_count', val[0])
70
                setattr(self, 'pop_n', val[1])
71
        self._init_enrichment()
72
        self.goterm = None  # the reference to the GOTerm
73
74
    def get_method_name(self):
75
        """Return name of first method in the _methods list."""
76
        return self._methods[0].fieldname
77
78
    def get_pvalue(self):
79
        """Returns pval for 1st method, if it exists. Else returns uncorrected pval."""
80
        if self._methods:
81
            return getattr(self, "p_{m}".format(m=self.get_method_name()))
82
        return getattr(self, "p_uncorrected")
83
84
    def set_corrected_pval(self, nt_method, pvalue):
85
        """Add object attribute based on method name."""
86
        self._methods.append(nt_method)
87
        fieldname = "".join(["p_", nt_method.fieldname])
88
        setattr(self, fieldname, pvalue)
89
90
    def __str__(self, indent=False):
91
        field_data = [getattr(self, f, "n.a.") for f in self._fldsdefprt[:-1]] + \
92
                     [getattr(self, "p_{}".format(m.fieldname)) for m in self._methods] + \
93
                     [", ".join(sorted(getattr(self, self._fldsdefprt[-1], set())))]
94
        fldsdeffmt = self._fldsdeffmt
95
        field_formatter = fldsdeffmt[:-1] + ["%.3g"]*len(self._methods) + [fldsdeffmt[-1]]
96
        self._chk_fields(field_data, field_formatter)
97
98
        # default formatting only works for non-"n.a" data
99
        for idx, fld in enumerate(field_data):
100
            if fld == "n.a.":
101
                field_formatter[idx] = "%s"
102
103
        # print dots to show the level of the term
104
        dots = self.get_indent_dots() if indent else ""
105
        prtdata = "\t".join(a % b for (a, b) in zip(field_formatter, field_data))
106
        return "".join([dots, prtdata])
107
108
    def get_indent_dots(self):
109
        """Get a string of dots ("....") representing the level of the GO term."""
110
        return "." * self.goterm.level if self.goterm is not None else ""
111
112
    @staticmethod
113
    def _chk_fields(field_data, field_formatter):
114
        """Check that expected fields are present."""
115
        if len(field_data) == len(field_formatter):
116
            return
117
        len_dat = len(field_data)
118
        len_fmt = len(field_formatter)
119
        msg = [
120
            "FIELD DATA({d}) != FORMATTER({f})".format(d=len_dat, f=len_fmt),
121
            "DAT({N}): {D}".format(N=len_dat, D=field_data),
122
            "FMT({N}): {F}".format(N=len_fmt, F=field_formatter)]
123
        raise Exception("\n".join(msg))
124
125
    def __repr__(self):
126
        return "GOEnrichmentRecord({GO})".format(GO=self.GO)
127
128
    def set_goterm(self, go2obj):
129
        """Set goterm and copy GOTerm's name and namespace."""
130
        self.goterm = go2obj.get(self.GO, None)
131
        present = self.goterm is not None
132
        self.name = self.goterm.name if present else "n.a."
133
        self.NS = self.namespace2NS[self.goterm.namespace] if present else "XX"
134
135
    def _init_enrichment(self):
136
        """Mark as 'enriched' or 'purified'."""
137
        self.enrichment = 'e' if ((1.0 * self.study_count / self.study_n) >
138
                                  (1.0 * self.pop_count / self.pop_n)) else 'p'
139
140
    def update_remaining_fldsdefprt(self, min_ratio=None):
141
        """Finish updating self (GOEnrichmentRecord) field, is_ratio_different."""
142
        self.is_ratio_different = is_ratio_different(min_ratio, self.study_count,
143
                                                     self.study_n, self.pop_count, self.pop_n)
144
145
146
    # -------------------------------------------------------------------------------------
147
    # Methods for getting flat namedtuple values from GOEnrichmentRecord object
148
    def get_prtflds_default(self):
149
        """Get default fields."""
150
        return self._fldsdefprt[:-1] + \
151
               ["p_{M}".format(M=m.fieldname) for m in self._methods] + \
152
               [self._fldsdefprt[-1]]
153
154
    def get_prtflds_all(self):
155
        """When converting to a namedtuple, get all possible fields in their original order."""
156
        flds = []
157
        dont_add = set(['_parents', '_methods', 'relationship_rev', 'relationship'])
158
        # Fields: GO NS enrichment name ratio_in_study ratio_in_pop p_uncorrected
159
        #         depth study_count p_sm_bonferroni p_fdr_bh study_items
160
        self._flds_append(flds, self.get_prtflds_default(), dont_add)
161
        # Fields: GO NS goterm
162
        #         ratio_in_pop pop_n pop_count pop_items name
163
        #         ratio_in_study study_n study_count study_items
164
        #         _methods enrichment p_uncorrected p_sm_bonferroni p_fdr_bh
165
        self._flds_append(flds, vars(self).keys(), dont_add)
166
        # Fields: name level is_obsolete namespace id depth parents children _parents alt_ids
167
        self._flds_append(flds, vars(self.goterm).keys(), dont_add)
168
        return flds
169
170
    @staticmethod
171
    def _flds_append(flds, addthese, dont_add):
172
        """Retain order of fields as we add them once to the list."""
173
        for fld in addthese:
174
            if fld not in flds and fld not in dont_add:
175
                flds.append(fld)
176
177
    def get_field_values(self, fldnames, rpt_fmt=True, itemid2name=None):
178
        """Get flat namedtuple fields for one GOEnrichmentRecord."""
179
        row = []
180
        # Loop through each user field desired
181
        for fld in fldnames:
182
            # 1. Check the GOEnrichmentRecord's attributes
183
            val = getattr(self, fld, None)
184
            if val is not None:
185
                if rpt_fmt:
186
                    val = self._get_rpt_fmt(fld, val, itemid2name)
187
                row.append(val)
188
            else:
189
                # 2. Check the GO object for the field
190
                val = getattr(self.goterm, fld, None)
191
                if rpt_fmt:
192
                    val = self._get_rpt_fmt(fld, val, itemid2name)
193
                if val is not None:
194
                    row.append(val)
195
                else:
196
                    # 3. Field not found, raise Exception
197
                    self._err_fld(fld, fldnames)
198
            if rpt_fmt:
199
                assert not isinstance(val, list), \
200
                   "UNEXPECTED LIST: FIELD({F}) VALUE({V}) FMT({P})".format(
201
                       P=rpt_fmt, F=fld, V=val)
202
        return row
203
204
    @staticmethod
205
    def _get_rpt_fmt(fld, val, itemid2name=None):
206
        """Return values in a format amenable to printing in a table."""
207
        if fld.startswith("ratio_"):
208
            return "{N}/{TOT}".format(N=val[0], TOT=val[1])
209
        elif fld in set(['study_items', 'pop_items', 'alt_ids']):
210
            if itemid2name is not None:
211
                val = [itemid2name.get(v, v) for v in val]
212
            return ", ".join([str(v) for v in sorted(val)])
213
        return val
214
215
    def _err_fld(self, fld, fldnames):
216
        """Unrecognized field. Print detailed Failure message."""
217
        msg = ['ERROR. UNRECOGNIZED FIELD({F})'.format(F=fld)]
218
        actual_flds = set(self.get_prtflds_default() + self.goterm.__dict__.keys())
219
        bad_flds = set(fldnames).difference(set(actual_flds))
220
        if bad_flds:
221
            msg.append("\nGOEA RESULT FIELDS: {}".format(" ".join(self._fldsdefprt)))
222
            msg.append("GO FIELDS: {}".format(" ".join(self.goterm.__dict__.keys())))
223
            msg.append("\nFATAL: {N} UNEXPECTED FIELDS({F})\n".format(
224
                N=len(bad_flds), F=" ".join(bad_flds)))
225
            msg.append("  {N} User-provided fields:".format(N=len(fldnames)))
226
            for idx, fld in enumerate(fldnames, 1):
227
                mrk = "ERROR -->" if fld in bad_flds else ""
228
                msg.append("  {M:>9} {I:>2}) {F}".format(M=mrk, I=idx, F=fld))
229
        raise Exception("\n".join(msg))
230
231
232
class GOEnrichmentStudy(object):
233
    """Runs Fisher's exact test, as well as multiple corrections
234
    """
235
    # Default Excel table column widths for GOEA results
236
    default_fld2col_widths = {
237
        'NS'        :  3,
238
        'GO'        : 12,
239
        'alt'       :  2,
240
        'level'     :  3,
241
        'depth'     :  3,
242
        'enrichment':  1,
243
        'name'      : 60,
244
        'ratio_in_study':  8,
245
        'ratio_in_pop'  : 12,
246
        'study_items'   : 15,
247
    }
248
249
    def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
250
        self.log = kws['log'] if 'log' in kws else sys.stdout
251
        self._run_multitest = {
252
            'local':lambda iargs: self._run_multitest_local(iargs),
253
            'statsmodels':lambda iargs: self._run_multitest_statsmodels(iargs)}
254
        self.pop = pop
255
        self.pop_n = len(pop)
256
        self.assoc = assoc
257
        self.obo_dag = obo_dag
258
        self.alpha = alpha
259
        if methods is None:
260
            methods = ["bonferroni", "sidak", "holm"]
261
        self.methods = Methods(methods)
262
        self.pval_obj = FisherFactory(**kws).pval_obj
263
264
        if propagate_counts:
265
            sys.stderr.write("Propagating term counts to parents ..\n")
266
            obo_dag.update_association(assoc)
267
        self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
268
269
    def run_study(self, study, **kws):
270
        """Run Gene Ontology Enrichment Study (GOEA) on study ids."""
271
        # Key-word arguments:
272
        methods = Methods(kws['methods']) if 'methods' in kws else self.methods
273
        alpha = kws['alpha'] if 'alpha' in kws else self.alpha
274
        log = kws['log'] if 'log' in kws else self.log
275
        # Calculate uncorrected pvalues
276
        results = self.get_pval_uncorr(study, log)
277
        if not results:
278
            return []
279
280
        if log is not None:
281
            log.write("  {MSG}\n".format(MSG="\n  ".join(self.get_results_msg(results, study))))
282
283
        # Do multipletest corrections on uncorrected pvalues and update results
284
        self._run_multitest_corr(results, methods, alpha, study, log)
285
286
        for rec in results:
287
            # get go term for name and level
288
            rec.set_goterm(self.obo_dag)
289
290
        # 'keep_if' can be used to keep only significant GO terms. Example:
291
        #     >>> keep_if = lambda nt: nt.p_fdr_bh < 0.05 # if results are significant
292
        #     >>> goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if)
293
        if 'keep_if' in kws:
294
            keep_if = kws['keep_if']
295
            results = [r for r in results if keep_if(r)]
296
297
        # Default sort order: First, sort by BP, MF, CC. Second, sort by pval
298
        results.sort(key=lambda r: [r.NS, r.enrichment, r.p_uncorrected])
299
        return results # list of GOEnrichmentRecord objects
300
301
    def run_study_nts(self, study, **kws):
302
        """Run GOEA on study ids. Return results as a list of namedtuples."""
303
        goea_results = self.run_study(study, **kws)
304
        return MgrNtGOEAs(goea_results).get_goea_nts_all()
305
306
    def get_results_msg(self, results, study):
307
        """Return summary for GOEA results."""
308
        # To convert msg list to string: "\n".join(msg)
309
        msg = []
310
        if results:
311
            fmt = "{M:6,} GO terms are associated with {N:6,} of {NT:6,}"
312
            stu_items, num_gos_stu = self.get_item_cnt(results, "study_items")
313
            pop_items, num_gos_pop = self.get_item_cnt(results, "pop_items")
314
            stu_txt = fmt.format(N=len(stu_items), M=num_gos_stu, NT=len(set(study)))
315
            pop_txt = fmt.format(N=len(pop_items), M=num_gos_pop, NT=self.pop_n)
316
            msg.append("{POP} population items".format(POP=pop_txt))
317
            msg.append("{STU} study items".format(STU=stu_txt))
318
        return msg
319
320
    def get_pval_uncorr(self, study, log=sys.stdout):
321
        """Calculate the uncorrected pvalues for study items."""
322
        results = []
323
        go2studyitems = get_terms("study", study, self.assoc, self.obo_dag, log)
324
        pop_n, study_n = self.pop_n, len(study)
325
        assert study_n > 0, "NO STUDY GENES"
326
        assert pop_n > 0, "NO POPULATION GENES"
327
        allterms = set(go2studyitems).union(set(self.go2popitems))
328
        if log is not None:
329
            log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
330
                N=len(allterms), PFNC=self.pval_obj.name))
331
        calc_pvalue = self.pval_obj.calc_pvalue
332
333
        for goid in allterms:
334
            study_items = go2studyitems.get(goid, set())
335
            study_count = len(study_items)
336
            pop_items = self.go2popitems.get(goid, set())
337
            pop_count = len(pop_items)
338
339
            assert study_count >= 0, "BAD STUDY GENES: {G}\n{T}".format(G=study_items, T=goid)
340
            assert pop_count >= 0, "BAD POPULATION GENES: {G}\n{T}".format(G=pop_items, T=goid)
341
            one_record = GOEnrichmentRecord(
342
                GO=goid,
343
                p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
344
                study_items=study_items,
345
                pop_items=pop_items,
346
                ratio_in_study=(study_count, study_n),
347
                ratio_in_pop=(pop_count, pop_n))
348
349
            results.append(one_record)
350
351
        return results
352
353
    def _run_multitest_corr(self, results, usr_methods, alpha, study, log):
354
        """Do multiple-test corrections on uncorrected pvalues."""
355
        assert 0 < alpha < 1, "Test-wise alpha must fall between (0, 1)"
356
        pvals = [r.p_uncorrected for r in results]
357
        ntobj = cx.namedtuple("ntobj", "results pvals alpha nt_method study")
358
359
        for nt_method in usr_methods:
360
            ntmt = ntobj(results, pvals, alpha, nt_method, study)
361
            self._run_multitest[nt_method.source](ntmt)
362
            if log is not None:
363
                self._log_multitest_corr(log, results, ntmt, alpha)
364
365
    def _log_multitest_corr(self, log, results, ntmt, alpha):
366
        """Print information regarding multitest correction results."""
367
        ntm = ntmt.nt_method
368
        attr_mult = "p_{M}".format(M=self.methods.get_fieldname(ntm.source, ntm.method))
369
        sig_cnt = sum(1 for r in results if getattr(r, attr_mult) < alpha)
370
        log.write("{N:8,} GO terms found significant (< {A}=alpha) after ".format(
371
            N=sig_cnt, A=alpha))
372
        log.write("multitest correction: ")
373
        log.write("{MSRC} {METHOD}\n".format(MSRC=ntm.source, METHOD=ntm.method))
374
375
    def _run_multitest_statsmodels(self, ntmt):
376
        """Use multitest mthods that have been implemented in statsmodels."""
377
        # Only load statsmodels if it is used
378
        multipletests = self.methods.get_statsmodels_multipletests()
379
        results = multipletests(ntmt.pvals, ntmt.alpha, ntmt.nt_method.method)
380
        pvals_corrected = results[1] # reject_lst, pvals_corrected, alphacSidak, alphacBonf
381
        self._update_pvalcorr(ntmt, pvals_corrected)
382
383
    def _run_multitest_local(self, ntmt):
384
        """Use multitest mthods that have been implemented locally."""
385
        corrected_pvals = None
386
        method = ntmt.nt_method.method
387
        if method == "bonferroni":
388
            corrected_pvals = Bonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
389
        elif method == "sidak":
390
            corrected_pvals = Sidak(ntmt.pvals, ntmt.alpha).corrected_pvals
391
        elif method == "holm":
392
            corrected_pvals = HolmBonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
393
        elif method == "fdr":
394
            # get the empirical p-value distributions for FDR
395
            term_pop = getattr(self, 'term_pop', None)
396
            if term_pop is None:
397
                term_pop = count_terms(self.pop, self.assoc, self.obo_dag)
398
            p_val_distribution = calc_qval(len(ntmt.study),
399
                                           self.pop_n,
400
                                           self.pop, self.assoc,
401
                                           term_pop, self.obo_dag)
402
            corrected_pvals = FDR(p_val_distribution,
403
                                  ntmt.results, ntmt.alpha).corrected_pvals
404
405
        self._update_pvalcorr(ntmt, corrected_pvals)
406
407
    @staticmethod
408
    def _update_pvalcorr(ntmt, corrected_pvals):
409
        """Add data members to store multiple test corrections."""
410
        if corrected_pvals is None:
411
            return
412
        for rec, val in zip(ntmt.results, corrected_pvals):
413
            rec.set_corrected_pval(ntmt.nt_method, val)
414
415
    # Methods for writing results into tables: text, tab-separated, Excel spreadsheets
416
    def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws):
417
        """Print GOEA results to text file."""
418
        if not goea_results:
419
            sys.stdout.write("      0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt))
420
            return
421
        with open(fout_txt, 'w') as prt:
422
            if 'title' in kws:
423
                prt.write("{TITLE}\n".format(TITLE=kws['title']))
424
            data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws)
425
            log = self.log if self.log is not None else sys.stdout
426
            log.write("  {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format(
427
                N=len(data_nts),
428
                CUR=len(MgrNtGOEAs(goea_results).get_study_items()),
429
                F=fout_txt))
430
431
    def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
432
        """Print GOEA results in text format."""
433
        if prtfmt is None:
434
            prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
435
                      "{depth:02} {name:40} {study_items}\n")
436
        prtfmt = self.adjust_prtfmt(prtfmt)
437
        prt_flds = RPT.get_fmtflds(prtfmt)
438
        data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
439
        RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws)
440
        return data_nts
441
442
    def wr_xlsx(self, fout_xlsx, goea_results, **kws):
443
        """Write a xlsx file."""
444
        # kws: prt_if indent itemid2name(study_items)
445
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
446
        xlsx_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
447
        if 'fld2col_widths' not in kws:
448
            kws['fld2col_widths'] = {f:self.default_fld2col_widths.get(f, 8) for f in prt_flds}
449
        RPT.wr_xlsx(fout_xlsx, xlsx_data, **kws)
450
451
    def wr_tsv(self, fout_tsv, goea_results, **kws):
452
        """Write tab-separated table data to file"""
453
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
454
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
455
        RPT.wr_tsv(fout_tsv, tsv_data, **kws)
456
457
    def prt_tsv(self, prt, goea_results, **kws):
458
        """Write tab-separated table data"""
459
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
460
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
461
        RPT.prt_tsv(prt, tsv_data, prt_flds, **kws)
462
463
    @staticmethod
464
    def adjust_prtfmt(prtfmt):
465
        """Adjust format_strings for legal values."""
466
        prtfmt = prtfmt.replace("{p_holm-sidak", "{p_holm_sidak")
467
        prtfmt = prtfmt.replace("{p_simes-hochberg", "{p_simes_hochberg")
468
        return prtfmt
469
470
    @staticmethod
471
    def get_ns2nts(results, fldnames=None, **kws):
472
        """Get namedtuples of GOEA results, split into BP, MF, CC."""
473
        ns2nts = cx.defaultdict(list)
474
        nts = MgrNtGOEAs(results).get_goea_nts_all(fldnames, **kws)
475
        for ntgoea in nts:
476
            ns2nts[ntgoea.NS].append(ntgoea)
477
        return ns2nts
478
479
    @staticmethod
480
    def get_item_cnt(results, attrname="study_items"):
481
        """Get all study or population items (e.g., geneids)."""
482
        items = set()
483
        go_cnt = 0
484
        for rec in results:
485
            if hasattr(rec, attrname):
486
                items_cur = getattr(rec, attrname)
487
                # Only count GO term if there are items in the set.
488
                if len(items_cur) != 0:
489
                    items |= items_cur
490
                    go_cnt += 1
491
        return items, go_cnt
492
493
    @staticmethod
494
    def get_prtflds_default(results):
495
        """Get default fields names. Used in printing GOEA results.
496
497
           Researchers can control which fields they want to print in the GOEA results
498
           or they can use the default fields.
499
        """
500
        if results:
501
            return results[0].get_prtflds_default()
502
        return []
503
504
    @staticmethod
505
    def print_summary(results, min_ratio=None, indent=False, pval=0.05):
506
        """Print summary."""
507
        import goatools
508
509
        # Header contains provenance and parameters
510
        date = datetime.date.today()
511
        print("# Generated by GOATOOLS v{0} ({1})".format(goatools.__version__, date))
512
        print("# min_ratio={0} pval={1}".format(min_ratio, pval))
513
514
        # field names for output
515
        if results:
516
            print("\t".join(GOEnrichmentStudy.get_prtflds_default(results)))
517
518
        for rec in results:
519
            # calculate some additional statistics
520
            # (over_under, is_ratio_different)
521
            rec.update_remaining_fldsdefprt(min_ratio=min_ratio)
522
523
            if pval is not None and rec.p_uncorrected >= pval:
524
                continue
525
526
            if rec.is_ratio_different:
527
                print(rec.__str__(indent=indent))
528
529
    def wr_py_goea_results(self, fout_py, goea_results, **kws):
530
        """Save GOEA results into Python package containing list of namedtuples."""
531
        var_name = kws.get("var_name", "goea_results")
532
        docstring = kws.get("docstring", "")
533
        sortby = kws.get("sortby", None)
534
        if goea_results:
535
            from goatools.nt_utils import wr_py_nts
536
            nts_goea = goea_results
537
            # If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields.
538
            if hasattr(goea_results[0], "_fldsdefprt") or hasattr(goea_results[0], 'goterm'):
539
                # Exclude some attributes from the namedtuple when saving results
540
                # to a Python file because the information is redundant or verbose.
541
                nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
542
            docstring = "\n".join([docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)])
543
            assert hasattr(nts_goea[0], '_fields')
544
            if sortby is None:
545
                sortby = lambda nt: [getattr(nt, 'namespace'), getattr(nt, 'enrichment'),
546
                                     getattr(nt, 'p_uncorrected'), getattr(nt, 'depth'),
547
                                     getattr(nt, 'GO')]
548
            nts_goea = sorted(nts_goea, key=sortby)
549
            wr_py_nts(fout_py, nts_goea, docstring, var_name)
550
551
# Copyright (C) 2010-2018, H Tang et al., All rights reserved.
552