Completed
Push — master ( 907c98...6ddfe6 )
by
unknown
56s
created

GOEnrichmentStudy.get_prtflds_default()   A

Complexity

Conditions 2

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
c 1
b 0
f 0
dl 0
loc 10
rs 9.4285
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
"""
5
python %prog study.file population.file gene-association.file
6
7
This program returns P-values for functional enrichment in a cluster of
8
study genes using Fisher's exact test, and corrected for multiple testing
9
(including Bonferroni, Holm, Sidak, and false discovery rate)
10
"""
11
12
from __future__ import print_function
13
from __future__ import absolute_import
14
15
16
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved."
17
__author__ = "various"
18
19
import sys
20
import collections as cx
21
import datetime
22
23
from goatools.multiple_testing import Methods
24
from goatools.multiple_testing import Bonferroni
25
from goatools.multiple_testing import Sidak
26
from goatools.multiple_testing import HolmBonferroni
27
from goatools.multiple_testing import FDR
28
from goatools.multiple_testing import calc_qval
29
from goatools.ratio import get_terms, count_terms, is_ratio_different
30
import goatools.wr_tbl as RPT
31
from goatools.pvalcalc import FisherFactory
32
33
from goatools.goea.goea_nt_xfrm import MgrNtGOEAs
34
35
class GOEnrichmentRecord(object):
36
    """Represents one result (from a single GOTerm) in the GOEnrichmentStudy
37
    """
38
    namespace2NS = cx.OrderedDict([
39
        ('biological_process', 'BP'),
40
        ('molecular_function', 'MF'),
41
        ('cellular_component', 'CC')])
42
43
    # Fields seen in every enrichment result
44
    _fldsdefprt = [
45
        "GO",
46
        "NS",
47
        "enrichment",
48
        "name",
49
        "ratio_in_study",
50
        "ratio_in_pop",
51
        "p_uncorrected",
52
        "depth",
53
        "study_count",
54
        "study_items"]
55
    _fldsdeffmt = ["%s"]*3 + ["%-30s"] + ["%d/%d"] * 2 + ["%.3g"] + ["%d"] * 2 + ["%15s"]
56
57
    _flds = set(_fldsdefprt).intersection(
58
        set(['study_items', 'study_count', 'study_n', 'pop_items', 'pop_count', 'pop_n']))
59
60
    def __init__(self, **kwargs):
61
        # Methods seen in current enrichment result
62
        self._methods = []
63
        for key, val in kwargs.items():
64
            setattr(self, key, val)
65
            if key == 'ratio_in_study':
66
                setattr(self, 'study_count', val[0])
67
                setattr(self, 'study_n', val[1])
68
            if key == 'ratio_in_pop':
69
                setattr(self, 'pop_count', val[0])
70
                setattr(self, 'pop_n', val[1])
71
        self._init_enrichment()
72
        self.goterm = None  # the reference to the GOTerm
73
74
    def get_method_name(self):
75
        """Return name of first method in the _methods list."""
76
        return self._methods[0].fieldname
77
78
    def get_pvalue(self):
79
        """Returns pval for 1st method, if it exists. Else returns uncorrected pval."""
80
        if self._methods:
81
            return getattr(self, "p_{m}".format(m=self.get_method_name()))
82
        return getattr(self, "p_uncorrected")
83
84
    def set_corrected_pval(self, nt_method, pvalue):
85
        """Add object attribute based on method name."""
86
        self._methods.append(nt_method)
87
        fieldname = "".join(["p_", nt_method.fieldname])
88
        setattr(self, fieldname, pvalue)
89
90
    def __str__(self, indent=False):
91
        field_data = [getattr(self, f, "n.a.") for f in self._fldsdefprt[:-1]] + \
92
                     [getattr(self, "p_{}".format(m.fieldname)) for m in self._methods] + \
93
                     [", ".join(sorted(getattr(self, self._fldsdefprt[-1], set())))]
94
        fldsdeffmt = self._fldsdeffmt
95
        field_formatter = fldsdeffmt[:-1] + ["%.3g"]*len(self._methods) + [fldsdeffmt[-1]]
96
        self._chk_fields(field_data, field_formatter)
97
98
        # default formatting only works for non-"n.a" data
99
        for idx, fld in enumerate(field_data):
100
            if fld == "n.a.":
101
                field_formatter[idx] = "%s"
102
103
        # print dots to show the level of the term
104
        dots = self.get_indent_dots() if indent else ""
105
        prtdata = "\t".join(a % b for (a, b) in zip(field_formatter, field_data))
106
        return "".join([dots, prtdata])
107
108
    def get_indent_dots(self):
109
        """Get a string of dots ("....") representing the level of the GO term."""
110
        return "." * self.goterm.level if self.goterm is not None else ""
111
112
    @staticmethod
113
    def _chk_fields(field_data, field_formatter):
114
        """Check that expected fields are present."""
115
        if len(field_data) == len(field_formatter):
116
            return
117
        len_dat = len(field_data)
118
        len_fmt = len(field_formatter)
119
        msg = [
120
            "FIELD DATA({d}) != FORMATTER({f})".format(d=len_dat, f=len_fmt),
121
            "DAT({N}): {D}".format(N=len_dat, D=field_data),
122
            "FMT({N}): {F}".format(N=len_fmt, F=field_formatter)]
123
        raise Exception("\n".join(msg))
124
125
    def __repr__(self):
126
        return "GOEnrichmentRecord({GO})".format(GO=self.GO)
127
128
    def set_goterm(self, go2obj):
129
        """Set goterm and copy GOTerm's name and namespace."""
130
        self.goterm = go2obj.get(self.GO, None)
131
        present = self.goterm is not None
132
        self.name = self.goterm.name if present else "n.a."
133
        self.NS = self.namespace2NS[self.goterm.namespace] if present else "XX"
134
135
    def _init_enrichment(self):
136
        """Mark as 'enriched' or 'purified'."""
137
        self.enrichment = 'e' if ((1.0 * self.study_count / self.study_n) >
138
                                  (1.0 * self.pop_count / self.pop_n)) else 'p'
139
140
    def update_remaining_fldsdefprt(self, min_ratio=None):
141
        """Finish updating self (GOEnrichmentRecord) field, is_ratio_different."""
142
        self.is_ratio_different = is_ratio_different(min_ratio, self.study_count,
143
                                                     self.study_n, self.pop_count, self.pop_n)
144
145
146
    # -------------------------------------------------------------------------------------
147
    # Methods for getting flat namedtuple values from GOEnrichmentRecord object
148
    def get_prtflds_default(self):
149
        """Get default fields."""
150
        return self._fldsdefprt[:-1] + \
151
               ["p_{M}".format(M=m.fieldname) for m in self._methods] + \
152
               [self._fldsdefprt[-1]]
153
154
    def get_prtflds_all(self):
155
        """When converting to a namedtuple, get all possible fields in their original order."""
156
        flds = []
157
        dont_add = set(['_parents', '_methods', 'relationship_rev', 'relationship'])
158
        # Fields: GO NS enrichment name ratio_in_study ratio_in_pop p_uncorrected
159
        #         depth study_count p_sm_bonferroni p_fdr_bh study_items
160
        self._flds_append(flds, self.get_prtflds_default(), dont_add)
161
        # Fields: GO NS goterm
162
        #         ratio_in_pop pop_n pop_count pop_items name
163
        #         ratio_in_study study_n study_count study_items
164
        #         _methods enrichment p_uncorrected p_sm_bonferroni p_fdr_bh
165
        self._flds_append(flds, vars(self).keys(), dont_add)
166
        # Fields: name level is_obsolete namespace id depth parents children _parents alt_ids
167
        self._flds_append(flds, vars(self.goterm).keys(), dont_add)
168
        return flds
169
170
    @staticmethod
171
    def _flds_append(flds, addthese, dont_add):
172
        """Retain order of fields as we add them once to the list."""
173
        for fld in addthese:
174
            if fld not in flds and fld not in dont_add:
175
                flds.append(fld)
176
177
    def get_field_values(self, fldnames, rpt_fmt=True, itemid2name=None):
178
        """Get flat namedtuple fields for one GOEnrichmentRecord."""
179
        row = []
180
        # Loop through each user field desired
181
        for fld in fldnames:
182
            # 1. Check the GOEnrichmentRecord's attributes
183
            val = getattr(self, fld, None)
184
            if val is not None:
185
                if rpt_fmt:
186
                    val = self._get_rpt_fmt(fld, val, itemid2name)
187
                row.append(val)
188
            else:
189
                # 2. Check the GO object for the field
190
                val = getattr(self.goterm, fld, None)
191
                if rpt_fmt:
192
                    val = self._get_rpt_fmt(fld, val, itemid2name)
193
                if val is not None:
194
                    row.append(val)
195
                else:
196
                    # 3. Field not found, raise Exception
197
                    self._err_fld(fld, fldnames)
198
            if rpt_fmt:
199
                assert not isinstance(val, list), \
200
                   "UNEXPECTED LIST: FIELD({F}) VALUE({V}) FMT({P})".format(
201
                       P=rpt_fmt, F=fld, V=val)
202
        return row
203
204
    @staticmethod
205
    def _get_rpt_fmt(fld, val, itemid2name=None):
206
        """Return values in a format amenable to printing in a table."""
207
        if fld.startswith("ratio_"):
208
            return "{N}/{TOT}".format(N=val[0], TOT=val[1])
209
        elif fld in set(['study_items', 'pop_items', 'alt_ids']):
210
            if itemid2name is not None:
211
                val = [itemid2name.get(v, v) for v in val]
212
            return ", ".join([str(v) for v in sorted(val)])
213
        return val
214
215
    def _err_fld(self, fld, fldnames):
216
        """Unrecognized field. Print detailed Failure message."""
217
        msg = ['ERROR. UNRECOGNIZED FIELD({F})'.format(F=fld)]
218
        actual_flds = set(self.get_prtflds_default() + self.goterm.__dict__.keys())
219
        bad_flds = set(fldnames).difference(set(actual_flds))
220
        if bad_flds:
221
            msg.append("\nGOEA RESULT FIELDS: {}".format(" ".join(self._fldsdefprt)))
222
            msg.append("GO FIELDS: {}".format(" ".join(self.goterm.__dict__.keys())))
223
            msg.append("\nFATAL: {N} UNEXPECTED FIELDS({F})\n".format(
224
                N=len(bad_flds), F=" ".join(bad_flds)))
225
            msg.append("  {N} User-provided fields:".format(N=len(fldnames)))
226
            for idx, fld in enumerate(fldnames, 1):
227
                mrk = "ERROR -->" if fld in bad_flds else ""
228
                msg.append("  {M:>9} {I:>2}) {F}".format(M=mrk, I=idx, F=fld))
229
        raise Exception("\n".join(msg))
230
231
232
class GOEnrichmentStudy(object):
233
    """Runs Fisher's exact test, as well as multiple corrections
234
    """
235
    # Default Excel table column widths for GOEA results
236
    default_fld2col_widths = {
237
        'NS'        :  3,
238
        'GO'        : 12,
239
        'alt'       :  2,
240
        'level'     :  3,
241
        'depth'     :  3,
242
        'enrichment':  1,
243
        'name'      : 60,
244
        'ratio_in_study':  8,
245
        'ratio_in_pop'  : 12,
246
        'study_items'   : 15,
247
    }
248
249
    def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
250
        self.log = kws['log'] if 'log' in kws else sys.stdout
251
        self._run_multitest = {
252
            'local':lambda iargs: self._run_multitest_local(iargs),
253
            'statsmodels':lambda iargs: self._run_multitest_statsmodels(iargs)}
254
        self.pop = pop
255
        self.pop_n = len(pop)
256
        self.assoc = assoc
257
        self.obo_dag = obo_dag
258
        self.alpha = alpha
259
        if methods is None:
260
            methods = ["bonferroni", "sidak", "holm"]
261
        self.methods = Methods(methods)
262
        self.pval_obj = FisherFactory(**kws).pval_obj
263
264
        if propagate_counts:
265
            sys.stderr.write("Propagating term counts to parents ..\n")
266
            obo_dag.update_association(assoc)
267
        self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
268
269
    def run_study(self, study, **kws):
270
        """Run Gene Ontology Enrichment Study (GOEA) on study ids."""
271
        # Key-word arguments:
272
        methods = Methods(kws['methods']) if 'methods' in kws else self.methods
273
        alpha = kws['alpha'] if 'alpha' in kws else self.alpha
274
        log = kws['log'] if 'log' in kws else self.log
275
        # Calculate uncorrected pvalues
276
        results = self.get_pval_uncorr(study, log)
277
        if not results:
278
            return []
279
280
        if log is not None:
281
            log.write("  {MSG}\n".format(MSG="\n  ".join(self.get_results_msg(results, study))))
282
283
        # Do multipletest corrections on uncorrected pvalues and update results
284
        self._run_multitest_corr(results, methods, alpha, study, log)
285
286
        for rec in results:
287
            # get go term for name and level
288
            rec.set_goterm(self.obo_dag)
289
290
        # 'keep_if' can be used to keep only significant GO terms. Example:
291
        #     >>> keep_if = lambda nt: nt.p_fdr_bh < 0.05 # if results are significant
292
        #     >>> goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if)
293
        if 'keep_if' in kws:
294
            keep_if = kws['keep_if']
295
            results = [r for r in results if keep_if(r)]
296
297
        # Default sort order: First, sort by BP, MF, CC. Second, sort by pval
298
        results.sort(key=lambda r: [r.NS, r.enrichment, r.p_uncorrected])
299
        return results # list of GOEnrichmentRecord objects
300
301
    def run_study_nts(self, study, **kws):
302
        """Run GOEA on study ids. Return results as a list of namedtuples."""
303
        goea_results = self.run_study(study, **kws)
304
        return MgrNtGOEAs(goea_results).get_goea_nts_all()
305
306
    def get_results_msg(self, results, study):
307
        """Return summary for GOEA results."""
308
        # To convert msg list to string: "\n".join(msg)
309
        msg = []
310
        if results:
311
            fmt = "{M:6,} GO terms are associated with {N:6,} of {NT:6,}"
312
            stu_items, num_gos_stu = self.get_item_cnt(results, "study_items")
313
            pop_items, num_gos_pop = self.get_item_cnt(results, "pop_items")
314
            stu_txt = fmt.format(N=len(stu_items), M=num_gos_stu, NT=len(set(study)))
315
            pop_txt = fmt.format(N=len(pop_items), M=num_gos_pop, NT=self.pop_n)
316
            msg.append("{POP} population items".format(POP=pop_txt))
317
            msg.append("{STU} study items".format(STU=stu_txt))
318
        return msg
319
320
    def get_pval_uncorr(self, study, log=sys.stdout):
321
        """Calculate the uncorrected pvalues for study items."""
322
        results = []
323
        go2studyitems = get_terms("study", study, self.assoc, self.obo_dag, log)
324
        pop_n, study_n = self.pop_n, len(study)
325
        allterms = set(go2studyitems).union(set(self.go2popitems))
326
        if log is not None:
327
            log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
328
                N=len(allterms), PFNC=self.pval_obj.name))
329
        calc_pvalue = self.pval_obj.calc_pvalue
330
331
        for goid in allterms:
332
            study_items = go2studyitems.get(goid, set())
333
            study_count = len(study_items)
334
            pop_items = self.go2popitems.get(goid, set())
335
            pop_count = len(pop_items)
336
337
            one_record = GOEnrichmentRecord(
338
                GO=goid,
339
                p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
340
                study_items=study_items,
341
                pop_items=pop_items,
342
                ratio_in_study=(study_count, study_n),
343
                ratio_in_pop=(pop_count, pop_n))
344
345
            results.append(one_record)
346
347
        return results
348
349
    def _run_multitest_corr(self, results, usr_methods, alpha, study, log):
350
        """Do multiple-test corrections on uncorrected pvalues."""
351
        assert 0 < alpha < 1, "Test-wise alpha must fall between (0, 1)"
352
        pvals = [r.p_uncorrected for r in results]
353
        ntobj = cx.namedtuple("ntobj", "results pvals alpha nt_method study")
354
355
        for nt_method in usr_methods:
356
            ntmt = ntobj(results, pvals, alpha, nt_method, study)
357
            self._run_multitest[nt_method.source](ntmt)
358
            if log is not None:
359
                self._log_multitest_corr(log, results, ntmt, alpha)
360
361
    def _log_multitest_corr(self, log, results, ntmt, alpha):
362
        """Print information regarding multitest correction results."""
363
        ntm = ntmt.nt_method
364
        attr_mult = "p_{M}".format(M=self.methods.get_fieldname(ntm.source, ntm.method))
365
        sig_cnt = sum(1 for r in results if getattr(r, attr_mult) < alpha)
366
        log.write("{N:8,} GO terms found significant (< {A}=alpha) after ".format(
367
            N=sig_cnt, A=alpha))
368
        log.write("multitest correction: ")
369
        log.write("{MSRC} {METHOD}\n".format(MSRC=ntm.source, METHOD=ntm.method))
370
371
    def _run_multitest_statsmodels(self, ntmt):
372
        """Use multitest mthods that have been implemented in statsmodels."""
373
        # Only load statsmodels if it is used
374
        multipletests = self.methods.get_statsmodels_multipletests()
375
        results = multipletests(ntmt.pvals, ntmt.alpha, ntmt.nt_method.method)
376
        pvals_corrected = results[1] # reject_lst, pvals_corrected, alphacSidak, alphacBonf
377
        self._update_pvalcorr(ntmt, pvals_corrected)
378
379
    def _run_multitest_local(self, ntmt):
380
        """Use multitest mthods that have been implemented locally."""
381
        corrected_pvals = None
382
        method = ntmt.nt_method.method
383
        if method == "bonferroni":
384
            corrected_pvals = Bonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
385
        elif method == "sidak":
386
            corrected_pvals = Sidak(ntmt.pvals, ntmt.alpha).corrected_pvals
387
        elif method == "holm":
388
            corrected_pvals = HolmBonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
389
        elif method == "fdr":
390
            # get the empirical p-value distributions for FDR
391
            term_pop = getattr(self, 'term_pop', None)
392
            if term_pop is None:
393
                term_pop = count_terms(self.pop, self.assoc, self.obo_dag)
394
            p_val_distribution = calc_qval(len(ntmt.study),
395
                                           self.pop_n,
396
                                           self.pop, self.assoc,
397
                                           term_pop, self.obo_dag)
398
            corrected_pvals = FDR(p_val_distribution,
399
                                  ntmt.results, ntmt.alpha).corrected_pvals
400
401
        self._update_pvalcorr(ntmt, corrected_pvals)
402
403
    @staticmethod
404
    def _update_pvalcorr(ntmt, corrected_pvals):
405
        """Add data members to store multiple test corrections."""
406
        if corrected_pvals is None:
407
            return
408
        for rec, val in zip(ntmt.results, corrected_pvals):
409
            rec.set_corrected_pval(ntmt.nt_method, val)
410
411
    # Methods for writing results into tables: text, tab-separated, Excel spreadsheets
412
    def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws):
413
        """Print GOEA results to text file."""
414
        if not goea_results:
415
            sys.stdout.write("      0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt))
416
            return
417
        with open(fout_txt, 'w') as prt:
418
            if 'title' in kws:
419
                prt.write("{TITLE}\n".format(TITLE=kws['title']))
420
            data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws)
421
            log = self.log if self.log is not None else sys.stdout
422
            log.write("  {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format(
423
                N=len(data_nts),
424
                CUR=len(MgrNtGOEAs(goea_results).get_study_items()),
425
                F=fout_txt))
426
427
    def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
428
        """Print GOEA results in text format."""
429
        if prtfmt is None:
430
            prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
431
                      "{depth:02} {name:40} {study_items}\n")
432
        prtfmt = self.adjust_prtfmt(prtfmt)
433
        prt_flds = RPT.get_fmtflds(prtfmt)
434
        data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
435
        RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws)
436
        return data_nts
437
438
    def wr_xlsx(self, fout_xlsx, goea_results, **kws):
439
        """Write a xlsx file."""
440
        # kws: prt_if indent itemid2name(study_items)
441
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
442
        xlsx_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
443
        if 'fld2col_widths' not in kws:
444
            kws['fld2col_widths'] = {f:self.default_fld2col_widths.get(f, 8) for f in prt_flds}
445
        RPT.wr_xlsx(fout_xlsx, xlsx_data, **kws)
446
447
    def wr_tsv(self, fout_tsv, goea_results, **kws):
448
        """Write tab-separated table data to file"""
449
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
450
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
451
        RPT.wr_tsv(fout_tsv, tsv_data, **kws)
452
453
    def prt_tsv(self, prt, goea_results, **kws):
454
        """Write tab-separated table data"""
455
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
456
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
457
        RPT.prt_tsv(prt, tsv_data, prt_flds, **kws)
458
459
    @staticmethod
460
    def adjust_prtfmt(prtfmt):
461
        """Adjust format_strings for legal values."""
462
        prtfmt = prtfmt.replace("{p_holm-sidak", "{p_holm_sidak")
463
        prtfmt = prtfmt.replace("{p_simes-hochberg", "{p_simes_hochberg")
464
        return prtfmt
465
466
    @staticmethod
467
    def get_ns2nts(results, fldnames=None, **kws):
468
        """Get namedtuples of GOEA results, split into BP, MF, CC."""
469
        ns2nts = cx.defaultdict(list)
470
        nts = MgrNtGOEAs(results).get_goea_nts_all(fldnames, **kws)
471
        for ntgoea in nts:
472
            ns2nts[ntgoea.NS].append(ntgoea)
473
        return ns2nts
474
475
    @staticmethod
476
    def get_item_cnt(results, attrname="study_items"):
477
        """Get all study or population items (e.g., geneids)."""
478
        items = set()
479
        go_cnt = 0
480
        for rec in results:
481
            if hasattr(rec, attrname):
482
                items_cur = getattr(rec, attrname)
483
                # Only count GO term if there are items in the set.
484
                if len(items_cur) != 0:
485
                    items |= items_cur
486
                    go_cnt += 1
487
        return items, go_cnt
488
489
    @staticmethod
490
    def get_prtflds_default(results):
491
        """Get default fields names. Used in printing GOEA results.
492
493
           Researchers can control which fields they want to print in the GOEA results
494
           or they can use the default fields.
495
        """
496
        if results:
497
            return results[0].get_prtflds_default()
498
        return []
499
500
    @staticmethod
501
    def print_summary(results, min_ratio=None, indent=False, pval=0.05):
502
        """Print summary."""
503
        import goatools
504
505
        # Header contains provenance and parameters
506
        date = datetime.date.today()
507
        print("# Generated by GOATOOLS v{0} ({1})".format(goatools.__version__, date))
508
        print("# min_ratio={0} pval={1}".format(min_ratio, pval))
509
510
        # field names for output
511
        if results:
512
            print("\t".join(GOEnrichmentStudy.get_prtflds_default(results)))
513
514
        for rec in results:
515
            # calculate some additional statistics
516
            # (over_under, is_ratio_different)
517
            rec.update_remaining_fldsdefprt(min_ratio=min_ratio)
518
519
            if pval is not None and rec.p_uncorrected >= pval:
520
                continue
521
522
            if rec.is_ratio_different:
523
                print(rec.__str__(indent=indent))
524
525
    def wr_py_goea_results(self, fout_py, goea_results, **kws):
526
        """Save GOEA results into Python package containing list of namedtuples."""
527
        var_name = kws.get("var_name", "goea_results")
528
        docstring = kws.get("docstring", "")
529
        sortby = kws.get("sortby", None)
530
        if goea_results:
531
            from goatools.nt_utils import wr_py_nts
532
            nts_goea = goea_results
533
            # If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields.
534
            if hasattr(goea_results[0], "_fldsdefprt") or hasattr(goea_results[0], 'goterm'):
535
                # Exclude some attributes from the namedtuple when saving results
536
                # to a Python file because the information is redundant or verbose.
537
                nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
538
            docstring = "\n".join([docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)])
539
            assert hasattr(nts_goea[0], '_fields')
540
            if sortby is None:
541
                sortby = lambda nt: [getattr(nt, 'namespace'), getattr(nt, 'enrichment'),
542
                                     getattr(nt, 'p_uncorrected'), getattr(nt, 'depth'),
543
                                     getattr(nt, 'GO')]
544
            nts_goea = sorted(nts_goea, key=sortby)
545
            wr_py_nts(fout_py, nts_goea, docstring, var_name)
546
547
# Copyright (C) 2010-2018, H Tang et al., All rights reserved.
548