Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

GOEnrichmentStudy.run_study()   C

Complexity

Conditions 11

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 11
dl 0
loc 31
rs 5.4
c 1
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like GOEnrichmentStudy.run_study() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
"""
5
python %prog study.file population.file gene-association.file
6
7
This program returns P-values for functional enrichment in a cluster of
8
study genes using Fisher's exact test, and corrected for multiple testing
9
(including Bonferroni, Holm, Sidak, and false discovery rate)
10
"""
11
12
from __future__ import print_function
13
from __future__ import absolute_import
14
15
16
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved."
17
__author__ = "various"
18
19
import sys
20
import collections as cx
21
import datetime
22
23
from goatools.multiple_testing import Methods
24
from goatools.multiple_testing import Bonferroni
25
from goatools.multiple_testing import Sidak
26
from goatools.multiple_testing import HolmBonferroni
27
from goatools.multiple_testing import FDR
28
from goatools.multiple_testing import calc_qval
29
from goatools.ratio import get_terms, count_terms, is_ratio_different
30
import goatools.wr_tbl as RPT
31
from goatools.pvalcalc import FisherFactory
32
33
from goatools.rpt.goea_nt_xfrm import MgrNtGOEAs
34
35
class GOEnrichmentRecord(object):
36
    """Represents one result (from a single GOTerm) in the GOEnrichmentStudy
37
    """
38
    namespace2NS = cx.OrderedDict([
39
        ('biological_process', 'BP'),
40
        ('molecular_function', 'MF'),
41
        ('cellular_component', 'CC')])
42
43
    # Fields seen in every enrichment result
44
    _fldsdefprt = [
45
        "GO",
46
        "NS",
47
        "enrichment",
48
        "name",
49
        "ratio_in_study",
50
        "ratio_in_pop",
51
        "p_uncorrected",
52
        "depth",
53
        "study_count",
54
        "study_items"]
55
    _fldsdeffmt = ["%s"]*3 + ["%-30s"] + ["%d/%d"] * 2 + ["%.3g"] + ["%d"] * 2 + ["%15s"]
56
57
    _flds = set(_fldsdefprt).intersection(
58
        set(['study_items', 'study_count', 'study_n', 'pop_items', 'pop_count', 'pop_n']))
59
60
    def __init__(self, **kwargs):
61
        # Methods seen in current enrichment result
62
        self.method_flds = []
63
        for key, val in kwargs.items():
64
            setattr(self, key, val)
65
            if key == 'ratio_in_study':
66
                setattr(self, 'study_count', val[0])
67
                setattr(self, 'study_n', val[1])
68
            if key == 'ratio_in_pop':
69
                setattr(self, 'pop_count', val[0])
70
                setattr(self, 'pop_n', val[1])
71
        self._init_enrichment()
72
        self.goterm = None  # the reference to the GOTerm
73
74
    def get_method_name(self):
75
        """Return name of first method in the method_flds list."""
76
        return self.method_flds[0].fieldname
77
78
    def get_pvalue(self):
79
        """Returns pval for 1st method, if it exists. Else returns uncorrected pval."""
80
        if self.method_flds:
81
            return getattr(self, "p_{m}".format(m=self.get_method_name()))
82
        return getattr(self, "p_uncorrected")
83
84
    def set_corrected_pval(self, nt_method, pvalue):
85
        """Add object attribute based on method name."""
86
        self.method_flds.append(nt_method)
87
        fieldname = "".join(["p_", nt_method.fieldname])
88
        setattr(self, fieldname, pvalue)
89
90
    def __str__(self, indent=False):
91
        field_data = [getattr(self, f, "n.a.") for f in self._fldsdefprt[:-1]] + \
92
                     [getattr(self, "p_{}".format(m.fieldname)) for m in self.method_flds] + \
93
                     [", ".join(sorted(getattr(self, self._fldsdefprt[-1], set())))]
94
        fldsdeffmt = self._fldsdeffmt
95
        field_formatter = fldsdeffmt[:-1] + ["%.3g"]*len(self.method_flds) + [fldsdeffmt[-1]]
96
        self._chk_fields(field_data, field_formatter)
97
98
        # default formatting only works for non-"n.a" data
99
        for idx, fld in enumerate(field_data):
100
            if fld == "n.a.":
101
                field_formatter[idx] = "%s"
102
103
        # print dots to show the level of the term
104
        dots = self.get_indent_dots() if indent else ""
105
        prtdata = "\t".join(a % b for (a, b) in zip(field_formatter, field_data))
106
        return "".join([dots, prtdata])
107
108
    def get_indent_dots(self):
109
        """Get a string of dots ("....") representing the level of the GO term."""
110
        return "." * self.goterm.level if self.goterm is not None else ""
111
112
    @staticmethod
113
    def _chk_fields(field_data, field_formatter):
114
        """Check that expected fields are present."""
115
        if len(field_data) == len(field_formatter):
116
            return
117
        len_dat = len(field_data)
118
        len_fmt = len(field_formatter)
119
        msg = [
120
            "FIELD DATA({d}) != FORMATTER({f})".format(d=len_dat, f=len_fmt),
121
            "DAT({N}): {D}".format(N=len_dat, D=field_data),
122
            "FMT({N}): {F}".format(N=len_fmt, F=field_formatter)]
123
        raise Exception("\n".join(msg))
124
125
    def __repr__(self):
126
        return "GOEnrichmentRecord({GO})".format(GO=self.GO)
127
128
    def set_goterm(self, go2obj):
129
        """Set goterm and copy GOTerm's name and namespace."""
130
        self.goterm = go2obj.get(self.GO, None)
131
        present = self.goterm is not None
132
        self.name = self.goterm.name if present else "n.a."
133
        self.NS = self.namespace2NS[self.goterm.namespace] if present else "XX"
134
135
    def _init_enrichment(self):
136
        """Mark as 'enriched' or 'purified'."""
137
        self.enrichment = 'e' if ((1.0 * self.study_count / self.study_n) >
138
                                  (1.0 * self.pop_count / self.pop_n)) else 'p'
139
140
    def update_remaining_fldsdefprt(self, min_ratio=None):
141
        """Finish updating self (GOEnrichmentRecord) field, is_ratio_different."""
142
        self.is_ratio_different = is_ratio_different(min_ratio, self.study_count,
143
                                                     self.study_n, self.pop_count, self.pop_n)
144
145
146
    # -------------------------------------------------------------------------------------
147
    # Methods for getting flat namedtuple values from GOEnrichmentRecord object
148
    def get_prtflds_default(self):
149
        """Get default fields."""
150
        return self._fldsdefprt[:-1] + \
151
               ["p_{M}".format(M=m.fieldname) for m in self.method_flds] + \
152
               [self._fldsdefprt[-1]]
153
154
    def get_prtflds_all(self):
155
        """When converting to a namedtuple, get all possible fields in their original order."""
156
        flds = []
157
        dont_add = set(['_parents', 'method_flds', 'relationship_rev', 'relationship'])
158
        # Fields: GO NS enrichment name ratio_in_study ratio_in_pop p_uncorrected
159
        #         depth study_count p_sm_bonferroni p_fdr_bh study_items
160
        self._flds_append(flds, self.get_prtflds_default(), dont_add)
161
        # Fields: GO NS goterm
162
        #         ratio_in_pop pop_n pop_count pop_items name
163
        #         ratio_in_study study_n study_count study_items
164
        #         method_flds enrichment p_uncorrected p_sm_bonferroni p_fdr_bh
165
        self._flds_append(flds, vars(self).keys(), dont_add)
166
        # Fields: name level is_obsolete namespace id depth parents children _parents alt_ids
167
        self._flds_append(flds, vars(self.goterm).keys(), dont_add)
168
        return flds
169
170
    @staticmethod
171
    def _flds_append(flds, addthese, dont_add):
172
        """Retain order of fields as we add them once to the list."""
173
        for fld in addthese:
174
            if fld not in flds and fld not in dont_add:
175
                flds.append(fld)
176
177
    def get_field_values(self, fldnames, rpt_fmt=True, itemid2name=None):
178
        """Get flat namedtuple fields for one GOEnrichmentRecord."""
179
        row = []
180
        # Loop through each user field desired
181
        for fld in fldnames:
182
            # 1. Check the GOEnrichmentRecord's attributes
183
            val = getattr(self, fld, None)
184
            if val is not None:
185
                if rpt_fmt:
186
                    val = self._get_rpt_fmt(fld, val, itemid2name)
187
                row.append(val)
188
            else:
189
                # 2. Check the GO object for the field
190
                val = getattr(self.goterm, fld, None)
191
                if rpt_fmt:
192
                    val = self._get_rpt_fmt(fld, val, itemid2name)
193
                if val is not None:
194
                    row.append(val)
195
                else:
196
                    # 3. Field not found, raise Exception
197
                    self._err_fld(fld, fldnames)
198
            if rpt_fmt:
199
                assert not isinstance(val, list), \
200
                   "UNEXPECTED LIST: FIELD({F}) VALUE({V}) FMT({P})".format(
201
                       P=rpt_fmt, F=fld, V=val)
202
        return row
203
204
    @staticmethod
205
    def _get_rpt_fmt(fld, val, itemid2name=None):
206
        """Return values in a format amenable to printing in a table."""
207
        if fld.startswith("ratio_"):
208
            return "{N}/{TOT}".format(N=val[0], TOT=val[1])
209
        elif fld in set(['study_items', 'pop_items', 'alt_ids']):
210
            if itemid2name is not None:
211
                val = [itemid2name.get(v, v) for v in val]
212
            return ", ".join([str(v) for v in sorted(val)])
213
        return val
214
215
    def _err_fld(self, fld, fldnames):
216
        """Unrecognized field. Print detailed Failure message."""
217
        msg = ['ERROR. UNRECOGNIZED FIELD({F})'.format(F=fld)]
218
        actual_flds = set(self.get_prtflds_default() + self.goterm.__dict__.keys())
219
        bad_flds = set(fldnames).difference(set(actual_flds))
220
        if bad_flds:
221
            msg.append("\nGOEA RESULT FIELDS: {}".format(" ".join(self._fldsdefprt)))
222
            msg.append("GO FIELDS: {}".format(" ".join(self.goterm.__dict__.keys())))
223
            msg.append("\nFATAL: {N} UNEXPECTED FIELDS({F})\n".format(
224
                N=len(bad_flds), F=" ".join(bad_flds)))
225
            msg.append("  {N} User-provided fields:".format(N=len(fldnames)))
226
            for idx, fld in enumerate(fldnames, 1):
227
                mrk = "ERROR -->" if fld in bad_flds else ""
228
                msg.append("  {M:>9} {I:>2}) {F}".format(M=mrk, I=idx, F=fld))
229
        raise Exception("\n".join(msg))
230
231
232
class GOEnrichmentStudy(object):
233
    """Runs Fisher's exact test, as well as multiple corrections
234
    """
235
    # Default Excel table column widths for GOEA results
236
    default_fld2col_widths = {
237
        'NS'        :  3,
238
        'GO'        : 12,
239
        'alt'       :  2,
240
        'level'     :  3,
241
        'depth'     :  3,
242
        'enrichment':  1,
243
        'name'      : 60,
244
        'ratio_in_study':  8,
245
        'ratio_in_pop'  : 12,
246
        'study_items'   : 15,
247
    }
248
249
    def __init__(self, pop, assoc, obo_dag, propagate_counts=True, alpha=.05, methods=None, **kws):
250
        self.log = kws['log'] if 'log' in kws else sys.stdout
251
        self._run_multitest = {
252
            'local':lambda iargs: self._run_multitest_local(iargs),
253
            'statsmodels':lambda iargs: self._run_multitest_statsmodels(iargs)}
254
        self.pop = set(pop)
255
        self.pop_n = len(pop)
256
        self.assoc = assoc
257
        self.obo_dag = obo_dag
258
        self.alpha = alpha
259
        if methods is None:
260
            methods = ["bonferroni", "sidak", "holm"]
261
        self.methods = Methods(methods)
262
        self.pval_obj = FisherFactory(**kws).pval_obj
263
264
        if propagate_counts:
265
            sys.stderr.write("Propagating term counts to parents ..\n")
266
            obo_dag.update_association(assoc)
267
        self.go2popitems = get_terms("population", pop, assoc, obo_dag, self.log)
268
269
    def run_study(self, study, **kws):
270
        """Run Gene Ontology Enrichment Study (GOEA) on study ids."""
271
        # Key-word arguments:
272
        methods = Methods(kws['methods']) if 'methods' in kws else self.methods
273
        alpha = kws['alpha'] if 'alpha' in kws else self.alpha
274
        log = kws['log'] if 'log' in kws else self.log
275
        # Calculate uncorrected pvalues
276
        results = self.get_pval_uncorr(study, log)
277
        if not results:
278
            return []
279
280
        if log is not None:
281
            log.write("  {MSG}\n".format(MSG="\n  ".join(self.get_results_msg(results, study))))
282
283
        # Do multipletest corrections on uncorrected pvalues and update results
284
        self._run_multitest_corr(results, methods, alpha, study, log)
285
286
        for rec in results:
287
            # get go term for name and level
288
            rec.set_goterm(self.obo_dag)
289
290
        # 'keep_if' can be used to keep only significant GO terms. Example:
291
        #     >>> keep_if = lambda nt: nt.p_fdr_bh < 0.05 # if results are significant
292
        #     >>> goea_results = goeaobj.run_study(geneids_study, keep_if=keep_if)
293
        if 'keep_if' in kws:
294
            keep_if = kws['keep_if']
295
            results = [r for r in results if keep_if(r)]
296
297
        # Default sort order: First, sort by BP, MF, CC. Second, sort by pval
298
        results.sort(key=lambda r: [r.NS, r.enrichment, r.p_uncorrected])
299
        return results # list of GOEnrichmentRecord objects
300
301
    def run_study_nts(self, study, **kws):
302
        """Run GOEA on study ids. Return results as a list of namedtuples."""
303
        goea_results = self.run_study(study, **kws)
304
        return MgrNtGOEAs(goea_results).get_goea_nts_all()
305
306
    def get_results_msg(self, results, study):
307
        """Return summary for GOEA results."""
308
        # To convert msg list to string: "\n".join(msg)
309
        msg = []
310
        if results:
311
            fmt = "{M:6,} GO terms are associated with {N:6,} of {NT:6,}"
312
            stu_items, num_gos_stu = self.get_item_cnt(results, "study_items")
313
            pop_items, num_gos_pop = self.get_item_cnt(results, "pop_items")
314
            stu_txt = fmt.format(N=len(stu_items), M=num_gos_stu, NT=len(set(study)))
315
            pop_txt = fmt.format(N=len(pop_items), M=num_gos_pop, NT=self.pop_n)
316
            msg.append("{POP} population items".format(POP=pop_txt))
317
            msg.append("{STU} study items".format(STU=stu_txt))
318
        return msg
319
320
    def get_pval_uncorr(self, study, log=sys.stdout):
321
        """Calculate the uncorrected pvalues for study items."""
322
        results = []
323
        study_in_pop = self.pop.intersection(study)
324
        # " 99%    378 of    382 study items found in population"
325
        go2studyitems = get_terms("study", study_in_pop, self.assoc, self.obo_dag, log)
326
        pop_n, study_n = self.pop_n, len(study_in_pop)
327
        allterms = set(go2studyitems).union(set(self.go2popitems))
328
        if log is not None:
329
            study_n_orig = len(study)
330
            log.write("{R:3.0f}% {N:>6,} of {M:>6,} study items found in population({P})\n".format(
331
                N=study_n, M=study_n_orig, P=pop_n, R=100.0*study_n/study_n_orig))
332
            log.write("Calculating {N:,} uncorrected p-values using {PFNC}\n".format(
333
                N=len(allterms), PFNC=self.pval_obj.name))
334
        calc_pvalue = self.pval_obj.calc_pvalue
335
336
        for goid in allterms:
337
            study_items = go2studyitems.get(goid, set())
338
            study_count = len(study_items)
339
            pop_items = self.go2popitems.get(goid, set())
340
            pop_count = len(pop_items)
341
342
            one_record = GOEnrichmentRecord(
343
                GO=goid,
344
                p_uncorrected=calc_pvalue(study_count, study_n, pop_count, pop_n),
345
                study_items=study_items,
346
                pop_items=pop_items,
347
                ratio_in_study=(study_count, study_n),
348
                ratio_in_pop=(pop_count, pop_n))
349
350
            results.append(one_record)
351
352
        return results
353
354
    def _run_multitest_corr(self, results, usrmethod_flds, alpha, study, log):
355
        """Do multiple-test corrections on uncorrected pvalues."""
356
        assert 0 < alpha < 1, "Test-wise alpha must fall between (0, 1)"
357
        pvals = [r.p_uncorrected for r in results]
358
        ntobj = cx.namedtuple("ntobj", "results pvals alpha nt_method study")
359
360
        for nt_method in usrmethod_flds:
361
            ntmt = ntobj(results, pvals, alpha, nt_method, study)
362
            self._run_multitest[nt_method.source](ntmt)
363
            if log is not None:
364
                self._log_multitest_corr(log, results, ntmt, alpha)
365
366
    def _log_multitest_corr(self, log, results, ntmt, alpha):
367
        """Print information regarding multitest correction results."""
368
        ntm = ntmt.nt_method
369
        attr_mult = "p_{M}".format(M=self.methods.get_fieldname(ntm.source, ntm.method))
370
        sig_cnt = sum(1 for r in results if getattr(r, attr_mult) < alpha)
371
        log.write("{N:8,} GO terms found significant (< {A}=alpha) after ".format(
372
            N=sig_cnt, A=alpha))
373
        log.write("multitest correction: ")
374
        log.write("{MSRC} {METHOD}\n".format(MSRC=ntm.source, METHOD=ntm.method))
375
376
    def _run_multitest_statsmodels(self, ntmt):
377
        """Use multitest mthods that have been implemented in statsmodels."""
378
        # Only load statsmodels if it is used
379
        multipletests = self.methods.get_statsmodels_multipletests()
380
        results = multipletests(ntmt.pvals, ntmt.alpha, ntmt.nt_method.method)
381
        pvals_corrected = results[1] # reject_lst, pvals_corrected, alphacSidak, alphacBonf
382
        self._update_pvalcorr(ntmt, pvals_corrected)
383
384
    def _run_multitest_local(self, ntmt):
385
        """Use multitest mthods that have been implemented locally."""
386
        corrected_pvals = None
387
        method = ntmt.nt_method.method
388
        if method == "bonferroni":
389
            corrected_pvals = Bonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
390
        elif method == "sidak":
391
            corrected_pvals = Sidak(ntmt.pvals, ntmt.alpha).corrected_pvals
392
        elif method == "holm":
393
            corrected_pvals = HolmBonferroni(ntmt.pvals, ntmt.alpha).corrected_pvals
394
        elif method == "fdr":
395
            # get the empirical p-value distributions for FDR
396
            term_pop = getattr(self, 'term_pop', None)
397
            if term_pop is None:
398
                term_pop = count_terms(self.pop, self.assoc, self.obo_dag)
399
            p_val_distribution = calc_qval(len(ntmt.study),
400
                                           self.pop_n,
401
                                           self.pop, self.assoc,
402
                                           term_pop, self.obo_dag)
403
            corrected_pvals = FDR(p_val_distribution,
404
                                  ntmt.results, ntmt.alpha).corrected_pvals
405
406
        self._update_pvalcorr(ntmt, corrected_pvals)
407
408
    @staticmethod
409
    def _update_pvalcorr(ntmt, corrected_pvals):
410
        """Add data members to store multiple test corrections."""
411
        if corrected_pvals is None:
412
            return
413
        for rec, val in zip(ntmt.results, corrected_pvals):
414
            rec.set_corrected_pval(ntmt.nt_method, val)
415
416
    # Methods for writing results into tables: text, tab-separated, Excel spreadsheets
417
    def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws):
418
        """Print GOEA results to text file."""
419
        if not goea_results:
420
            sys.stdout.write("      0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt))
421
            return
422
        with open(fout_txt, 'w') as prt:
423
            if 'title' in kws:
424
                prt.write("{TITLE}\n".format(TITLE=kws['title']))
425
            data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws)
426
            log = self.log if self.log is not None else sys.stdout
427
            log.write("  {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format(
428
                N=len(data_nts),
429
                CUR=len(MgrNtGOEAs(goea_results).get_study_items()),
430
                F=fout_txt))
431
432
    def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
433
        """Print GOEA results in text format."""
434
        if prtfmt is None:
435
            prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
436
                      "{depth:02} {name:40} {study_items}\n")
437
        prtfmt = self.adjust_prtfmt(prtfmt)
438
        prt_flds = RPT.get_fmtflds(prtfmt)
439
        data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
440
        RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws)
441
        return data_nts
442
443
    def wr_xlsx(self, fout_xlsx, goea_results, **kws):
444
        """Write a xlsx file."""
445
        # kws: prt_if indent itemid2name(study_items)
446
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
447
        xlsx_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
448
        if 'fld2col_widths' not in kws:
449
            kws['fld2col_widths'] = {f:self.default_fld2col_widths.get(f, 8) for f in prt_flds}
450
        RPT.wr_xlsx(fout_xlsx, xlsx_data, **kws)
451
452
    def wr_tsv(self, fout_tsv, goea_results, **kws):
453
        """Write tab-separated table data to file"""
454
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
455
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
456
        RPT.wr_tsv(fout_tsv, tsv_data, **kws)
457
458
    def prt_tsv(self, prt, goea_results, **kws):
459
        """Write tab-separated table data"""
460
        prt_flds = kws.get('prt_flds', self.get_prtflds_default(goea_results))
461
        tsv_data = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
462
        RPT.prt_tsv(prt, tsv_data, prt_flds, **kws)
463
464
    @staticmethod
465
    def adjust_prtfmt(prtfmt):
466
        """Adjust format_strings for legal values."""
467
        prtfmt = prtfmt.replace("{p_holm-sidak", "{p_holm_sidak")
468
        prtfmt = prtfmt.replace("{p_simes-hochberg", "{p_simes_hochberg")
469
        return prtfmt
470
471
    @staticmethod
472
    def get_ns2nts(results, fldnames=None, **kws):
473
        """Get namedtuples of GOEA results, split into BP, MF, CC."""
474
        ns2nts = cx.defaultdict(list)
475
        nts = MgrNtGOEAs(results).get_goea_nts_all(fldnames, **kws)
476
        for ntgoea in nts:
477
            ns2nts[ntgoea.NS].append(ntgoea)
478
        return ns2nts
479
480
    @staticmethod
481
    def get_item_cnt(results, attrname="study_items"):
482
        """Get all study or population items (e.g., geneids)."""
483
        items = set()
484
        go_cnt = 0
485
        for rec in results:
486
            if hasattr(rec, attrname):
487
                items_cur = getattr(rec, attrname)
488
                # Only count GO term if there are items in the set.
489
                if len(items_cur) != 0:
490
                    items |= items_cur
491
                    go_cnt += 1
492
        return items, go_cnt
493
494
    @staticmethod
495
    def get_prtflds_default(results):
496
        """Get default fields names. Used in printing GOEA results.
497
498
           Researchers can control which fields they want to print in the GOEA results
499
           or they can use the default fields.
500
        """
501
        if results:
502
            return results[0].get_prtflds_default()
503
        return []
504
505
    @staticmethod
506
    def print_summary(results, min_ratio=None, indent=False, pval=0.05):
507
        """Print summary."""
508
        import goatools
509
510
        # Header contains provenance and parameters
511
        date = datetime.date.today()
512
        print("# Generated by GOATOOLS v{0} ({1})".format(goatools.__version__, date))
513
        print("# min_ratio={0} pval={1}".format(min_ratio, pval))
514
515
        # field names for output
516
        if results:
517
            print("\t".join(GOEnrichmentStudy.get_prtflds_default(results)))
518
519
        for rec in results:
520
            # calculate some additional statistics
521
            # (over_under, is_ratio_different)
522
            rec.update_remaining_fldsdefprt(min_ratio=min_ratio)
523
524
            if pval is not None and rec.p_uncorrected >= pval:
525
                continue
526
527
            if rec.is_ratio_different:
528
                print(rec.__str__(indent=indent))
529
530
    def wr_py_goea_results(self, fout_py, goea_results, **kws):
531
        """Save GOEA results into Python package containing list of namedtuples."""
532
        var_name = kws.get("var_name", "goea_results")
533
        docstring = kws.get("docstring", "")
534
        sortby = kws.get("sortby", None)
535
        if goea_results:
536
            from goatools.nt_utils import wr_py_nts
537
            nts_goea = goea_results
538
            # If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields.
539
            if hasattr(goea_results[0], "_fldsdefprt") or hasattr(goea_results[0], 'goterm'):
540
                # Exclude some attributes from the namedtuple when saving results
541
                # to a Python file because the information is redundant or verbose.
542
                nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
543
            docstring = "\n".join([docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)])
544
            assert hasattr(nts_goea[0], '_fields')
545
            if sortby is None:
546
                sortby = lambda nt: [getattr(nt, 'namespace'), getattr(nt, 'enrichment'),
547
                                     getattr(nt, 'p_uncorrected'), getattr(nt, 'depth'),
548
                                     getattr(nt, 'GO')]
549
            nts_goea = sorted(nts_goea, key=sortby)
550
            wr_py_nts(fout_py, nts_goea, docstring, var_name)
551
552
# Copyright (C) 2010-2018, H Tang et al., All rights reserved.
553