Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

test_wr_methods()   A

Complexity

Conditions 2

Size

Total Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
dl 0
loc 27
rs 9.232
c 0
b 0
f 0
1
"""Show two methods to write the same data into: xlsx file, tsv file, and text file."""
2
3
import os
4
import sys
5
6
import filecmp
7
from goatools.wr_tbl import get_fmtflds, wr_xlsx, wr_tsv
8
from goatools.test_data.nature3102_goea import get_goea_results
9
from goatools.rpt.goea_nt_xfrm import get_goea_nts_prt
10
11
12
def test_wr_methods(log=sys.stdout):
    """Demonstrate printing a subset of all available fields using two methods.

    The same GOEA results are written once with the write functions of the
    GOEA object and once with the functions from the wr_tbl package. The two
    tab-separated files produced must have identical contents.

    Args:
        log: Writable text stream passed to both writer helpers.

    Raises:
        AssertionError: If the contents of the two tsv files differ.
    """
    # 1. Gene Ontology Enrichment Analysis
    #    1a. Initialize: Load ontologies, associations, and population gene IDs
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']
    # 2. Write results
    #    Write parameters:
    #    The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt' : prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt' : {'p_fdr_bh' : '{:8.2e}'},
        # Print a subset namedtuple fields, don't print all fields in namedtuple.
        'prt_flds' : get_fmtflds(prtfmt)
    }
    #    2a. Use the write functions inside the GOEnrichmentStudy class.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)
    #    2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)
    # 3. Both methods must have produced the same tsv content.
    #    shallow=False forces a content comparison: with the default shallow=True,
    #    files whose os.stat() signatures (type, size, mtime) match are reported
    #    equal without their contents ever being read.
    assert filecmp.cmp(tsv_obj, tsv_nts, shallow=False), \
        "TSV FILES DIFFER: {A} {B}".format(A=tsv_obj, B=tsv_nts)
39
40
41
42
def _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log):
    """Write GOEA results as text, xlsx, and tsv using the GOEA object's own writers."""
    # Report every field name, printable or not, offered by a GOEA result
    all_fields = " ".join(goea_results[0].get_prtflds_all())
    log.write("\nALL GOEA FIELDS: {F}\n".format(F=all_fields))
    # Text format: the same write parameters go to the screen and to the file
    goeaobj.prt_txt(log, goea_results, **wr_params)
    goeaobj.wr_txt("nbt3102_subset_obj.txt", goea_results, **wr_params)
    # Excel spreadsheet
    goeaobj.wr_xlsx(
        "nbt3102_subset_obj.xlsx",
        goea_results,
        title="Print subset of GOEA fields",
        **wr_params)
    # Tab-separated file
    goeaobj.wr_tsv(tsv_obj, goea_results, **wr_params)
56
57
58
def _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log):
    """Write GOEA results as xlsx and tsv using the stand-alone wr_tbl functions."""
    # Records handed to the wr_tbl writers; the first one is read via ._fields below
    nts = get_goea_nts_prt(goea_results)
    # Report every field name, printable or not, in the namedtuple (nt)
    field_names = " ".join(nts[0]._fields)
    log.write("\nnamedtuple FIELDS: {F}\n".format(F=field_names))
    # Excel spreadsheet
    wr_xlsx(
        "nbt3102_subset_nt.xlsx",
        nts,
        title="Print subset of namedtuple fields",
        **wr_params)
    # Tab-separated file
    wr_tsv(tsv_nts, nts, **wr_params)
68
69
70
# Script entry point: run the demonstration, reporting to standard output.
if __name__ == "__main__":
    test_wr_methods(log=sys.stdout)
72