"""Show two methods to write the same data to an xlsx file, a tsv file, and a text file."""

import filecmp
import os
import sys

from goatools.wr_tbl import get_fmtflds, wr_xlsx, wr_tsv
from goatools.test_data.nature3102_goea import get_goea_results
from goatools.rpt.goea_nt_xfrm import get_goea_nts_prt


def test_wr_methods(log=sys.stdout):
    """Write one set of GOEA results through two APIs and compare the tsv files.

    The same subset of fields is written once with the write methods of the
    GOEA object and once with the stand-alone functions from goatools.wr_tbl.
    The two resulting tab-separated files must have identical content.

    Args:
        log: Writable text stream that receives the field listings and the
            text-format report.

    Raises:
        AssertionError: If the two tab-separated files differ.
    """
    # 1. Gene Ontology Enrichment Analysis
    # 1a. Initialize: Load ontologies, associations, and population gene IDs
    nature_data = get_goea_results()
    goeaobj = nature_data['goeaobj']
    goea_results = nature_data['goea_results']

    # 2. Write results
    # Write parameters:
    # The format_string names below are the same names as in the namedtuple field_names.
    prtfmt = "{GO} {NS} {level:>2} {depth:>2} {p_fdr_bh:5.2e} {study_count:>5} {name}\n"
    wr_params = {
        # Format for printing in text format
        'prtfmt': prtfmt,
        # Format for p-values in tsv and xlsx format
        'fld2fmt': {'p_fdr_bh': '{:8.2e}'},
        # Print a subset of the namedtuple fields; don't print all fields in the namedtuple.
        'prt_flds': get_fmtflds(prtfmt),
    }

    # Both tsv files are placed in the current working directory.
    cwddir = os.getcwd()
    tsv_obj = os.path.join(cwddir, 'nbt3102_subset_obj.tsv')
    tsv_nts = os.path.join(cwddir, 'nbt3102_subset_nt.tsv')

    # 2a. Use the write functions inside the GOEnrichmentStudy class.
    _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log)
    # 2b. Use the write functions from the wr_tbl package to print a list of namedtuples.
    _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log)

    # shallow=False forces a content comparison. The default (shallow=True)
    # reports files as equal when only their os.stat() signatures match.
    assert filecmp.cmp(tsv_obj, tsv_nts, shallow=False), \
        "TSV FILES DIFFER: {A} {B}".format(A=tsv_obj, B=tsv_nts)


def _wr_3fmt_goeaobj(tsv_obj, goea_results, goeaobj, wr_params, log):
    """Write a subset of GOEA fields using the write methods of the GOEA object.

    Args:
        tsv_obj: Filename of the tab-separated file to write.
        goea_results: Non-empty sequence of GOEA result records; the first
            record is asked for the list of all available field names.
        goeaobj: Object providing prt_txt, wr_txt, wr_xlsx, and wr_tsv.
        wr_params: Keyword arguments forwarded to every print/write call.
        log: Writable text stream for the field listing and the text report.
    """
    # List of all fields, printable or not, available from GOEA
    flds = goea_results[0].get_prtflds_all()
    log.write("\nALL GOEA FIELDS: {F}\n".format(F=" ".join(flds)))

    # Use the subset of namedtuple field names that are listed in the format string.
    # Same format: print to the log stream and print to a text file.
    goeaobj.prt_txt(log, goea_results, **wr_params)
    goeaobj.wr_txt("nbt3102_subset_obj.txt", goea_results, **wr_params)

    # Print to Excel Spreadsheet
    title = "Print subset of GOEA fields"
    goeaobj.wr_xlsx("nbt3102_subset_obj.xlsx", goea_results, title=title, **wr_params)

    # Print to tab-separated file
    goeaobj.wr_tsv(tsv_obj, goea_results, **wr_params)


def _wr_3fmt_wrtbl(tsv_nts, goea_results, wr_params, log):
    """Write a subset of namedtuple fields using the functions in the wr_tbl package.

    Args:
        tsv_nts: Filename of the tab-separated file to write.
        goea_results: GOEA result records; passed to get_goea_nts_prt to
            obtain the namedtuples that are written.
        wr_params: Keyword arguments forwarded to wr_xlsx and wr_tsv.
        log: Writable text stream for the namedtuple field listing.
    """
    goea_nts = get_goea_nts_prt(goea_results)

    # List of all fields, printable or not, in namedtuple (nt):
    log.write("\nnamedtuple FIELDS: {F}\n".format(F=" ".join(goea_nts[0]._fields)))

    # Print to Excel Spreadsheet
    title = "Print subset of namedtuple fields"
    wr_xlsx("nbt3102_subset_nt.xlsx", goea_nts, title=title, **wr_params)

    # Print to tab-separated file
    wr_tsv(tsv_nts, goea_nts, **wr_params)


# Run the demonstration when executed as a script; report to standard output.
if __name__ == '__main__':
    test_wr_methods(sys.stdout)