1
|
|
|
"""Contains generic table-writing functions. Data is expected to be a list of namedtuples. |
2
|
|
|
|
3
|
|
|
kwargs (kws): |
4
|
|
|
'title': First row will contain user-provided title string |
5
|
|
|
'prt_if': Only print a line if user-specified test returns True. |
6
|
|
|
prt_if is a lambda function with the data item's namedtuple as input. |
7
|
|
|
Example: prt_if = lambda nt: nt.p_uncorrected < 0.05 |
8
|
|
|
'sort_by' : User-customizable sort when printing. |
9
|
|
|
sort_by is a lambda function with the data item's namedtuple as input. |
10
|
|
|
It is the 'key' used in the sorted function. |
11
|
|
|
Example: sort_by = lambda nt: [nt.NS, -1*nt.depth] |
12
|
|
|
'hdrs' : A list of column headers to use when printing the table. |
13
|
|
|
default: The fields in the data's namedtuple are used as the column headers. |
14
|
|
|
'sep': Separator used when printing the tab-separated table format. |
15
|
|
|
default: sep = '\t' |
16
|
|
|
'prt_flds' : Used to print a subset of the fields in the namedtuple or |
17
|
|
|
to control the order of the print fields |
18
|
|
|
'fld2col_widths': A dictionary of column widths used when writing xlsx files. |
19
|
|
|
'fld2fmt': Used in tsv files and xlsx files for formatting specific fields |
20
|
|
|
|
21
|
|
|
For adding color or other formatting to a row based on value in a row: |
22
|
|
|
'ntfld_wbfmt': namedtuple field containing a value used as a key for a xlsx format |
23
|
|
|
'ntval2wbfmtdict': namedtuple value and corresponding xlsx format dict. Examples: |
24
|
|
|
""" |
25
|
|
|
|
26
|
|
|
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved." |
27
|
|
|
__author__ = "DV Klopfenstein" |
28
|
|
|
|
29
|
|
|
import re |
30
|
|
|
import sys |
31
|
|
|
from goatools.wr_tbl_class import get_hdrs |
32
|
|
|
|
33
|
|
|
def prt_txt(prt, data_nts, prtfmt=None, nt_fields=None, **kws):
    """Write one formatted line per namedtuple to the stream prt.

    The lines are produced by get_lines using the same arguments.
    If no lines are produced, a "0 items" notice is written to
    sys.stdout and nothing is written to prt.
    """
    formatted = get_lines(data_nts, prtfmt, nt_fields, **kws)
    if not formatted:
        sys.stdout.write(" 0 items. NOT WRITING\n")
        return
    for text in formatted:
        prt.write(text)
41
|
|
|
|
42
|
|
|
def get_lines(data_nts, prtfmt=None, nt_fields=None, **kws):
    """Return a list of formatted text lines, one per selected namedtuple.

    Args:
        data_nts: Sequence of namedtuples to format.
        prtfmt: Format string whose fields are namedtuple field names.
            If None, a default format is built from the first item's fields
            using the optional 'joinchr' and 'eol' keyword args.
        nt_fields: If not None, the fields named in prtfmt are checked
            against these field names before formatting.
        **kws: Optional 'sort_by' (key function for sorting) and
            'prt_if' (predicate selecting which namedtuples are formatted).

    Returns:
        List of strings. Empty if there is no data and no prtfmt was given.
    """
    # optional keyword args: prt_if sort_by joinchr eol
    if prtfmt is None:
        # A default format needs a first item; no items means no lines.
        if not data_nts:
            return []
        prtfmt = mk_fmtfld(data_nts[0], kws.get('joinchr', ' '), kws.get('eol', '\n'))
    # if nt_fields arg is None, use fields from prtfmt string.
    if nt_fields is not None:
        _chk_flds_fmt(nt_fields, prtfmt)
    if 'sort_by' in kws:
        data_nts = sorted(data_nts, key=kws['sort_by'])
    prt_if = kws.get('prt_if')
    return [prtfmt.format(**data_nt._asdict())
            for data_nt in data_nts
            if prt_if is None or prt_if(data_nt)]
58
|
|
|
|
59
|
|
|
def prt_nts(data_nts, prtfmt=None, prt=sys.stdout, nt_fields=None, **kws):
    """Write a table of namedtuples to prt; same as prt_txt with data first.

    All arguments are handed to prt_txt unchanged.
    """
    prt_txt(prt, data_nts, prtfmt=prtfmt, nt_fields=nt_fields, **kws)
62
|
|
|
|
63
|
|
|
def wr_xlsx(fout_xlsx, data_xlsx, **kws):
    """Write a list of namedtuples into one worksheet of a xlsx file.

    Args:
        fout_xlsx: Name of the xlsx file to write.
        data_xlsx: Sequence of namedtuples. The fields of the first item
            are passed to WrXlsx along with the keyword args.
        **kws: Optional keyword args forwarded to WrXlsx. 'items' is also
            read here as the noun used in the status message
            (default: "items").

    A one-line status message is written to sys.stdout. If data_xlsx is
    empty, no workbook is created.
    """
    # optional keyword args: fld2col_widths hdrs prt_if sort_by fld2fmt prt_flds
    items_str = kws.get("items", "items")
    if not data_xlsx:
        sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_xlsx))
        return
    # Imported only when a workbook is actually created.
    from goatools.wr_tbl_class import WrXlsx
    # Open xlsx file
    xlsxobj = WrXlsx(fout_xlsx, data_xlsx[0]._fields, **kws)
    worksheet = xlsxobj.add_worksheet()
    # Write title (optional) and headers.
    row_idx = xlsxobj.wr_title(worksheet)
    row_idx = xlsxobj.wr_hdrs(worksheet, row_idx)
    # Remember where the data starts so the number of data rows can be reported.
    row_idx_data0 = row_idx
    # Write data
    row_idx = xlsxobj.wr_data(data_xlsx, row_idx, worksheet)
    # Close xlsx file
    xlsxobj.workbook.close()
    sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
        N=row_idx-row_idx_data0, ITEMS=items_str, FOUT=fout_xlsx))
85
|
|
|
|
86
|
|
|
def wr_xlsx_sections(fout_xlsx, xlsx_data, **kws):
    """Write a xlsx file of named sections, each followed by its namedtuple rows.

    xlsx_data is a list of (section_text, namedtuples) pairs. Column headers
    are written under the first section and again under any section holding
    more than prt_hdr_min namedtuples. A one-line status message is written
    to sys.stdout; 'items' in kws sets the noun used in that message.
    """
    from goatools.wr_tbl_class import WrXlsx
    items_str = kws["items"] if "items" in kws else "items"
    prt_hdr_min = 10
    num_items = 0
    if not xlsx_data:
        sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_xlsx))
        return
    # Basic data checks (first section only)
    first_section = xlsx_data[0]
    assert len(first_section) == 2, "wr_xlsx_sections EXPECTED: [(section, nts), ..."
    assert first_section[1], \
        "wr_xlsx_sections EXPECTED SECTION({S}) LIST TO HAVE DATA".format(S=first_section[0])
    # Open xlsx file and write title (optional); headers are written per section below.
    xlsxobj = WrXlsx(fout_xlsx, first_section[1][0]._fields, **kws)
    worksheet = xlsxobj.add_worksheet()
    row_idx = xlsxobj.wr_title(worksheet)
    hdrs_wrote = False
    # Write data
    for section_text, data_nts in xlsx_data:
        num_items += len(data_nts)
        section_fmt = xlsxobj.wbfmtobj.get_fmt_section()
        row_idx = xlsxobj.wr_row_mergeall(worksheet, section_text, section_fmt, row_idx)
        # Headers go under the first section and are repeated for long sections.
        if not hdrs_wrote or len(data_nts) > prt_hdr_min:
            row_idx = xlsxobj.wr_hdrs(worksheet, row_idx)
            hdrs_wrote = True
        row_idx = xlsxobj.wr_data(data_nts, row_idx, worksheet)
    # Close xlsx file
    xlsxobj.workbook.close()
    sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT} ({S} sections)\n".format(
        N=num_items, ITEMS=items_str, FOUT=fout_xlsx, S=len(xlsx_data)))
118
|
|
|
|
119
|
|
|
def wr_tsv(fout_tsv, tsv_data, **kws):
    """Write a file of tab-separated table data.

    Args:
        fout_tsv: Name of the file to write, or None to write the table
            to sys.stdout.
        tsv_data: Sequence of namedtuples to write.
        **kws: Forwarded to prt_tsv. 'items' is also read here as the noun
            used in the status message (default: "items").

    When a file is written, a one-line "WROTE" message goes to sys.stdout.
    When tsv_data is empty, nothing is opened and a "NOT WRITING" message
    is written instead.
    """
    items_str = kws.get("items", "items")
    if not tsv_data:
        sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_tsv))
        return
    if fout_tsv is None:
        # Table goes to stdout; no file to report or close.
        prt_tsv(sys.stdout, tsv_data, **kws)
        return
    # The context manager closes the file even if prt_tsv raises.
    with open(fout_tsv, 'w') as ostrm:
        num_items = prt_tsv(ostrm, tsv_data, **kws)
    sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
        N=num_items, ITEMS=items_str, FOUT=fout_tsv))
132
|
|
|
|
133
|
|
|
def prt_tsv(prt, data_nts, **kws):
    """Write a header line and one separator-joined row per namedtuple to prt.

    Optional keyword args read here: 'sep' (default tab), 'fld2fmt',
    'sort_by', 'prt_if' and 'prt_flds'. All keyword args are also passed
    to get_hdrs, which supplies the header names.

    Returns the number of data rows written (the header is not counted).
    """
    # User-controlled printing options
    sep = kws.get('sep', "\t")
    hdrs = get_hdrs(data_nts[0]._fields, **kws)
    fld2fmt = kws.get('fld2fmt')
    if 'sort_by' in kws:
        data_nts = sorted(data_nts, key=kws['sort_by'])
    prt_if = kws.get('prt_if')
    prt_flds = kws['prt_flds'] if 'prt_flds' in kws else data_nts[0]._fields
    # Header line is prefixed with "# "
    prt.write("# {}\n".format(sep.join(hdrs)))
    # Data rows
    num_written = 0
    for ntd in data_nts:
        # Skip rows rejected by the user-provided test
        if prt_if is not None and not prt_if(ntd):
            continue
        if fld2fmt is None:
            row_vals = [getattr(ntd, fld) for fld in prt_flds]
        else:
            pairs = [(fld, getattr(ntd, fld)) for fld in prt_flds]
            row_vals = _fmt_fields(pairs, fld2fmt)
        prt.write("{}\n".format(sep.join(str(d) for d in row_vals)))
        num_written += 1
    return num_written
158
|
|
|
|
159
|
|
|
def _fmt_fields(fld_vals, fld2fmt): |
160
|
|
|
"""Optional user-formatting of specific fields, eg, pval: '{:8.2e}'.""" |
161
|
|
|
vals = [] |
162
|
|
|
for fld, val in fld_vals: |
163
|
|
|
if fld in fld2fmt: |
164
|
|
|
val = fld2fmt[fld].format(val) |
165
|
|
|
vals.append(val) |
166
|
|
|
return vals |
167
|
|
|
|
168
|
|
|
def _chk_flds_fmt(nt_fields, prtfmt):
    """Raise an Exception if prtfmt names a field that is not in nt_fields.

    The exception message lists every field in prtfmt and marks the
    ones that have no matching namedtuple field.
    """
    fmtflds = get_fmtflds(prtfmt)
    # Every field needed for printing is available: nothing to report.
    if set(fmtflds) <= set(nt_fields):
        return
    report = ['CANNOT PRINT USING: "{PF}"'.format(PF=prtfmt.rstrip())]
    report.extend(
        " {ERR:8} {FLD}".format(ERR="" if fld in nt_fields else "ERROR-->", FLD=fld)
        for fld in fmtflds)
    raise Exception('\n'.join(report))
181
|
|
|
|
182
|
|
|
def get_fmtflds(prtfmt):
    """Return the list of field names used in the format string prtfmt.

    Any format spec after ':' is dropped, so "{depth:02}" yields "depth".
    Names are returned in order of appearance, duplicates included.
    """
    # Example prtfmt: "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\n"
    # Braces are excluded from the match so adjacent fields such as
    # "{GO}:{name}" are seen as two fields rather than one.
    return [f.split(':')[0] for f in re.findall(r'\{([^{}\s]+)\}', prtfmt)]
186
|
|
|
|
187
|
|
|
def get_fmtfldsdict(prtfmt):
    """Return a dict mapping each field name in the format string to itself."""
    # Example prtfmt: "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\n"
    fldnames = get_fmtflds(prtfmt)
    return dict(zip(fldnames, fldnames))
191
|
|
|
|
192
|
|
|
def _prt_txt_hdr(prt, prtfmt):
    """Write a '#'-prefixed header line for a text report to prt.

    The header is made by formatting prtfmt with each field's own name as
    its value, so the column names line up with the data columns.
    """
    tblhdrs = get_fmtfldsdict(prtfmt)
    # Header values are strings, not numbers: keep only the width from specs
    # like ":5.3e". Braces are excluded from the match so the substitution
    # cannot run past the end of the field into an adjacent one.
    hdrfmt = re.sub(r':(\d+)\.[^\s{}]+}', r':\1}', prtfmt)
    # Drop zero-padding, eg, ":02" becomes ":2".
    hdrfmt = re.sub(r':(0+)(\d+)}', r':\2}', hdrfmt)
    prt.write("#{}".format(hdrfmt.format(**tblhdrs)))
199
|
|
|
|
200
|
|
|
def mk_fmtfld(nt_item, joinchr=" ", eol="\n"):
    """Build a format string with one replacement field per namedtuple field.

    The fields are joined with joinchr and followed by eol. A few field
    names get a default format spec and prefix; all others are plain "{name}".
    """
    # Default templates keyed by fieldname; {FLD} is filled in with the name.
    special = {
        'hdrgo': "{{{FLD}:1,}}",
        'dcnt': "{{{FLD}:6,}}",
        'level': "L{{{FLD}:02,}}",
        'depth': "D{{{FLD}:02,}}",
    }
    plain = "{{{FLD}}}"
    pieces = [special.get(name, plain).format(FLD=name) for name in nt_item._fields]
    return "{LINE}{EOL}".format(LINE=joinchr.join(pieces), EOL=eol)
217
|
|
|
|
218
|
|
|
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved. |
219
|
|
|
|