prt_tsv() - Code Metrics - tanghaibao/goatools - Measure and Improve Code Quality continuously with Scrutinizer

prt_tsv() D
last analyzed 2018-07-02 19:48 UTC

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	13
dl	0
loc	25
rs	4.2
c	0
b	0
f	0

How to fix Complexity

"""Contains generic table-writing functions. Data is expected to be a list of namedtuples.

       kwargs (kws):
           'title': First row will contain user-provided title string
           'prt_if': Only print a line if user-specified test returns True.
               prt_if is a lambda function with the data item's namedtuple as input.
               Example: prt_if = lambda nt: nt.p_uncorrected < 0.05
           'sort_by' : User-customizable sort when printing.
               sort_by is a lambda function with the data item's namedtuple as input.
               It is the 'key' used in the sorted function.
               Example: sort_by = lambda nt: [nt.NS, -1*nt.depth]
           'hdrs' : A list of column headers to use when printing the table.
               default: The fields in the data's namedtuple are used as the column headers.
           'sep': Separator used when printing the tab-separated table format.
               default: sep = '\t'
           'prt_flds' : Used to print a subset of the fields in the namedtuple or
               to control the order of the print fields
           'fld2col_widths': A dictionary of column widths used when writing xlsx files.
           'fld2fmt': Used in tsv files and xlsx files for formatting specific fields

           For adding color or other formatting to a row based on value in a row:
               'ntfld_wbfmt': namedtuple field containing a value used as a key for a xlsx format
               'ntval2wbfmtdict': namedtuple value and corresponding xlsx format dict. Examples:
"""

__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved."
__author__ = "DV Klopfenstein"

import re
import sys
from goatools.wr_tbl_class import get_hdrs

def prt_txt(prt, data_nts, prtfmt=None, nt_fields=None, **kws):
    """Write one formatted text line per namedtuple to the stream prt.

    The lines are produced by get_lines(), which receives data_nts, prtfmt,
    nt_fields and all keyword arguments unchanged. If get_lines() returns
    nothing, prt is left untouched and a "0 items" notice is written to
    sys.stdout instead.
    """
    formatted = get_lines(data_nts, prtfmt, nt_fields, **kws)
    # Nothing to print: report on stdout rather than on the target stream
    if not formatted:
        sys.stdout.write("      0 items. NOT WRITING\n")
        return
    for txt in formatted:
        prt.write(txt)

def get_lines(data_nts, prtfmt=None, nt_fields=None, **kws):
    """Return a list of formatted strings, one for each printed namedtuple.

    Args:
        data_nts: Sequence of namedtuples to format.
        prtfmt: Format string whose replacement fields are namedtuple field
            names, e.g. "{GO} {name}\\n". If None, a default format is built
            from the first namedtuple by mk_fmtfld().
        nt_fields: If not None, the fields used in prtfmt are checked
            against these names by _chk_flds_fmt(), which raises an
            Exception if any field named in prtfmt is missing.

    Optional keyword args (kws):
        'sort_by': key function given to sorted() to order the namedtuples.
        'prt_if': predicate; only namedtuples for which it is true are kept.
        'joinchr', 'eol': passed to mk_fmtfld() when prtfmt is None
            (defaults: ' ' and '\\n').

    Returns:
        A list of strings. An empty list is returned when there is no data
        and no prtfmt, since there is no namedtuple from which a default
        format could be derived.
    """
    if prtfmt is None:
        # The default format is derived from the first item; with no items
        # there is nothing to format, so return instead of indexing into
        # an empty sequence.
        if not data_nts:
            return []
        prtfmt = mk_fmtfld(data_nts[0], kws.get('joinchr', ' '), kws.get('eol', '\n'))
    # Only validate the format string when the caller supplies field names.
    if nt_fields is not None:
        _chk_flds_fmt(nt_fields, prtfmt)
    if 'sort_by' in kws:
        data_nts = sorted(data_nts, key=kws['sort_by'])
    prt_if = kws.get('prt_if')
    return [prtfmt.format(**data_nt._asdict())
            for data_nt in data_nts
            if prt_if is None or prt_if(data_nt)]

def prt_nts(data_nts, prtfmt=None, prt=None, nt_fields=None, **kws):
    """Print a list of namedtuples as a text table; thin wrapper around prt_txt().

    Args:
        data_nts: Sequence of namedtuples to print.
        prtfmt: Format string passed on to prt_txt().
        prt: Output stream with a write() method. When None (the default),
            sys.stdout is looked up at call time, so output follows any
            redirection or capture of sys.stdout done after this module
            was imported.
        nt_fields: Passed on to prt_txt().
        **kws: Passed on to prt_txt().
    """
    if prt is None:
        prt = sys.stdout
    prt_txt(prt, data_nts, prtfmt, nt_fields, **kws)

def wr_xlsx(fout_xlsx, data_xlsx, **kws):
    """Write a list of namedtuples into one worksheet of a xlsx file.

    The keyword arguments are handed to WrXlsx. The 'items' keyword
    (default "items") is the noun used in the status message written to
    sys.stdout. If data_xlsx is empty, no file is written and only a
    "0 items" notice is printed.
    """
    from goatools.wr_tbl_class import WrXlsx
    # optional keyword args: fld2col_widths hdrs prt_if sort_by fld2fmt prt_flds
    items_str = kws.get("items", "items")
    if not data_xlsx:
        sys.stdout.write("      0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_xlsx))
        return
    # Open xlsx file
    xlsxobj = WrXlsx(fout_xlsx, data_xlsx[0]._fields, **kws)
    worksheet = xlsxobj.add_worksheet()
    # Title (optional) first, then headers; data starts at the row after them
    row_data_begin = xlsxobj.wr_hdrs(worksheet, xlsxobj.wr_title(worksheet))
    row_data_end = xlsxobj.wr_data(data_xlsx, row_data_begin, worksheet)
    # Close xlsx file
    xlsxobj.workbook.close()
    # Number reported is the count of rows advanced while writing data
    sys.stdout.write("  {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
        N=row_data_end-row_data_begin, ITEMS=items_str, FOUT=fout_xlsx))

def wr_xlsx_sections(fout_xlsx, xlsx_data, **kws):
    """Write a xlsx file where each section name is followed by its namedtuple rows.

    xlsx_data is a list of (section_text, namedtuples) pairs. Column headers
    are written for the first section and again for every section holding
    more than 10 namedtuples. The 'items' keyword (default "items") is the
    noun used in the status message written to sys.stdout. If xlsx_data is
    empty, no file is written and only a "0 items" notice is printed.
    """
    from goatools.wr_tbl_class import WrXlsx
    items_str = kws.get("items", "items")
    if not xlsx_data:
        sys.stdout.write("      0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_xlsx))
        return
    # Sections larger than this get their own header row
    prt_hdr_min = 10
    # Basic data checks, done on the first section only
    section0 = xlsx_data[0]
    assert len(section0) == 2, "wr_xlsx_sections EXPECTED: [(section, nts), ..."
    assert section0[1], \
        "wr_xlsx_sections EXPECTED SECTION({S}) LIST TO HAVE DATA".format(S=section0[0])
    # Open xlsx file and write title (optional)
    xlsxobj = WrXlsx(fout_xlsx, section0[1][0]._fields, **kws)
    worksheet = xlsxobj.add_worksheet()
    row_idx = xlsxobj.wr_title(worksheet)
    num_items = 0
    need_first_hdrs = True
    for section_text, data_nts in xlsx_data:
        num_items += len(data_nts)
        # Section name goes into a single merged row
        fmt = xlsxobj.wbfmtobj.get_fmt_section()
        row_idx = xlsxobj.wr_row_mergeall(worksheet, section_text, fmt, row_idx)
        if need_first_hdrs or len(data_nts) > prt_hdr_min:
            row_idx = xlsxobj.wr_hdrs(worksheet, row_idx)
            need_first_hdrs = False
        row_idx = xlsxobj.wr_data(data_nts, row_idx, worksheet)
    # Close xlsx file
    xlsxobj.workbook.close()
    sys.stdout.write("  {N:>5} {ITEMS} WROTE: {FOUT} ({S} sections)\n".format(
        N=num_items, ITEMS=items_str, FOUT=fout_xlsx, S=len(xlsx_data)))

def wr_tsv(fout_tsv, tsv_data, **kws):
    """Write namedtuples as a tab-separated table to a file or to sys.stdout.

    Args:
        fout_tsv: Name of the output file, or None to write the table to
            sys.stdout.
        tsv_data: Sequence of namedtuples. If empty, nothing is written and
            a "0 items" notice is printed on sys.stdout.
        **kws: Passed on to prt_tsv(). The 'items' keyword (default "items")
            is the noun used in the status messages.
    """
    items_str = kws.get("items", "items")
    if not tsv_data:
        sys.stdout.write("      0 {ITEMS}. NOT WRITING {FOUT}\n".format(
            ITEMS=items_str, FOUT=fout_tsv))
        return
    if fout_tsv is None:
        prt_tsv(sys.stdout, tsv_data, **kws)
        return
    # The context manager closes the file even if prt_tsv raises
    with open(fout_tsv, 'w') as ofstrm:
        num_items = prt_tsv(ofstrm, tsv_data, **kws)
    sys.stdout.write("  {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
        N=num_items, ITEMS=items_str, FOUT=fout_tsv))

def prt_tsv(prt, data_nts, **kws):
    """Write a header line and one separated line per namedtuple to prt.

    Optional keyword args (kws):
        'sep': column separator (default: tab).
        'sort_by': key function given to sorted() to order the rows.
        'prt_if': predicate; only rows for which it is true are written.
        'prt_flds': names of the fields to write, in print order
            (default: all fields of the namedtuple).
        'fld2fmt': dict of fieldname -> format string, applied by
            _fmt_fields() to the listed fields.
    All keyword args are also passed to get_hdrs() to obtain the headers.

    Returns the number of data rows written (the header is not counted).
    """
    # User-controlled printing options
    sep = kws.get('sep', "\t")
    hdrs = get_hdrs(data_nts[0]._fields, **kws)
    fld2fmt = kws.get('fld2fmt')
    if 'sort_by' in kws:
        data_nts = sorted(data_nts, key=kws['sort_by'])
    prt_if = kws.get('prt_if')
    if 'prt_flds' in kws:
        prt_flds = kws['prt_flds']
    else:
        prt_flds = data_nts[0]._fields
    # Header line starts with "# "
    prt.write("# {}\n".format(sep.join(hdrs)))
    # Data lines
    num_written = 0
    for ntd in data_nts:
        # Skip rows rejected by the user's filter
        if prt_if is not None and not prt_if(ntd):
            continue
        if fld2fmt is None:
            vals = [getattr(ntd, fld) for fld in prt_flds]
        else:
            vals = _fmt_fields([(fld, getattr(ntd, fld)) for fld in prt_flds], fld2fmt)
        prt.write("{}\n".format(sep.join(str(val) for val in vals)))
        num_written += 1
    return num_written

def _fmt_fields(fld_vals, fld2fmt):
    """Return the values of (field, value) pairs, formatting the fields found in fld2fmt.

    fld2fmt maps a fieldname to a format string, eg, pval: '{:8.2e}'.
    Values whose field is not in fld2fmt are returned unchanged.
    """
    return [fld2fmt[fld].format(val) if fld in fld2fmt else val
            for fld, val in fld_vals]

def _chk_flds_fmt(nt_fields, prtfmt):
    """Raise an Exception if prtfmt uses a field that is not in nt_fields.

    The exception message shows the format string followed by one line per
    format field, with the missing ones marked by "ERROR-->".
    Returns None when every field in prtfmt is present in nt_fields.
    """
    fmtflds = get_fmtflds(prtfmt)
    # All data needed for print is present, return.
    if set(fmtflds) <= set(nt_fields):
        return
    msg = ['CANNOT PRINT USING: "{PF}"'.format(PF=prtfmt.rstrip())]
    msg.extend(
        "  {ERR:8} {FLD}".format(ERR="" if fld in nt_fields else "ERROR-->", FLD=fld)
        for fld in fmtflds)
    raise Exception('\n'.join(msg))

def get_fmtflds(prtfmt):
    """Return the list of fieldnames used in the format string, in order.

    Any format spec after the first ':' is dropped, so "{fdr_bh:5.3e}"
    yields "fdr_bh".

    Example prtfmt:
        "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\\n"
    """
    # Non-greedy match: stop at the first closing brace so that fields not
    # separated by whitespace, e.g. "{GO},{name}", are found individually
    # instead of being merged into a single bogus field.
    return [f.split(':')[0] for f in re.findall(r'\{(\S+?)\}', prtfmt)]

def get_fmtfldsdict(prtfmt):
    """Return a dict mapping each fieldname in the format string to itself."""
    # Example prtfmt: "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\n"
    fmtflds = get_fmtflds(prtfmt)
    return dict(zip(fmtflds, fmtflds))

def _prt_txt_hdr(prt, prtfmt):
    """Write a '#'-prefixed header line built by filling prtfmt with its own fieldnames."""
    # The header values are strings, not numbers, so the format is adjusted:
    # a spec of the form ":<digits>.<rest>}" is cut down to ":<digits>}" ...
    hdrfmt = re.sub(r':(\d+)\.\S+}', r':\1}', prtfmt)
    # ... and leading zeros in a width, e.g. ":02}", are removed.
    hdrfmt = re.sub(r':(0+)(\d+)}', r':\2}', hdrfmt)
    fld2hdr = get_fmtfldsdict(prtfmt)
    hdrline = hdrfmt.format(**fld2hdr)
    prt.write("#{}".format(hdrline))

def mk_fmtfld(nt_item, joinchr=" ", eol="\n"):
    """Build a default format string from the fields of a namedtuple.

    Each field becomes "{fieldname}", except for a few fieldnames which get
    a fixed format spec and, for level and depth, a letter prefix. The
    pieces are joined with joinchr and terminated with eol.
    """
    # Default formats based on fieldname: fieldname -> (prefix, format spec)
    fld2prefix_spec = {
        'hdrgo': ('', '1,'),
        'dcnt': ('', '6,'),
        'level': ('L', '02,'),
        'depth': ('D', '02,'),
    }
    pieces = []
    for name in nt_item._fields:
        if name in fld2prefix_spec:
            prefix, spec = fld2prefix_spec[name]
            piece = "{PRE}{{{FLD}:{SPEC}}}".format(PRE=prefix, FLD=name, SPEC=spec)
        else:
            piece = "{{{FLD}}}".format(FLD=name)
        pieces.append(piece)
    return "{LINE}{EOL}".format(LINE=joinchr.join(pieces), EOL=eol)

# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.


1			"""Contains generic table-writing functions. Data is expected to be a list of namedtuples.
2
3			kwargs (kws):
4			'title': First row will contain user-provided title string
5			'prt_if': Only print a line if user-specfied test returns True.
6			prt_if is a lambda function with the data item's namedtuple as input.
7			Example: prt_if = lambda nt: nt.p_uncorrected < 0.05
8			'sort_by' : User-customizable sort when printing.
9			sortby is a lambda function with the data item's namedtuple as input.
10			It is the 'key' used in the sorted function.
11			Example: sort_by = lambda nt: [nt.NS, -1*nt.depth]
12			'hdrs' : A list of column headers to use when printing the table.
13			default: The fields in the data's namedtuple is used as the column headers.
14			'sep': Separator used when printing the tab-separated table format.
15			default: sep = '\t'
16			'prt_flds' : Used to print a subset of the fields in the namedtuple or
17			to control the order of the print fields
18			'fld2col_widths: A dictionary of column widths used when writing xlsx files.
19			'fld2fmt': Used in tsv files and xlsx files for formatting specific fields
20
21			For adding color or other formatting to a row based on value in a row:
22			'ntfld_wbfmt': namedtuple field containing a value used as a key for a xlsx format
23			'ntval2wbfmtdict': namedtuple value and corresponding xlsx format dict. Examples:
24			"""
25
26			__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved."
27			__author__ = "DV Klopfenstein"
28
29			import re
30			import sys
31			from goatools.wr_tbl_class import get_hdrs
32
33			def prt_txt(prt, data_nts, prtfmt=None, nt_fields=None, **kws):
34			"""Print list of namedtuples into a table using prtfmt."""
35			lines = get_lines(data_nts, prtfmt, nt_fields, **kws)
36			if lines:
37			for line in lines:
38			prt.write(line)
39			else:
40			sys.stdout.write(" 0 items. NOT WRITING\n")
41
42			def get_lines(data_nts, prtfmt=None, nt_fields=None, **kws):
43			"""Print list of namedtuples into a table using prtfmt."""
44			lines = []
45			# optional keyword args: prt_if sort_by
46			if prtfmt is None:
47			prtfmt = mk_fmtfld(data_nts[0], kws.get('joinchr', ' '), kws.get('eol', '\n'))
48			# if nt_fields arg is None, use fields from prtfmt string.
49			if nt_fields is not None:
50			_chk_flds_fmt(nt_fields, prtfmt)
51			if 'sort_by' in kws:
52			data_nts = sorted(data_nts, key=kws['sort_by'])
53			prt_if = kws.get('prt_if', None)
54			for data_nt in data_nts:
55			if prt_if is None or prt_if(data_nt):
56			lines.append(prtfmt.format(**data_nt._asdict()))
57			return lines
58
59			def prt_nts(data_nts, prtfmt=None, prt=sys.stdout, nt_fields=None, **kws):
60			"""Print list of namedtuples into a table using prtfmt."""
61			prt_txt(prt, data_nts, prtfmt, nt_fields, **kws)
62
63			def wr_xlsx(fout_xlsx, data_xlsx, **kws):
64			"""Write a spreadsheet into a xlsx file."""
65			from goatools.wr_tbl_class import WrXlsx
66			# optional keyword args: fld2col_widths hdrs prt_if sort_by fld2fmt prt_flds
67			items_str = kws.get("items", "items") if "items" not in kws else kws["items"]
68			if data_xlsx:
69			# Open xlsx file
70			xlsxobj = WrXlsx(fout_xlsx, data_xlsx[0]._fields, **kws)
71			worksheet = xlsxobj.add_worksheet()
72			# Write title (optional) and headers.
73			row_idx = xlsxobj.wr_title(worksheet)
74			row_idx = xlsxobj.wr_hdrs(worksheet, row_idx)
75			row_idx_data0 = row_idx
76			# Write data
77			row_idx = xlsxobj.wr_data(data_xlsx, row_idx, worksheet)
78			# Close xlsx file
79			xlsxobj.workbook.close()
80			sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
81			N=row_idx-row_idx_data0, ITEMS=items_str, FOUT=fout_xlsx))
82			else:
83			sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
84			ITEMS=items_str, FOUT=fout_xlsx))
85
86			def wr_xlsx_sections(fout_xlsx, xlsx_data, **kws):
87			"""Write xlsx file containing section names followed by lines of namedtuple data."""
88			from goatools.wr_tbl_class import WrXlsx
89			items_str = "items" if "items" not in kws else kws["items"]
90			prt_hdr_min = 10
91			num_items = 0
92			if xlsx_data:
93			# Basic data checks
94			assert len(xlsx_data[0]) == 2, "wr_xlsx_sections EXPECTED: [(section, nts), ..."
95			assert xlsx_data[0][1], \
96			"wr_xlsx_sections EXPECTED SECTION({S}) LIST TO HAVE DATA".format(S=xlsx_data[0][0])
97			# Open xlsx file and write title (optional) and headers.
98			xlsxobj = WrXlsx(fout_xlsx, xlsx_data[0][1][0]._fields, **kws)
99			worksheet = xlsxobj.add_worksheet()
100			row_idx = xlsxobj.wr_title(worksheet)
101			hdrs_wrote = False
102			# Write data
103			for section_text, data_nts in xlsx_data:
104			num_items += len(data_nts)
105			fmt = xlsxobj.wbfmtobj.get_fmt_section()
106			row_idx = xlsxobj.wr_row_mergeall(worksheet, section_text, fmt, row_idx)
107			if hdrs_wrote is False or len(data_nts) > prt_hdr_min:
108			row_idx = xlsxobj.wr_hdrs(worksheet, row_idx)
109			hdrs_wrote = True
110			row_idx = xlsxobj.wr_data(data_nts, row_idx, worksheet)
111			# Close xlsx file
112			xlsxobj.workbook.close()
113			sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT} ({S} sections)\n".format(
114			N=num_items, ITEMS=items_str, FOUT=fout_xlsx, S=len(xlsx_data)))
115			else:
116			sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
117			ITEMS=items_str, FOUT=fout_xlsx))
118
119			def wr_tsv(fout_tsv, tsv_data, **kws):
120			"""Write a file of tab-separated table data"""
121			items_str = "items" if "items" not in kws else kws["items"]
122			if tsv_data:
123			ifstrm = sys.stdout if fout_tsv is None else open(fout_tsv, 'w')
124			num_items = prt_tsv(ifstrm, tsv_data, **kws)
125			if fout_tsv is not None:
126			sys.stdout.write(" {N:>5} {ITEMS} WROTE: {FOUT}\n".format(
127			N=num_items, ITEMS=items_str, FOUT=fout_tsv))
128			ifstrm.close()
129			else:
130			sys.stdout.write(" 0 {ITEMS}. NOT WRITING {FOUT}\n".format(
131			ITEMS=items_str, FOUT=fout_tsv))
132
133			def prt_tsv(prt, data_nts, **kws):
134			"""Print tab-separated table data"""
135			# User-controlled printing options
136			sep = "\t" if 'sep' not in kws else kws['sep']
137			flds_all = data_nts[0]._fields
138			hdrs = get_hdrs(flds_all, **kws)
139			fld2fmt = None if 'fld2fmt' not in kws else kws['fld2fmt']
140			if 'sort_by' in kws:
141			data_nts = sorted(data_nts, key=kws['sort_by'])
142			prt_if = kws['prt_if'] if 'prt_if' in kws else None
143			prt_flds = kws['prt_flds'] if 'prt_flds' in kws else data_nts[0]._fields
144			# Write header
145			prt.write("# {}\n".format(sep.join(hdrs)))
146			# Write data
147			items = 0
148			for nt_data_row in data_nts:
149			if prt_if is None or prt_if(nt_data_row):
150			if fld2fmt is not None:
151			row_fld_vals = [(fld, getattr(nt_data_row, fld)) for fld in prt_flds]
152			row_vals = _fmt_fields(row_fld_vals, fld2fmt)
153			else:
154			row_vals = [getattr(nt_data_row, fld) for fld in prt_flds]
155			prt.write("{}\n".format(sep.join(str(d) for d in row_vals)))
156			items += 1
157			return items
158
159			def _fmt_fields(fld_vals, fld2fmt):
160			"""Optional user-formatting of specific fields, eg, pval: '{:8.2e}'."""
161			vals = []
162			for fld, val in fld_vals:
163			if fld in fld2fmt:
164			val = fld2fmt[fld].format(val)
165			vals.append(val)
166			return vals
167
168			def _chk_flds_fmt(nt_fields, prtfmt):
169			"""Check that all fields in the prtfmt have corresponding data in the namedtuple."""
170			fmtflds = get_fmtflds(prtfmt)
171			missing_data = set(fmtflds).difference(set(nt_fields))
172			# All data needed for print is present, return.
173			if not missing_data:
174			return
175			#raise Exception('MISSING DATA({M}).'.format(M=" ".join(missing_data)))
176			msg = ['CANNOT PRINT USING: "{PF}"'.format(PF=prtfmt.rstrip())]
177			for fld in fmtflds:
178			errmrk = "" if fld in nt_fields else "ERROR-->"
179			msg.append(" {ERR:8} {FLD}".format(ERR=errmrk, FLD=fld))
180			raise Exception('\n'.join(msg))
181
182			def get_fmtflds(prtfmt):
183			"""Return the fieldnames in the formatter text."""
184			# Example prtfmt: "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\n"
185			return [f.split(':')[0] for f in re.findall(r'{(\S+)}', prtfmt)]
186
187			def get_fmtfldsdict(prtfmt):
188			"""Return the fieldnames in the formatter text."""
189			# Example prtfmt: "{NS} {study_cnt:2} {fdr_bh:5.3e} L{level:02} D{depth:02} {GO} {name}\n"
190			return {v:v for v in get_fmtflds(prtfmt)}
191
192			def _prt_txt_hdr(prt, prtfmt):
193			"""Print header for text report."""
194			tblhdrs = get_fmtfldsdict(prtfmt)
195			# If needed, reformat for format_string for header, which has strings, not floats.
196			hdrfmt = re.sub(r':(\d+)\.\S+}', r':\1}', prtfmt)
197			hdrfmt = re.sub(r':(0+)(\d+)}', r':\2}', hdrfmt)
198			prt.write("#{}".format(hdrfmt.format(**tblhdrs)))
199
200			def mk_fmtfld(nt_item, joinchr=" ", eol="\n"):
201			"""Given a namedtuple, return a format_field string."""
202			fldstrs = []
203			# Default formats based on fieldname
204			fld2fmt = {
205			'hdrgo' : lambda f: "{{{FLD}:1,}}".format(FLD=f),
206			'dcnt' : lambda f: "{{{FLD}:6,}}".format(FLD=f),
207			'level' : lambda f: "L{{{FLD}:02,}}".format(FLD=f),
208			'depth' : lambda f: "D{{{FLD}:02,}}".format(FLD=f),
209			}
210			for fld in nt_item._fields:
211			if fld in fld2fmt:
212			val = fld2fmt[fld](fld)
213			else:
214			val = "{{{FLD}}}".format(FLD=fld)
215			fldstrs.append(val)
216			return "{LINE}{EOL}".format(LINE=joinchr.join(fldstrs), EOL=eol)
217
218			# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
219

tanghaibao / goatools

prt_tsv() D last analyzed 2018-07-02 19:48 UTC

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like

prt_tsv() D
last analyzed 2018-07-02 19:48 UTC