1
|
|
|
"""Sorts GO IDs or user-provided sections containing GO IDs.""" |
2
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved." |
4
|
|
|
__author__ = "DV Klopfenstein" |
5
|
|
|
|
6
|
|
|
import sys |
7
|
|
|
import collections as cx |
8
|
|
|
from goatools.wr_tbl import prt_txt |
9
|
|
|
from goatools.grouper.wrxlsx import WrXlsxSortedGos |
10
|
|
|
from goatools.grouper.sorter_nts import SorterNts |
11
|
|
|
from goatools.grouper.sorter_gos import SorterGoIds |
12
|
|
|
from goatools.grouper.wr_sections import WrSections |
13
|
|
|
|
14
|
|
|
|
15
|
|
|
class Sorter(object):
    """Sorts GO IDs or user-provided sections containing GO IDs.

       User GO IDs grouped under header GO IDs are not sorted by the Grouper class.
       Sort both user GO IDs in a group and header GO IDs across groups with these:

           S: use_sections
           s: section_sortby (T=True, F=False, S=lambda sort function)
           h: hdrgo_sortby   Sorts hdr GO IDs
           u: sortby         Sorts user GO IDs
           P: hdrgo_prt      If False, removes GO IDs used only as GO group headers
                             (header GO IDs which are not user GO IDs);
                             the list is left in sorted order.

                 rm_h hdr_sort      usr_sort      S  s  h  u  p
                 ---  ------------  ------------  -- -- -- -- --
         case 1: NO   hdrgo_sortby  usrgo_sortby  N  T  H  U  T
         case 2: YES  hdrgo_sortby  usrgo_sortby  N  T  H  U  F
         case 3: NO   section_order usrgo_sortby  S  F  -  U  T
         case 4: YES  section_order usrgo_sortby  S  F  -  U  F
         case 5: YES  |<--- section_sortby --->|  S  S  -  -  -

                                         |print|
         sec  usesec prthdr prtsec 1d 2d hdr usr
         ---- ------ ------ ------ -- -- --- ---
         none   -    true     -    y  .  hdr usr  A
         none   -    false    -    y  .  ... usr  B

         yes  False  true     -    y  .  hdr usr  A
         yes  False  false    -    y  .  ... usr  B

         yes  True   True   False  .  y  hdr usr
         yes  True   False  False  .  y  ... usr
    """

    # Keyword arguments which select the content of the returned namedtuples
    kw_keys_nts = {"hdrgo_prt", "section_prt", "top_n", "use_sections"}
    # Keyword arguments which control the print format
    kw_keys_prt = {"prt", "prtfmt"}

    def __init__(self, grprobj, **kws):
        """Store the grouper object and create the GO ID and section sorters.

        Optional keyword arguments: sortby, hdrgo_sortby, section_sortby.
        Each defaults to None when not provided.
        """
        # Keyword arguments:
        sortby = kws.get('sortby')
        hdrgo_sortby = kws.get('hdrgo_sortby')
        section_sortby = kws.get('section_sortby')
        # data members
        self.grprobj = grprobj
        self.sortobj = SorterGoIds(grprobj, sortby, hdrgo_sortby)
        # SorterNts, or None if the grouper has no sections
        self.sectobj = self._init_sectobj(section_sortby)

    def wr_xlsx_nts(self, fout_xlsx, desc2nts, **kws):
        """Write grouped and sorted user GO IDs into an xlsx file."""
        # kws (content): hdrgo_prt section_prt use_sections
        # kws (prt fmt): title fld2col_widths ...
        xlsxobj = WrXlsxSortedGos("sortname", self)
        xlsxobj.wr_xlsx_nts(fout_xlsx, desc2nts, **kws)

    def prt_gos(self, prt=sys.stdout, **kws_usr):
        """Sort user GO ids, grouped under broader GO terms or sections. Print to screen.

        Returns the desc2nts dict which was printed.
        """
        # Keyword arguments (control content): hdrgo_prt section_prt use_sections
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        desc2nts = self.get_desc2nts(**kws_usr)
        # Keyword arguments (control print format): prt prtfmt
        kws_prt = {k: v for k, v in kws_usr.items() if k in self.kw_keys_prt}
        self.prt_nts(desc2nts, prt, **kws_prt)
        return desc2nts

    def prt_nts(self, desc2nts, prt=sys.stdout, prtfmt=None):
        """Print grouped and sorted GO IDs held in a desc2nts dict."""
        # Default print format: header/user marker followed by the GO DAG's own format
        if prtfmt is None:
            prtfmt = "{{hdr1usr01:2}} {FMT}\n".format(FMT=self.grprobj.gosubdag.prt_attr['fmt'])
        # 1-D: data to print is a flat list of namedtuples
        if 'flat' in desc2nts:
            prt_txt(prt, desc2nts['flat'], prtfmt=prtfmt)
        # 2-D: data to print is a list of [(section, nts), ...
        else:
            WrSections.prt_sections(prt, desc2nts['sections'], prtfmt)

    def get_desc2nts(self, **kws_usr):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        Keyword arguments not listed in kw_keys_nts are silently ignored.
        """
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        kws_nts = {k: v for k, v in kws_usr.items() if k in self.kw_keys_nts}
        return self.get_desc2nts_fnc(**kws_nts)

    def get_desc2nts_fnc(self, hdrgo_prt=True, section_prt=None,
                         top_n=None, use_sections=True):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        The returned dict always holds 'sortobj' (this object) and 'hdrgo_prt',
        plus either 'flat' (a list of namedtuples) or 'sections'
        (a list of (section_name, namedtuples) pairs).
        """
        # RETURN: flat list of namedtuples (no sections exist or they are not used)
        nts_flat = self.get_nts_flat(hdrgo_prt, use_sections)
        if nts_flat is not None:
            if not use_sections:
                return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt}
            # Sections were requested but none exist: wrap all in the default section
            return {'sortobj': self,
                    'sections': [(self.grprobj.hdrobj.secdflt, nts_flat)],
                    'hdrgo_prt': hdrgo_prt}

        # RETURN: 2-D list [(section_name0, namedtuples0), (section_name1, namedtuples1), ...
        # kws: top_n hdrgo_prt section_sortby
        assert top_n is not True and top_n is not False, \
            "top_n({T}) MUST BE None OR AN int".format(T=top_n)
        assert self.sectobj is not None, "SECTIONS OBJECT DOES NOT EXIST"
        sec_sb = self.sectobj.section_sortby
        # Override hdrgo_prt, if sorting by sections or returning a subset of GO IDs in section
        sorts_sections = sec_sb is not False and sec_sb is not None
        hdrgo_prt_curr = hdrgo_prt is True and not sorts_sections and top_n is None
        nts_section = self.sectobj.get_sorted_nts_keep_section(hdrgo_prt_curr)
        # Take top_n in each section, if requested
        if top_n is not None:
            nts_section = [(s, nts[:top_n]) for s, nts in nts_section]
        # Send a flat list back if explicitly requested (section_prt=False) or
        # if top_n was given and section_prt was left unspecified.
        # NOTE(review): the source indentation was lost; the top_n/section_prt=None
        # pairing is reconstructed from statement order -- confirm against upstream.
        if section_prt is False or (section_prt is None and top_n is not None):
            nts_flat = self.get_sections_flattened(nts_section)
            return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt_curr}
        # Send 2-D sections nts back
        return {'sortobj': self, 'sections': nts_section, 'hdrgo_prt': hdrgo_prt_curr}

    @staticmethod
    def get_sections_flattened(section_nts):
        """Convert [(section0, nts0), (section1, nts1), ... to [*nts0, *nts1, ...

        If the namedtuples have no 'section' field, new namedtuples are made with
        the section name appended as a 'section' field.
        Returns an empty list if no section contains any namedtuple.
        """
        # Learn the field names from the first namedtuple in any section;
        # the leading section(s) may be empty
        nt_first = next((nt for _, nts in section_nts for nt in nts), None)
        if nt_first is None:
            return []
        nt_flds = list(nt_first._fields)
        # Flatten section_nts 2-D list
        if 'section' in nt_flds:
            return [nt for _, nts in section_nts for nt in nts]
        # Flatten section_nts 2-D list, and add sections to each namedtuple
        nt_flds.append('section')
        ntobj = cx.namedtuple("Nt", " ".join(nt_flds))
        return [ntobj._make(list(nt_go) + [section_name])
                for section_name, nts in section_nts
                for nt_go in nts]

    def get_nts_flat(self, hdrgo_prt=True, use_sections=True):
        """Return a flat list of sorted nts, or None if sections exist and are used."""
        # Either there are no sections OR we are not using them
        if self.sectobj is None or not use_sections:
            return self.sortobj.get_nts_sorted(
                hdrgo_prt,
                hdrgos=self.grprobj.get_hdrgos(),
                hdrgo_sort=True)
        # Sections exist and are in use: the caller builds the 2-D result
        return None

    @staticmethod
    def get_fields(desc2nts):
        """Return the namedtuple field names used in desc2nts ('flat' or 'sections').

        Returns None if desc2nts holds no namedtuples.
        """
        nts_flat = desc2nts.get('flat')
        if nts_flat:
            return nts_flat[0]._fields
        # Use the first section which holds at least one namedtuple
        for _, nts in desc2nts.get('sections') or ():
            if nts:
                return nts[0]._fields
        return None

    def _init_sectobj(self, section_sortby):
        """Return a SorterNts, or None if the grouper's header object has no sections."""
        if not self.sortobj.grprobj.hdrobj.sections:
            return None
        return SorterNts(self.sortobj, section_sortby)
182
|
|
|
|
183
|
|
|
|
184
|
|
|
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved. |
185
|
|
|
|