|
1
|
|
|
"""Sorts GO IDs or user-provided sections containing GO IDs.""" |
|
2
|
|
|
|
|
3
|
|
|
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved." |
|
4
|
|
|
__author__ = "DV Klopfenstein" |
|
5
|
|
|
|
|
6
|
|
|
import sys |
|
7
|
|
|
import collections as cx |
|
8
|
|
|
from goatools.wr_tbl import prt_txt |
|
9
|
|
|
from goatools.grouper.wrxlsx import WrXlsxSortedGos |
|
10
|
|
|
from goatools.grouper.sorter_nts import SorterNts |
|
11
|
|
|
from goatools.grouper.sorter_gos import SorterGoIds |
|
12
|
|
|
from goatools.grouper.wr_sections import WrSections |
|
13
|
|
|
|
|
14
|
|
|
|
|
15
|
|
|
class Sorter(object):
    """Sorts GO IDs or user-provided sections containing GO IDs.

    User GO IDs grouped under header GO IDs are not sorted by the Grouper class.
    Sort both user GO IDs in a group and header GO IDs across groups with these:

      S: use_sections
      s: section_sortby (T=True, F=False, S=lambda sort function)
      h: hdrgo_sortby   Sorts hdr GO IDs
      u: sortby         Sorts user GO IDs
      P: hdrgo_prt      If True, Removes GO IDs used as GO group headers; Leaves list in
                        sorted order, but removes header GO IDs which are not user GO IDs.

                 rm_h  hdr_sort       usr_sort      S  s  h  u  p
                 ----  -------------  ------------  -- -- -- -- --
        case 1:  NO    hdrgo_sortby   usrgo_sortby  N  T  H  U  T
        case 2:  YES   hdrgo_sortby   usrgo_sortby  N  T  H  U  F
        case 3:  NO    section_order  usrgo_sortby  S  F  -  U  T
        case 4:  YES   section_order  usrgo_sortby  S  F  -  U  F
        case 5:  YES   |<---- section_sortby ---->|  S  S  -  -  -

                                        |print|
        sec   usesec  prthdr  prtsec    1d  2d   hdr  usr
        ----  ------  ------  ------    --  --   ---  ---
        none  -       true    -         y   .    hdr  usr  A
        none  -       false   -         y   .    ...  usr  B

        yes   False   true    -         y   .    hdr  usr  A
        yes   False   false   -         y   .    ...  usr  B

        yes   True    True    False     .   y    hdr  usr
        yes   True    False   False     .   y    ...  usr
    """

    # Keyword names forwarded to get_desc2nts_fnc (control content)
    kw_keys_nts = {"hdrgo_prt", "section_prt", "top_n", "use_sections"}
    # Keyword names forwarded to prt_nts (control print format)
    kw_keys_prt = {"prt", "prtfmt"}

    def __init__(self, grprobj, **kws):
        """Store the grouper object and build the GO-ID and section sorters.

        Keyword arguments read: sortby, hdrgo_sortby, section_sortby.
        Each defaults to None when not provided.
        """
        # Keyword arguments:
        sortby = kws.get('sortby')
        hdrgo_sortby = kws.get('hdrgo_sortby')
        section_sortby = kws.get('section_sortby')
        # data members
        self.grprobj = grprobj
        self.sortobj = SorterGoIds(grprobj, sortby, hdrgo_sortby)
        # SorterNts, or None if the grouper's header object has no sections
        self.sectobj = self._init_sectobj(section_sortby)

    def wr_xlsx_nts(self, fout_xlsx, desc2nts, **kws):
        """Write grouped and sorted user GO IDs into an xlsx file."""
        # kws (content): hdrgo_prt section_prt use_sections
        # kws (prt fmt): title fld2col_widths ...
        xlsxobj = WrXlsxSortedGos("sortname", self)
        xlsxobj.wr_xlsx_nts(fout_xlsx, desc2nts, **kws)

    def prt_gos(self, prt=sys.stdout, **kws_usr):
        """Sort user GO ids, grouped under broader GO terms or sections. Print to screen.

        Returns the desc2nts dict produced by get_desc2nts.
        """
        # Keyword arguments (control content): hdrgo_prt section_prt use_sections
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        desc2nts = self.get_desc2nts(**kws_usr)
        # Keyword arguments (control print format): prt prtfmt
        # NOTE: 'prt' is bound by the named parameter, so it never appears in kws_usr
        kws_prt = {k: v for k, v in kws_usr.items() if k in self.kw_keys_prt}
        self.prt_nts(desc2nts, prt, **kws_prt)
        return desc2nts

    def prt_nts(self, desc2nts, prt=sys.stdout, prtfmt=None):
        """Print grouped and sorted GO IDs.

        desc2nts holds either a 'flat' list of namedtuples or a 'sections' list.
        If prtfmt is None, a default is built from the gosubdag print format.
        """
        # Set print format string
        if prtfmt is None:
            prtfmt = "{{hdr1usr01:2}} {FMT}\n".format(FMT=self.grprobj.gosubdag.prt_attr['fmt'])
        # 1-D: data to print is a flat list of namedtuples
        if 'flat' in desc2nts:
            prt_txt(prt, desc2nts['flat'], prtfmt=prtfmt)
        # 2-D: data to print is a list of [(section, nts), ...
        else:
            WrSections.prt_sections(prt, desc2nts['sections'], prtfmt)

    def get_desc2nts(self, **kws_usr):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        Only the keywords named in kw_keys_nts are forwarded; others are ignored.
        """
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        kws_nts = {k: v for k, v in kws_usr.items() if k in self.kw_keys_nts}
        return self.get_desc2nts_fnc(**kws_nts)

    def get_desc2nts_fnc(self, hdrgo_prt=True, section_prt=None,
                         top_n=None, use_sections=True):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        Returns a dict with keys 'sortobj', 'hdrgo_prt' and one of:
          'flat'     -- a 1-D list of namedtuples
          'sections' -- a 2-D list [(section_name, namedtuples), ...]
        """
        # RETURN: flat list of namedtuples (no sections exist, or they are not used)
        nts_flat = self.get_nts_flat(hdrgo_prt, use_sections)
        if nts_flat is not None:
            if not use_sections:
                return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt}
            # Sections were requested but none exist: wrap all nts in the default section
            return {'sortobj': self,
                    'sections': [(self.grprobj.hdrobj.secdflt, nts_flat)],
                    'hdrgo_prt': hdrgo_prt}

        # RETURN: 2-D list [(section_name0, namedtuples0), (section_name1, namedtuples1), ...
        # kws: top_n hdrgo_prt section_sortby
        assert top_n is not True and top_n is not False, \
            "top_n({T}) MUST BE None OR AN int".format(T=top_n)
        assert self.sectobj is not None, "SECTIONS OBJECT DOES NOT EXIST"
        sec_sb = self.sectobj.section_sortby
        # Override hdrgo_prt, if sorting by sections or returning a subset of GO IDs in section
        sorts_sections = sec_sb is not None and sec_sb is not False
        hdrgo_prt_curr = hdrgo_prt is True and not sorts_sections and top_n is None
        nts_section = self.sectobj.get_sorted_nts_keep_section(hdrgo_prt_curr)
        # Take top_n in each section, if requested
        if top_n is not None:
            nts_section = [(sec, nts[:top_n]) for sec, nts in nts_section]
        # Send flat list of sections nts back, by default (None) or as requested (False)
        if section_prt is None or section_prt is False:
            nts_flat = self.get_sections_flattened(nts_section)
            return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt_curr}
        # Send 2-D sections nts back
        return {'sortobj': self, 'sections': nts_section, 'hdrgo_prt': hdrgo_prt_curr}

    @staticmethod
    def get_sections_flattened(section_nts):
        """Convert [(section0, nts0), (section1, nts1), ... to [*nts0, *nts1, ...

        If the namedtuples lack a 'section' field, new namedtuples are created
        with the section name appended as a 'section' field.
        Returns an empty list if there are no namedtuples in any section.
        """
        # Fields are taken from the first namedtuple found in any section, so
        # empty input or empty leading sections do not raise an IndexError
        first_nt = next((nt for _, nts in section_nts for nt in nts), None)
        if first_nt is None:
            return []
        nt_flds = list(first_nt._fields)
        # Flatten section_nts 2-D list
        if 'section' in nt_flds:
            return [nt for _, nts in section_nts for nt in nts]
        # Flatten section_nts 2-D list, and add sections to each namedtuple
        nt_flds.append('section')
        ntobj = cx.namedtuple("Nt", " ".join(nt_flds))
        return [ntobj._make(list(nt_go) + [section_name])
                for section_name, nts in section_nts
                for nt_go in nts]

    def get_nts_flat(self, hdrgo_prt=True, use_sections=True):
        """Return a flat list of sorted nts, or None if sections exist and are used."""
        # Either there are no sections OR we are not using them
        if self.sectobj is None or not use_sections:
            return self.sortobj.get_nts_sorted(
                hdrgo_prt,
                hdrgos=self.grprobj.get_hdrgos(),
                hdrgo_sort=True)
        # NOTE: a second "if not use_sections" branch used to follow here; it could
        # never run because the test above already returns in that case.
        return None

    @staticmethod
    def get_fields(desc2nts):
        """Return the namedtuple field names found in desc2nts ('flat' or 'sections').

        Returns None if desc2nts contains no namedtuples.
        """
        if 'flat' in desc2nts:
            nts_flat = desc2nts.get('flat')
            if nts_flat:
                return nts_flat[0]._fields
        if 'sections' in desc2nts:
            # Use the first section which contains namedtuples; leading sections may be empty
            for _, nts in desc2nts.get('sections') or ():
                if nts:
                    return nts[0]._fields
        return None

    def _init_sectobj(self, section_sortby):
        """Return SorterNts, or None if the grouper's header object has no sections."""
        if not self.sortobj.grprobj.hdrobj.sections:
            return None
        return SorterNts(self.sortobj, section_sortby)
|
182
|
|
|
|
|
183
|
|
|
|
|
184
|
|
|
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved. |
|
185
|
|
|
|