Sorter.get_desc2nts()   A
last analyzed

Complexity

Conditions 3

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 5
rs 10
1
"""Sorts GO IDs or user-provided sections containing GO IDs."""
2
3
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import sys
7
import collections as cx
8
from goatools.wr_tbl import prt_txt
9
from goatools.grouper.wrxlsx import WrXlsxSortedGos
10
from goatools.grouper.sorter_nts import SorterNts
11
from goatools.grouper.sorter_gos import SorterGoIds
12
from goatools.grouper.wr_sections import WrSections
13
14
15
class Sorter(object):
    """Sorts GO IDs or user-provided sections containing GO IDs.

       User GO IDs grouped under header GO IDs are not sorted by the Grouper class.
       Sort both user GO IDs in a group and header GO IDs across groups with these:

       S: use_sections
       s: section_sortby (T=True, F=False, S=lambda sort function)
       h: hdrgo_sortby Sorts hdr GO IDs
       u: sortby       Sorts user GO IDs
       P: hdrgo_prt    If True, Removes GO IDs used as GO group headers; Leaves list in
                       sorted order, but removes header GO IDs which are not user GO IDs.

               rm_h     hdr_sort      usr_sort       S  s  h  u  p
                ---     ------------  ------------  -- -- -- -- --
       case 1:   NO     hdrgo_sortby  usrgo_sortby   N  T  H  U  T
       case 2:  YES     hdrgo_sortby  usrgo_sortby   N  T  H  U  F
       case 3:   NO     section_order usrgo_sortby   S  F  -  U  T
       case 4:  YES     section_order usrgo_sortby   S  F  -  U  F
       case 5:  YES     |<--- section_sortby --->|   S  S  -  -  -

                                        |print|
         sec usesec prthdr prtsec 1d 2d hdr usr
        ---- ------ ------ ------ -- -- --- ---
        none      -   true      -  y  . hdr usr A
        none      -  false      -  y  . ... usr B

         yes  False   true      -  y  . hdr usr A
         yes  False  false      -  y  . ... usr B

         yes   True   True  False  .  y hdr usr
         yes   True  False  False  .  y ... usr
    """

    # Keyword names forwarded from user kws to get_desc2nts_fnc (content control)
    kw_keys_nts = {"hdrgo_prt", "section_prt", "top_n", "use_sections"}
    # Keyword names forwarded from user kws to prt_nts (print-format control)
    kw_keys_prt = {"prt", "prtfmt"}

    def __init__(self, grprobj, **kws):
        """Store the grouper object and build the GO-ID and section sorters.

        Keyword arguments read here (each defaults to None when absent):
            sortby          passed to SorterGoIds
            hdrgo_sortby    passed to SorterGoIds
            section_sortby  passed to SorterNts (only if sections exist)
        """
        # Keyword arguments:
        sortby = kws.get('sortby')
        hdrgo_sortby = kws.get('hdrgo_sortby')
        section_sortby = kws.get('section_sortby')
        # data members
        self.grprobj = grprobj
        self.sortobj = SorterGoIds(grprobj, sortby, hdrgo_sortby)
        self.sectobj = self._init_sectobj(section_sortby)  # SorterNts or None

    def wr_xlsx_nts(self, fout_xlsx, desc2nts, **kws):
        """Write grouped and sorted user GO IDs into an xlsx file."""
        # kws (content): hdrgo_prt section_prt use_sections
        # kws (prt fmt): title fld2col_widths ...
        xlsxobj = WrXlsxSortedGos("sortname", self)
        xlsxobj.wr_xlsx_nts(fout_xlsx, desc2nts, **kws)

    def prt_gos(self, prt=sys.stdout, **kws_usr):
        """Sort user GO ids, grouped under broader GO terms or sections. Print to screen.

        Returns the desc2nts dict produced by get_desc2nts so callers can reuse it.
        """
        # Keyword arguments (control content): hdrgo_prt section_prt use_sections
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        desc2nts = self.get_desc2nts(**kws_usr)
        # Keyword arguments (control print format): prt prtfmt
        kws_prt = {k: v for k, v in kws_usr.items() if k in self.kw_keys_prt}
        self.prt_nts(desc2nts, prt, **kws_prt)
        return desc2nts

    def prt_nts(self, desc2nts, prt=sys.stdout, prtfmt=None):
        """Print grouped and sorted GO IDs.

        desc2nts must hold either a 'flat' list of namedtuples or a 'sections'
        list of (section, namedtuples) pairs. When prtfmt is None, a default
        format is built from the gosubdag's prt_attr['fmt'].
        """
        # Set print format string
        if prtfmt is None:
            prtfmt = "{{hdr1usr01:2}} {FMT}\n".format(FMT=self.grprobj.gosubdag.prt_attr['fmt'])
        # 1-D: data to print is a flat list of namedtuples
        if 'flat' in desc2nts:
            prt_txt(prt, desc2nts['flat'], prtfmt=prtfmt)
        # 2-D: data to print is a list of [(section, nts), ...
        else:
            WrSections.prt_sections(prt, desc2nts['sections'], prtfmt)

    def get_desc2nts(self, **kws_usr):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        Only the keywords named in kw_keys_nts are forwarded; all others are ignored.
        """
        # desc2nts contains: (sections hdrgo_prt sortobj) or (flat hdrgo_prt sortobj)
        kws_nts = {k: v for k, v in kws_usr.items() if k in self.kw_keys_nts}
        return self.get_desc2nts_fnc(**kws_nts)

    def get_desc2nts_fnc(self, hdrgo_prt=True, section_prt=None,
                         top_n=None, use_sections=True):
        """Return grouped, sorted namedtuples in either format: flat, sections.

        The returned dict always has 'sortobj' (this object) and 'hdrgo_prt',
        plus exactly one of:
            'flat'      list of namedtuples
            'sections'  list of (section_name, namedtuples) pairs
        """
        # RETURN: flat list of namedtuples
        nts_flat = self.get_nts_flat(hdrgo_prt, use_sections)
        if nts_flat is not None:
            if not use_sections:
                return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt}
            # Sections requested, but none exist: wrap all nts in the default section
            return {'sortobj': self,
                    'sections': [(self.grprobj.hdrobj.secdflt, nts_flat)],
                    'hdrgo_prt': hdrgo_prt}

        # RETURN: 2-D list [(section_name0, namedtuples0), (section_name1, namedtuples1), ...
        #     kws: top_n hdrgo_prt section_sortby
        # bool is rejected explicitly because True/False would otherwise act as slice bounds
        assert top_n is not True and top_n is not False, \
            "top_n({T}) MUST BE None OR AN int".format(T=top_n)
        assert self.sectobj is not None, "SECTIONS OBJECT DOES NOT EXIST"
        sec_sb = self.sectobj.section_sortby
        # Override hdrgo_prt, if sorting by sections or returning a subset of GO IDs in section
        hdrgo_prt_curr = hdrgo_prt is True
        if (sec_sb is not False and sec_sb is not None) or top_n is not None:
            hdrgo_prt_curr = False
        nts_section = self.sectobj.get_sorted_nts_keep_section(hdrgo_prt_curr)
        # Take top_n in each section, if requested
        if top_n is not None:
            nts_section = [(s, nts[:top_n]) for s, nts in nts_section]
            if section_prt is None:
                nts_flat = self.get_sections_flattened(nts_section)
                return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt_curr}
        # Send flat list of sections nts back, as requested
        if section_prt is False:
            nts_flat = self.get_sections_flattened(nts_section)
            return {'sortobj': self, 'flat': nts_flat, 'hdrgo_prt': hdrgo_prt_curr}
        # Send 2-D sections nts back
        return {'sortobj': self, 'sections': nts_section, 'hdrgo_prt': hdrgo_prt_curr}

    @staticmethod
    def get_sections_flattened(section_nts):
        """Convert [(section0, nts0), (section1, nts1), ... to [*nts0, *nts1, ...

        If the namedtuples lack a 'section' field, each is rebuilt with one
        appended, holding the name of the section it came from. Returns an
        empty list when there are no namedtuples at all.
        """
        # Field names come from the first namedtuple in any section; the first
        # section may be empty (e.g. after a top_n slice), so do not index [0][1][0]
        first_nt = next((nt for _, nts in section_nts for nt in nts), None)
        if first_nt is None:
            return []
        nt_flds = list(first_nt._fields)
        # Flatten section_nts 2-D list
        if 'section' in nt_flds:
            return [nt for _, nts in section_nts for nt in nts]
        # Flatten section_nts 2-D list, and add sections to each namedtuple
        nt_flds.append('section')
        ntobj = cx.namedtuple("Nt", " ".join(nt_flds))
        return [ntobj._make(list(nt_go) + [section_name])
                for section_name, nts in section_nts
                for nt_go in nts]

    def get_nts_flat(self, hdrgo_prt=True, use_sections=True):
        """Return a flat list of sorted nts, or None if sections should be used."""
        # Either there are no sections OR we are not using them
        if self.sectobj is None or not use_sections:
            return self.sortobj.get_nts_sorted(
                hdrgo_prt,
                hdrgos=self.grprobj.get_hdrgos(),
                hdrgo_sort=True)
        # NOTE(review): a second `if not use_sections:` branch calling
        # self.sectobj.get_sorted_nts_omit_section(hdrgo_prt, hdrgo_sort=True)
        # used to follow here; it was unreachable and has been removed.
        return None

    @staticmethod
    def get_fields(desc2nts):
        """Return the namedtuple field names found in desc2nts, or None if it has no nts."""
        if 'flat' in desc2nts:
            nts_flat = desc2nts.get('flat')
            if nts_flat:
                return nts_flat[0]._fields
        # Use the first non-empty section; leading sections may hold no namedtuples
        for _, nts in desc2nts.get('sections') or ():
            if nts:
                return nts[0]._fields
        return None

    def _init_sectobj(self, section_sortby):
        """Return SorterNts, or None if the grouper's header object has no sections."""
        if not self.sortobj.grprobj.hdrobj.sections:
            return None
        return SorterNts(self.sortobj, section_sortby)
182
183
184
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.
185