Completed
Push — master ( 0f596f...821888 )
by
unknown
01:11
created

GoDepth1LettersWr   A

Complexity

Total Complexity 20

Size/Duplication

Total Lines 111
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 111
rs 10
wmc 20

7 Methods

Rating   Name   Duplication   Size   Complexity  
A wr_txt() 0 15 3
A prt_txt() 0 9 2
A __init__() 0 3 1
B get_d1nts() 0 19 5
A _init_ns2nt() 0 8 2
B wr_tex() 0 37 4
A wr_xlsx() 0 8 3
1
"""Used to operate on a sub-graph of a larger GO DAG."""
2
3
__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import sys
7
import re
8
import collections as cx
9
from goatools.gosubdag.gosubdag import GoSubDag
10
from goatools.wr_tbl import wr_xlsx as wr_xlsx_tbl
11
from goatools.wr_tbl import wr_xlsx_sections as wr_xlsx_sections_tbl
12
from goatools.wr_tbl import get_lines
13
from goatools.wr_tbl import prt_txt
14
15
class GoSubDagWr(object):
    """Write tables of GO terms taken from a sub-graph of a larger GO DAG.

    The instance keeps the caller-supplied ``go2obj`` mapping and builds a
    ``GoSubDag`` from it each time a table is requested.
    """

    # Default xlsx column widths, keyed by field name.
    fld2col_widths = {
        'NS' : 3,
        'dcnt' : 6,
        'level' : 4,
        'depth' : 4,
        'GO' : 12,
        'D1' : 6,
        'GO_name' : 45}

    def __init__(self, go2obj):
        """Store the mapping that is handed to GoSubDag when tables are built."""
        self.go2obj = go2obj

    def wr_xlsx(self, fout_xlsx, goids, sortby=None, **kws_usr):
        """Write the given GO IDs into a single xlsx table.

        Args:
            fout_xlsx: Name of the xlsx file to write.
            goids: GO IDs to report.
            sortby: Passed through to ``GoSubDag.get_nts``.
            **kws_usr: Forwarded to the table writer. A user-provided
                'fld2col_widths' is merged over the class defaults.
        """
        nts = GoSubDag(goids, self.go2obj).get_nts(goids, sortby)
        wr_xlsx_tbl(fout_xlsx, nts, **self._get_kws_wr(kws_usr))

    def wr_xlsx_sections(self, fout_xlsx, sections, sortby=None, **kws_usr):
        """Write GO IDs, grouped into named sections, into an xlsx table.

        Args:
            fout_xlsx: Name of the xlsx file to write.
            sections: Sequence of (section_name, goids) pairs.
            sortby: Passed through to ``GoSubDag.get_nts``.
            **kws_usr: Forwarded to the table writer. A user-provided
                'fld2col_widths' is merged over the class defaults.
        """
        nts = self.get_nts_sections(sections, sortby)
        wr_xlsx_sections_tbl(fout_xlsx, nts, **self._get_kws_wr(kws_usr))

    def get_nts_sections(self, sections, sortby=None):
        """Given a list of sections containing GO IDs, get a list of sections w/GO nts."""
        # One GoSubDag is built from the union of all GO IDs and reused per section.
        goids = self.get_goids_sections(sections)
        gosubdag = GoSubDag(goids, self.go2obj)
        return [(sec, gosubdag.get_nts(gos, sortby)) for sec, gos in sections]

    @staticmethod
    def get_goids_sections(sections):
        """Return the set of all GO IDs found in a list of (name, goids) sections."""
        goids_all = set()
        for _, goids_sec in sections:
            goids_all.update(goids_sec)
        return goids_all

    def _get_kws_wr(self, kws_usr):
        """Return a copy of the user keywords with merged column widths.

        The class-level defaults are copied so that neither the shared class
        dict nor the caller's dict is handed to (or modified by) the writer.
        """
        kws_wr = kws_usr.copy()
        fld2col_widths = self.fld2col_widths.copy()
        usr_widths = kws_usr.get('fld2col_widths')
        if usr_widths:
            fld2col_widths.update(usr_widths)
        kws_wr['fld2col_widths'] = fld2col_widths
        return kws_wr
64
65
66
def read_d1_letter(fin_txt):
    """Read letter aliases from a text file created by GoDepth1LettersWr.

    Args:
        fin_txt: Name of the text file to read.

    Returns:
        dict mapping a GO ID (``GO:`` followed by seven digits) to the first
        character of the line on which it was found.
    """
    go2letter = {}
    re_goid = re.compile(r"(GO:\d{7})")
    with open(fin_txt) as ifstrm:
        for line in ifstrm:
            mtch = re_goid.search(line)
            # Lines starting with a blank have no alias and are skipped
            # (presumably the namespace rows, which carry a " " alias).
            if mtch and line[:1] != ' ':
                # Alias is expected to be the first character
                go2letter[mtch.group(1)] = line[:1]
    return go2letter
77
78
class GoDepth1LettersWr(object):
    """Writes reports for a GoDepth1Letters object.

    Reports list each depth-01 GO term with its letter alias and descendant
    count, preceded by a row for the depth-00 term of its namespace.
    """

    str2ns = {'biological_process': 'BP', 'molecular_function': 'MF', 'cellular_component': 'CC'}
    hdrs = ['D1', 'NS', 'descendants', 'depth', 'GO', 'GO description']
    # Row type shared by all reports; created once rather than on every call.
    _ntprt = cx.namedtuple("NtPrt", "D1 NS dcnt depth GO name")

    def __init__(self, rcntobj):
        """Read ``go2dcnt``, ``depth2goobjs`` and ``goone2ntletter`` from rcntobj."""
        self.ns2nt = self._init_ns2nt(rcntobj)
        self.goone2ntletter = rcntobj.goone2ntletter

    def prt_txt(self, prt=sys.stdout):
        """Print letters, descendant count, and GO information."""
        fmt = "{L:1} {d:6,} D{D:02} {GO} {NAME}\n"
        for ntdata in self.get_d1nts():
            prt.write(fmt.format(
                L=ntdata.D1,
                d=ntdata.dcnt,
                D=ntdata.depth,
                GO=ntdata.GO,
                NAME=ntdata.name))

    def wr_xlsx(self, fout_xlsx="gos_depth01.xlsx", **kws):
        """Write xlsx table of depth-01 GO terms and their letter representation.

        'fld2col_widths' and 'hdrs' get defaults when the caller omits them;
        all keywords are forwarded to the table writer.
        """
        data_nts = self.get_d1nts()
        if 'fld2col_widths' not in kws:
            kws['fld2col_widths'] = {'D1': 6, 'NS':3, 'depth': 5, 'GO': 12, 'name': 40}
        if 'hdrs' not in kws:
            kws['hdrs'] = self.hdrs
        wr_xlsx_tbl(fout_xlsx, data_nts, **kws)

    def wr_txt(self, fout_txt="gos_depth01.txt", title=None):
        """Write text table of depth-01 GO terms and their letter representation.

        The legend and column headers are written only when a title is given.
        """
        data_nts = self.get_d1nts()
        with open(fout_txt, 'w') as prt:
            if title is not None:
                prt.write("{TITLE}\n\n".format(TITLE=title))
                prt.write("    D1 : Letter representing the depth-01 GO term\n")
                prt.write("    dcnt: Total number of all descendants\n")
                prt.write("    dep: Depth; The maximum length path to ")
                prt.write("leaf-level (childless) GO descendant(s)\n\n")
                prt.write("D1 NS  dcnt dep GO ID      Description\n")
                prt.write("- -- ------ --- ---------- ------------------------------\n")
            prt_txt(prt, data_nts)
        # Report only after the file has been closed.
        sys.stdout.write("  {N:>5} items WROTE: {TXT}\n".format(
            N=len(data_nts), TXT=fout_txt))

    def wr_tex(self, fout_tex="gos_depth01.tex"):
        """Write LaTeX table of depth-01 GO terms and their letter representation."""
        data_nts = self.get_d1nts()
        joinchr = " & "
        eol = " \\\\\n"
        with open(fout_tex, 'w') as prt:
            prt.write("\\begin{table}[!ht]\n")
            prt.write("\\begin{tabular}{|p{.5cm} | p{.5cm} | >{\\raggedleft}p{.9cm} ")
            prt.write("|p{.7cm} |p{1.8cm} |p{9cm}|}\n")
            prt.write("\\multicolumn{6}{c}{} \\\\\n")
            prt.write("\\hline\n")
            prt.write("\\rowcolor{gray!10}\n")
            # Field names come from the row type, so an empty table still gets a header.
            prt.write("{HDRS}{EOL}".format(
                HDRS=joinchr.join(self._ntprt._fields), EOL=eol))
            prt.write("\\hline\n")
            for idx, line in enumerate(get_lines(data_nts, joinchr=joinchr, eol=eol)):
                # Shade every other data row.
                if idx%2 == 1:
                    prt.write("\\rowcolor{gray!7}\n")
                prt.write(self._tex_escape(line))
            prt.write("\\hline\n")
            prt.write("\\end{tabular}\n")
            prt.write("\\caption{{{TEXT}}}\n\n".format(TEXT=self._get_tex_caption()))
            prt.write("\\label{table:supptbl_d1}\n")
            prt.write("\\end{table}\n")
        # Report only after the file has been closed.
        sys.stdout.write("  {N:>5} items WROTE: {TXT}\n".format(
            N=len(data_nts), TXT=fout_tex))

    @staticmethod
    def _tex_escape(line):
        """Return the line with each underscore preceded by a backslash for LaTeX."""
        # str.replace returns a new string; the result must be used.
        return line.replace('_', '\\_')

    @staticmethod
    def _get_tex_caption():
        """Return the caption text written below the LaTeX table."""
        # Backslashes are doubled so the textit command is not read as a TAB escape.
        return ("The descendant counts of GO terms at depth-01 are highly skewed. The "
                "root term, \\textit{biological\\_process} has over twenty GO children at "
                "depth-01 shown in the table sorted by their number of descendants "
                "(dcnt) with \\textit{cellular process} at the top having 18k+ "
                "descendants and \\textit{cell killing} near the bottom having only "
                "about 100 descendants. The first column (D1) contains a letter used as "
                "an alias for each depth-01 GO term. The second column represents the "
                "number of descendants from the specified GO term from down to the total  "
                "of its descendant leaf-level GO terms, which have no child GO terms.")

    def get_d1nts(self):
        """Get letters for depth-01 GO terms, descendants count, and GO information.

        Returns:
            list of NtPrt namedtuples (D1 NS dcnt depth GO name), sorted by
            namespace, then descending descendant count, then letter. Each
            namespace's rows are preceded by a row for its depth-00 term,
            whose D1 alias is a single blank.
        """
        data = []
        ntdata = self._ntprt
        namespace = None
        for ntlet in sorted(self.goone2ntletter.values(),
                            key=lambda nt: [nt.goobj.namespace, -1 * nt.dcnt, nt.D1]):
            goobj = ntlet.goobj
            goid = goobj.id
            # Invariant check: each reported term has exactly one parent.
            assert len(goobj.parents) == 1
            if namespace != goobj.namespace:
                # First term of a new namespace: emit the namespace row first.
                namespace = goobj.namespace
                ntns = self.ns2nt[namespace]
                pobj = ntns.goobj
                ns2 = self.str2ns[goobj.namespace]
                data.append(ntdata._make([" ", ns2, ntns.dcnt, pobj.depth, pobj.id, pobj.name]))
            data.append(ntdata._make(
                [ntlet.D1, self.str2ns[namespace], ntlet.dcnt, goobj.depth, goid, goobj.name]))
        return data

    @staticmethod
    def _init_ns2nt(rcntobj):
        """Save depth-00 GO terms, keyed by namespace, with their descendants cnt."""
        go2dcnt = rcntobj.go2dcnt
        ntobj = cx.namedtuple("NtD1", "D1 dcnt goobj")
        d0s = rcntobj.depth2goobjs[0]
        ns_nt = [(o.namespace, ntobj(D1="", dcnt=go2dcnt[o.id], goobj=o)) for o in d0s]
        return cx.OrderedDict(ns_nt)
189
190
# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.
191