GoDepth1LettersWr - Code Metrics - Inspection of "Added new plotting code for #102" - tanghaibao/goatools - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 0f596f...821888 )

unknown

created 2018-06-15 18:47 UTC

GoDepth1LettersWr A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	111
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
c	1
b	0
f	0
dl	0
loc	111
rs	10
wmc	20

7 Methods

Rating	Name	Size	Complexity
A	wr_txt()	15	3
A	prt_txt()	9	2
A	__init__()	3	1
B	get_d1nts()	19	5
A	_init_ns2nt()	8	2
B	wr_tex()	37	4
A	wr_xlsx()	8	3

"""Used to operate on a sub-graph of a larger GO DAG."""

__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
__author__ = "DV Klopfenstein"

import sys
import re
import collections as cx
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.wr_tbl import wr_xlsx as wr_xlsx_tbl
from goatools.wr_tbl import wr_xlsx_sections as wr_xlsx_sections_tbl
from goatools.wr_tbl import get_lines
from goatools.wr_tbl import prt_txt

class GoSubDagWr(object):
    """Writes GO terms taken from a sub-graph of the GO DAG into xlsx tables."""

    # Default xlsx column widths, keyed by field name
    fld2col_widths = {
        'NS': 3,
        'dcnt': 6,
        'level': 4,
        'depth': 4,
        'GO': 12,
        'D1': 6,
        'GO_name': 45,
    }

    def __init__(self, go2obj):
        # Mapping handed to GoSubDag whenever a sub-graph is built
        self.go2obj = go2obj

    def wr_xlsx(self, fout_xlsx, goids, sortby=None, **kws_usr):
        """Write the namedtuples of the given GO IDs into one xlsx table.

        User keyword args are forwarded to the table writer; the class default
        column widths are used only if the user did not provide any.
        """
        gosubdag = GoSubDag(goids, self.go2obj)
        nts = gosubdag.get_nts(goids, sortby)
        kws_wr = dict(kws_usr)
        kws_wr.setdefault('fld2col_widths', self.fld2col_widths)
        wr_xlsx_tbl(fout_xlsx, nts, **kws_wr)

    def wr_xlsx_sections(self, fout_xlsx, sections, sortby=None, **kws_usr):
        """Write sections of GO IDs into an xlsx table, one group per section.

        User-provided column widths are layered on top of the class defaults,
        so fields the user does not mention keep their default width.
        """
        nts = self.get_nts_sections(sections, sortby)
        kws_wr = dict(kws_usr)
        if 'fld2col_widths' in kws_usr:
            merged_widths = dict(self.fld2col_widths)
            merged_widths.update(kws_usr['fld2col_widths'].items())
            kws_wr['fld2col_widths'] = merged_widths
        else:
            kws_wr['fld2col_widths'] = self.fld2col_widths
        wr_xlsx_sections_tbl(fout_xlsx, nts, **kws_wr)

    def get_nts_sections(self, sections, sortby=None):
        """Return a list of (section, GO namedtuples) built from (section, GO IDs) pairs."""
        # One sub-graph covering every GO ID is shared by all sections
        all_goids = self.get_goids_sections(sections)
        gosubdag = GoSubDag(all_goids, self.go2obj)
        sections_nts = []
        for section_name, section_goids in sections:
            sections_nts.append((section_name, gosubdag.get_nts(section_goids, sortby)))
        return sections_nts

    @staticmethod
    def get_goids_sections(sections):
        """Return the set of all GO IDs found in a list of (section, GO IDs) pairs."""
        return set().union(*(goids_sec for _, goids_sec in sections))


def read_d1_letter(fin_txt):
    """Read GO ID-to-letter aliases from a text file created by GoDepth1LettersWr.

    A line contributes an entry only if it contains a GO ID and does not begin
    with a space; the alias is the first character of that line.
    """
    goid_pattern = re.compile(r"(GO:\d{7})")
    letters = {}
    with open(fin_txt) as ifstrm:
        for line in ifstrm:
            first_chr = line[:1]
            # Lines beginning with a space carry no letter alias
            if first_chr == ' ':
                continue
            found = goid_pattern.search(line)
            if not found:
                continue
            letters[found.group(1)] = first_chr
    return letters

class GoDepth1LettersWr(object):
    """Writes reports for a GoDepth1Letters object.

    Every report is built from get_d1nts(): one row per depth-01 GO term
    (letter alias, namespace, descendant count, depth, GO ID, name), with the
    depth-00 term of a namespace inserted ahead of that namespace's rows.
    """

    str2ns = {'biological_process': 'BP', 'molecular_function': 'MF', 'cellular_component': 'CC'}
    hdrs = ['D1', 'NS', 'descendants', 'depth', 'GO', 'GO description']
    # Row type returned by get_d1nts(); its field names are the TeX table headers
    _ntprt = cx.namedtuple("NtPrt", "D1 NS dcnt depth GO name")

    def __init__(self, rcntobj):
        """Store the data needed for reports.

        rcntobj must provide the attributes read here and in _init_ns2nt:
        goone2ntletter, go2dcnt, and depth2goobjs.
        """
        self.ns2nt = self._init_ns2nt(rcntobj)
        self.goone2ntletter = rcntobj.goone2ntletter

    def prt_txt(self, prt=sys.stdout):
        """Print letters, descendant count, and GO information."""
        for ntdata in self.get_d1nts():
            prt.write("{L:1} {d:6,} D{D:02} {GO} {NAME}\n".format(
                L=ntdata.D1,
                d=ntdata.dcnt,
                D=ntdata.depth,
                GO=ntdata.GO,
                NAME=ntdata.name))

    def wr_xlsx(self, fout_xlsx="gos_depth01.xlsx", **kws):
        """Write xlsx table of depth-01 GO terms and their letter representation.

        Default column widths and headers are supplied only when the caller
        did not pass 'fld2col_widths' or 'hdrs'.
        """
        data_nts = self.get_d1nts()
        if 'fld2col_widths' not in kws:
            kws['fld2col_widths'] = {'D1': 6, 'NS':3, 'depth': 5, 'GO': 12, 'name': 40}
        if 'hdrs' not in kws:
            kws['hdrs'] = self.hdrs
        wr_xlsx_tbl(fout_xlsx, data_nts, **kws)

    def wr_txt(self, fout_txt="gos_depth01.txt", title=None):
        """Write text table of depth-01 GO terms and their letter representation.

        The title, the field legend, and the column headers are written only
        when a title is given; the data rows are always written.
        """
        data_nts = self.get_d1nts()
        with open(fout_txt, 'w') as prt:
            if title is not None:
                prt.write("{TITLE}\n\n".format(TITLE=title))
                prt.write("    D1 : Letter representing the depth-01 GO term\n")
                prt.write("    dcnt: Total number of all descendants\n")
                prt.write("    dep: Depth; The maximum length path to ")
                prt.write("leaf-level (childless) GO descendant(s)\n\n")
                prt.write("D1 NS  dcnt dep GO ID      Description\n")
                prt.write("- -- ------ --- ---------- ------------------------------\n")
            # Module-level prt_txt imported from goatools.wr_tbl, not the method above
            prt_txt(prt, data_nts)
            sys.stdout.write("  {N:>5} items WROTE: {TXT}\n".format(
                N=len(data_nts), TXT=fout_txt))

    def wr_tex(self, fout_tex="gos_depth01.tex"):
        """Write LaTeX table of depth-01 GO terms and their letter representation.

        Underscores in the data rows are escaped for LaTeX, and every other
        data row is shaded. The header row comes from the row type's field
        names, so the table is still well-formed when there are no rows.
        """
        data_nts = self.get_d1nts()
        joinchr = " & "
        eol = " \\\\\n"
        with open(fout_tex, 'w') as prt:
            prt.write("\\begin{table}[!ht]\n")
            prt.write("\\begin{tabular}{|p{.5cm} | p{.5cm} | >{\\raggedleft}p{.9cm} ")
            prt.write("|p{.7cm} |p{1.8cm} |p{9cm}|}\n")
            prt.write("\\multicolumn{6}{c}{} \\\\\n")
            prt.write("\\hline\n")
            prt.write("\\rowcolor{gray!10}\n")
            prt.write("{HDRS}{EOL}".format(
                HDRS=joinchr.join(self._ntprt._fields), EOL=eol))
            prt.write("\\hline\n")
            for idx, line in enumerate(get_lines(data_nts, joinchr=joinchr, eol=eol)):
                if idx%2 == 1:
                    prt.write("\\rowcolor{gray!7}\n")
                # str.replace returns a new string; the escaped text must be what is written
                prt.write(self._escape_tex(line))
            prt.write("\\hline\n")
            prt.write("\\end{tabular}\n")
            # Raw strings keep "\textit" literal; in a normal string "\t" is a TAB
            caption = (r"The descendant counts of GO terms at depth-01 are highly skewed. The "
                       r"root term, \textit{biological\_process} has over twenty GO children at "
                       r"depth-01 shown in the table sorted by their number of descendants "
                       r"(dcnt) with \textit{cellular process} at the top having 18k+ "
                       r"descendants and \textit{cell killing} near the bottom having only "
                       r"about 100 descendants. The first column (D1) contains a letter used as "
                       r"an alias for each depth-01 GO term. The second column represents the "
                       r"number of descendants from the specified GO term from down to the total  "
                       r"of its descendant leaf-level GO terms, which have no child GO terms.")
            prt.write("\\caption{{{TEXT}}}\n\n".format(TEXT=caption))
            prt.write("\\label{table:supptbl_d1}\n")
            prt.write("\\end{table}\n")
            sys.stdout.write("  {N:>5} items WROTE: {TXT}\n".format(
                N=len(data_nts), TXT=fout_tex))

    @staticmethod
    def _escape_tex(text):
        """Return text with underscores escaped for use in LaTeX."""
        return text.replace('_', '\\_')

    def get_d1nts(self):
        """Get letters for depth-01 GO terms, descendants count, and GO information.

        Returns a list of namedtuples with fields D1, NS, dcnt, depth, GO, and
        name. Rows are sorted by namespace, then by descending descendant
        count, then by letter. Each namespace's rows are preceded by a row for
        its depth-00 term, whose D1 field is a single space.
        """
        data = []
        ntdata = self._ntprt
        namespace = None
        for ntlet in sorted(self.goone2ntletter.values(),
                            key=lambda nt: [nt.goobj.namespace, -1 * nt.dcnt, nt.D1]):
            goobj = ntlet.goobj
            goid = goobj.id
            assert len(goobj.parents) == 1
            # First term seen in a new namespace: emit the depth-00 row first
            if namespace != goobj.namespace:
                namespace = goobj.namespace
                ntns = self.ns2nt[namespace]
                pobj = ntns.goobj
                ns2 = self.str2ns[goobj.namespace]
                data.append(ntdata._make([" ", ns2, ntns.dcnt, pobj.depth, pobj.id, pobj.name]))
            data.append(ntdata._make(
                [ntlet.D1, self.str2ns[namespace], ntlet.dcnt, goobj.depth, goid, goobj.name]))
        return data

    @staticmethod
    def _init_ns2nt(rcntobj):
        """Map each namespace to a namedtuple (D1, dcnt, goobj) for its depth-00 GO term.

        The D1 letter is left empty; entries follow the order of
        rcntobj.depth2goobjs[0].
        """
        go2dcnt = rcntobj.go2dcnt
        ntobj = cx.namedtuple("NtD1", "D1 dcnt goobj")
        d0s = rcntobj.depth2goobjs[0]
        ns_nt = [(o.namespace, ntobj(D1="", dcnt=go2dcnt[o.id], goobj=o)) for o in d0s]
        return cx.OrderedDict(ns_nt)

# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.


1			"""Used to operate on a sub-graph of a larger GO DAG."""
2
3			__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
4			__author__ = "DV Klopfenstein"
5
6			import sys
7			import re
8			import collections as cx
9			from goatools.gosubdag.gosubdag import GoSubDag
10			from goatools.wr_tbl import wr_xlsx as wr_xlsx_tbl
11			from goatools.wr_tbl import wr_xlsx_sections as wr_xlsx_sections_tbl
12			from goatools.wr_tbl import get_lines
13			from goatools.wr_tbl import prt_txt
14
15			class GoSubDagWr(object):
16			"""Contains a sub-graph of the original obo from geneontology.org."""
17
18			fld2col_widths = {
19			'NS' : 3,
20			'dcnt' : 6,
21			'level' : 4,
22			'depth' : 4,
23			'GO' : 12,
24			'D1' : 6,
25			'GO_name' : 45}
26
27			def __init__(self, go2obj):
28			self.go2obj = go2obj
29
30			def wr_xlsx(self, fout_xlsx, goids, sortby=None, **kws_usr):
31			"""Write goids into a table."""
32			nts = GoSubDag(goids, self.go2obj).get_nts(goids, sortby)
33			kws_wr = kws_usr.copy()
34			if 'fld2col_widths' not in kws_wr:
35			kws_wr['fld2col_widths'] = self.fld2col_widths
36			wr_xlsx_tbl(fout_xlsx, nts, **kws_wr)
37
38			def wr_xlsx_sections(self, fout_xlsx, sections, sortby=None, **kws_usr):
39			"""Write goids into a table."""
40			nts = self.get_nts_sections(sections, sortby)
41			kws_wr = kws_usr.copy()
42			if 'fld2col_widths' not in kws_wr:
43			kws_wr['fld2col_widths'] = self.fld2col_widths
44			else:
45			fld2col_widths = self.fld2col_widths.copy()
46			for fld, wid in kws_usr['fld2col_widths'].items():
47			fld2col_widths[fld] = wid
48			kws_wr['fld2col_widths'] = fld2col_widths
49			wr_xlsx_sections_tbl(fout_xlsx, nts, **kws_wr)
50
51			def get_nts_sections(self, sections, sortby=None):
52			"""Given a list of sections containing GO IDs, get a list of sections w/GO nts."""
53			goids = self.get_goids_sections(sections)
54			gosubdag = GoSubDag(goids, self.go2obj)
55			return [(sec, gosubdag.get_nts(gos, sortby)) for sec, gos in sections]
56
57			@staticmethod
58			def get_goids_sections(sections):
59			"""Return all the GO IDs in a 2-D sections list."""
60			goids_all = set()
61			for _, goids_sec in sections:
62			goids_all |= set(goids_sec)
63			return goids_all
64
65
66			def read_d1_letter(fin_txt):
67			"""Reads letter aliases from a text file created by GoDepth1LettersWr."""
68			go2letter = {}
69			re_goid = re.compile(r"(GO:\d{7})")
70			with open(fin_txt) as ifstrm:
71			for line in ifstrm:
72			mtch = re_goid.search(line)
73			if mtch and line[:1] != ' ':
74			# Alias is expected to be the first character
75			go2letter[mtch.group(1)] = line[:1]
76			return go2letter
77
78			class GoDepth1LettersWr(object):
79			"""Writes reports for a GoDepth1Letters object."""
80
81			str2ns = {'biological_process': 'BP', 'molecular_function': 'MF', 'cellular_component': 'CC'}
82			hdrs = ['D1', 'NS', 'descendants', 'depth', 'GO', 'GO description']
83
84			def __init__(self, rcntobj):
85			self.ns2nt = self._init_ns2nt(rcntobj)
86			self.goone2ntletter = rcntobj.goone2ntletter
87
88			def prt_txt(self, prt=sys.stdout):
89			"""Print letters, descendant count, and GO information."""
90			for ntdata in self.get_d1nts():
91			prt.write("{L:1} {d:6,} D{D:02} {GO} {NAME}\n".format(
92			L=ntdata.D1,
93			d=ntdata.dcnt,
94			D=ntdata.depth,
95			GO=ntdata.GO,
96			NAME=ntdata.name))
97
98			def wr_xlsx(self, fout_xlsx="gos_depth01.xlsx", **kws):
99			"""Write xlsx table of depth-01 GO terms and their letter representation."""
100			data_nts = self.get_d1nts()
101			if 'fld2col_widths' not in kws:
102			kws['fld2col_widths'] = {'D1': 6, 'NS':3, 'depth': 5, 'GO': 12, 'name': 40}
103			if 'hdrs' not in kws:
104			kws['hdrs'] = self.hdrs
105			wr_xlsx_tbl(fout_xlsx, data_nts, **kws)
106
107			def wr_txt(self, fout_txt="gos_depth01.txt", title=None):
108			"""write text table of depth-01 GO terms and their letter representation."""
109			data_nts = self.get_d1nts()
110			with open(fout_txt, 'w') as prt:
111			if title is not None:
112			prt.write("{TITLE}\n\n".format(TITLE=title))
113			prt.write(" D1 : Letter representing the depth-01 GO term\n")
114			prt.write(" dcnt: Total number of all descendants\n")
115			prt.write(" dep: Depth; The maximum length path to ")
116			prt.write("leaf-level (childless) GO descendant(s)\n\n")
117			prt.write("D1 NS dcnt dep GO ID Description\n")
118			prt.write("- -- ------ --- ---------- ------------------------------\n")
119			prt_txt(prt, data_nts)
120			sys.stdout.write(" {N:>5} items WROTE: {TXT}\n".format(
121			N=len(data_nts), TXT=fout_txt))
122
123			def wr_tex(self, fout_tex="gos_depth01.tex"):
124			"""write text table of depth-01 GO terms and their letter representation."""
125			data_nts = self.get_d1nts()
126			joinchr = " & "
127			#pylint: disable=anomalous-backslash-in-string
128			eol = " \\\\\n"
129			with open(fout_tex, 'w') as prt:
130			prt.write("\\begin{table}[!ht]\n")
131			prt.write("\\begin{tabular}{|p{.5cm} | p{.5cm} | >{\\raggedleft}p{.9cm} ")
132			prt.write("|p{.7cm} |p{1.8cm} |p{9cm}|}\n")
133			prt.write("\multicolumn{6}{c}{} \\\\\n")
134			prt.write("\hline\n")
135			prt.write("\\rowcolor{gray!10}\n")
136			prt.write("{HDRS}{EOL}".format(
137			HDRS=joinchr.join(next(iter(data_nts))._fields), EOL=eol))
138			prt.write("\hline\n")
139			for idx, line in enumerate(get_lines(data_nts, joinchr=joinchr, eol=eol)):
140			if idx%2 == 1:
141			prt.write("\\rowcolor{gray!7}\n")
142			line.replace('_', '\\_')
143			prt.write(line)
144			prt.write("\hline\n")
145			prt.write("\end{tabular}\n")
146			caption = ("The descendant counts of GO terms at depth-01 are highly skewed. The "
147			"root term, \textit{biological\_process} has over twenty GO children at "
148			"depth-01 shown in the table sorted by their number of descendants "
149			"(dcnt) with \textit{cellular process} at the top having 18k+ "
150			"descendants and \textit{cell killing} near the bottom having only "
151			"about 100 descendants. The first column (D1) contains a letter used as "
152			"an alias for each depth-01 GO term. The second column represents the "
153			"number of descendants from the specified GO term from down to the total "
154			"of its descendant leaf-level GO terms, which have no child GO terms.")
155			prt.write("\caption{{{TEXT}}}\n\n".format(TEXT=caption))
156			prt.write("\label{table:supptbl_d1}\n")
157			prt.write("\end{table}\n")
158			sys.stdout.write(" {N:>5} items WROTE: {TXT}\n".format(
159			N=len(data_nts), TXT=fout_tex))
160
161			def get_d1nts(self):
162			"""Get letters for depth-01 GO terms, descendants count, and GO information."""
163			data = []
164			ntdata = cx.namedtuple("NtPrt", "D1 NS dcnt depth GO name")
165			namespace = None
166			for ntlet in sorted(self.goone2ntletter.values(),
167			key=lambda nt: [nt.goobj.namespace, -1 * nt.dcnt, nt.D1]):
168			goobj = ntlet.goobj
169			goid = goobj.id
170			assert len(goobj.parents) == 1
171			if namespace != goobj.namespace:
172			namespace = goobj.namespace
173			ntns = self.ns2nt[namespace]
174			pobj = ntns.goobj
175			ns2 = self.str2ns[goobj.namespace]
176			data.append(ntdata._make([" ", ns2, ntns.dcnt, pobj.depth, pobj.id, pobj.name]))
177			data.append(ntdata._make(
178			[ntlet.D1, self.str2ns[namespace], ntlet.dcnt, goobj.depth, goid, goobj.name]))
179			return data
180
181			@staticmethod
182			def _init_ns2nt(rcntobj):
183			"""Save depth-00 GO terms ordered using descendants cnt."""
184			go2dcnt = rcntobj.go2dcnt
185			ntobj = cx.namedtuple("NtD1", "D1 dcnt goobj")
186			d0s = rcntobj.depth2goobjs[0]
187			ns_nt = [(o.namespace, ntobj(D1="", dcnt=go2dcnt[o.id], goobj=o)) for o in d0s]
188			return cx.OrderedDict(ns_nt)
189
190			# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.
191

tanghaibao / goatools

Push — master ( 0f596f...821888 )

GoDepth1LettersWr A

Complexity

Size/Duplication

Importance

7 Methods

Duplication Side-by-Side

Filter issues like