GoeaResults._get_item_str() - Code Metrics - Inspection of "Added new plotting code for #102" - tanghaibao/goatools - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 0f596f...821888 )

unknown

created 2018-06-15 18:47 UTC

GoeaResults._get_item_str() F

↳ Parent: GoeaResults

Complexity

Conditions

Size

Total Lines

Duplication

Lines	24
Ratio	100 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	12
c	1
b	0
f	0
dl	24
loc	24
rs	2.8641

How to fix Complexity

"""Manages GO Term fill colors and bordercolors."""

__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
__author__ = "DV Klopfenstein"

import sys
import collections as cx


class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Holds GOEA result records keyed by GO ID and provides:
      * the text (study item list or study count) to print on a GO term
      * fill colors for GO terms chosen from each record's P-value
    """

    # Keyword arguments read by __init__
    kws_set = {'id2symbol', 'study_items', 'items_p_line', 'pval_name'}

    dflt_items_p_line = 5 # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # P-value thresholds, smallest first: the first threshold that a
    # P-value does not exceed determines the color.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results and the user's printing options.

        Args:
            goea_results: non-empty, indexable collection of result records.
                Each record has a ``GO`` attribute. A record having a
                ``_fldsdefprt`` attribute is treated as a GOEA result object;
                otherwise it is treated as a namedtuple.
            **kws: optional settings:
                pval_name: name of the record attribute holding the P-value
                study_items: None/True (no limit) or an int limiting the
                    number of study items printed per GO term
                items_p_line: number of study items printed per line
                id2symbol: dict mapping a study item ID to its symbol
        """
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        self.go2res = {r.GO: r for r in goea_results}
        # True: results are GOEA result objects; False: namedtuples
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns the formatted study items if the result for ``goid`` has
        them, else the study count text. Returns None if ``goid`` has no
        GOEA result.
        """
        res = self.go2res.get(goid)
        if res is None:
            return None
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        ``goid2color`` is updated in place. Colors already present are never
        overwritten, and results with a study count of 0 or without a P-value
        are left uncolored.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        for goid, res in self.go2res.items():
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in self.alpha2col.items():
                if pval <= alpha and res.study_count != 0:
                    if goid not in goid2color:
                        goid2color[goid] = color
                    # The first (smallest) matching threshold decides the
                    # color; later thresholds could not change it.
                    break

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value."""
        go2color = {}
        self.set_goid2color_pval(go2color)
        # GO IDs that received no P-value color get the not-significant color
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            go2color.setdefault(goid, color_dflt)
        return go2color

    def _get_item_str(self, res):
        """Return the sorted study items of a result in one of these formats:
              1. No limit on items:       7) Bcl2, Cd81, Cdkn1a, Mif, Ptprc
              2. Limit set, not exceeded: Bcl2, Cd81, Cdkn1a, Mif, Ptprc
              3. Limit set and exceeded:  7 genes; Bcl2, Cd81, Cdkn1a...
           Items are printed as symbols when id2symbol has one for the item.
        """
        prt_items = sorted(self._get_genestr(itemid) for itemid in res.study_items)
        num_items = len(prt_items)
        max_items = self.study_items_max
        if max_items is None:
            return "{N}) {GENES}".format(N=num_items, GENES=self._join_items(prt_items))
        if num_items <= max_items:
            return self._join_items(prt_items)
        short_str = self._join_items(prt_items[:max_items])
        return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])

    def _join_items(self, items):
        """Join items with ', ', starting a new line every items_p_line items."""
        ipl = self.items_p_line
        lines = [", ".join(str(e) for e in items[i:i+ipl]) for i in range(0, len(items), ipl)]
        return "\n".join(lines)

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        symbol = self.id2symbol.get(itemid)
        if symbol is not None:
            return symbol
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        Uses the user's 'pval_name' if given. Otherwise the name is built from
        the first result's method name (GOEA result objects) or is the first
        namedtuple field starting with 'p_' other than 'p_uncorrected'.
        Returns None if no such field exists.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        res0 = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=res0.get_method_name())
        # If go2res contains GO namedtuples
        for fld in res0._fields:
            if fld.startswith('p_') and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns None (no limit) unless study_items is an int other than True.
        """
        study_items = self.study_items
        # True is an int in Python, so it must be excluded before the int test
        if study_items is None or study_items is True:
            return None
        if isinstance(study_items, int):
            return study_items
        return None

# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.


1		"""Manages GO Term fill colors and bordercolors."""
2
3		__copyright__ = "Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved."
4		__author__ = "DV Klopfenstein"
5
6		import sys
7		import collections as cx
8
9
10		class GoeaResults(object):
11		"""Manages GOEA Results for plotting."""
12
13		kws_set = set(['id2symbol', 'study_items', 'items_p_line ', 'pval_name'])
14
15		dflt_items_p_line = 5 # study items (e.g., genes) per line on GO Terms
16		fmtres = "{study_count} genes"
17
18		alpha2col = cx.OrderedDict([
19		# Enriched GOEA GO terms that are significant
20		(0.005, 'mistyrose'),
21		(0.010, 'moccasin'),
22		(0.050, 'lemonchiffon1'),
23		# GOEA GO terms that are not significant
24		(1.000, 'grey95'),
25		])
26
27		def __init__(self, goea_results, **kws):
28		# kws: goea_results or go2nt
29		assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
30		# GOATOOLs results as objects (WAS: Kws goea_results go2nt)
31		self.go2res = {r.GO: r for r in goea_results}
32		self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
33		# GOATOOLs results as a list of namedtuples
34		self.pval_name = self._init_pval_name(**kws)
35		self.study_items = kws.get('study_items', None)
36		self.study_items_max = self._init_study_items_max()
37		self.items_p_line = kws['items_p_line'] if 'items_p_line' in kws else self.dflt_items_p_line
38		self.id2symbol = kws['id2symbol'] if 'id2symbol' in kws else {}
39
40		def prt_summary(self, prt=sys.stdout):
41		"""Print summary of GOEA plotting object."""
42		desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
43		prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
44		N=len(self.go2res), O=desc, P=self.pval_name))
45
46		def get_study_txt(self, goid):
47		"""Get GO text from GOEA study."""
48		if goid in self.go2res:
49		res = self.go2res[goid]
50		if res.study_items is not None:
51		return self._get_item_str(res)
52		else:
53		return self.fmtres.format(study_count=res.study_count)
54
55		def set_goid2color_pval(self, goid2color):
56		"""Fill missing colors based on p-value of an enriched GO term."""
57		alpha2col = self.alpha2col
58	View Code Duplication	if self.pval_name is not None:
		0 ignored issues – show Duplication introduced 2018-06-15 18:48 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
59		pval_name = self.pval_name
60		for goid, res in self.go2res.items():
61		pval = getattr(res, pval_name, None)
62		if pval is not None:
63		for alpha, color in alpha2col.items():
64		if pval <= alpha and res.study_count != 0:
65		if goid not in goid2color:
66		goid2color[goid] = color
67
68		def get_goid2color_pval(self):
69		"""Return a go2color dict containing GO colors determined by P-value."""
70		go2color = {}
71		self.set_goid2color_pval(go2color)
72		color_dflt = self.alpha2col[1.000]
73		for goid in self.go2res:
74		if goid not in go2color:
75		go2color[goid] = color_dflt
76		return go2color
77
78	View Code Duplication	def _get_item_str(self, res):
		0 ignored issues – show Duplication introduced 2018-06-15 18:48 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
79		"""Return genes in any of these formats:
80		1. 19264, 17319, 12520, 12043, 74131, 22163, 12575
81		2. Ptprc, Mif, Cd81, Bcl2, Sash3, Tnfrsf4, Cdkn1a
82		3. 7: Ptprc, Mif, Cd81, Bcl2, Sash3...
83		"""
84		ipl = self.items_p_line
85		prt_items = sorted([self._get_genestr(itemid) for itemid in res.study_items])
86		prt_multiline = [prt_items[i:i+ipl] for i in range(0, len(prt_items), ipl)]
87		num_items = len(prt_items)
88		if self.study_items_max is None:
89		genestr = "\n".join([", ".join(str(e) for e in sublist) for sublist in prt_multiline])
90		return "{N}) {GENES}".format(N=num_items, GENES=genestr)
91		else:
92		if num_items <= self.study_items_max:
93		gene_lines = [", ".join(str(e) for e in sublist) for sublist in prt_multiline]
94		genestr = "\n".join(gene_lines)
95		return genestr
96		else:
97		short_list = prt_items[:self.study_items_max]
98		short_mult = [short_list[i:i+ipl] for i in range(0, len(short_list), ipl)]
99		short_lines = [", ".join(str(e) for e in sublist) for sublist in short_mult]
100		short_str = "\n".join(short_lines)
101		return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])
102
103		def _get_genestr(self, itemid):
104		"""Given a geneid, return the string geneid or a gene symbol."""
105		if itemid in self.id2symbol:
106		symbol = self.id2symbol[itemid]
107		if symbol is not None:
108		return symbol
109		if isinstance(itemid, int):
110		return str(itemid)
111		return itemid
112
113
114		def _init_pval_name(self, **kws):
115		"""Initialize pvalue attribute name."""
116		if 'pval_name' in kws:
117		return kws['pval_name']
118		# If go2res contains GO Terms
119		if self.is_goterm:
120		return "p_{M}".format(M=next(iter(self.go2res.values())).get_method_name())
121		# If go2res contains GO namedtuples
122		for fld in next(iter(self.go2res.values()))._fields:
123		if fld[:2] == 'p_' and fld != 'p_uncorrected':
124		return fld
125
126		def _init_study_items_max(self):
127		"""User can limit the number of genes printed in a GO term."""
128		if self.study_items is None:
129		return None
130		if self.study_items is True:
131		return None
132		if isinstance(self.study_items, int):
133		return self.study_items
134		return None
135
136		# Copyright (C) 2016-2017, DV Klopfenstein, H Tang, All rights reserved.
137

tanghaibao / goatools

Push — master ( 0f596f...821888 )

GoeaResults._get_item_str() F

Complexity

Size

Duplication

Importance

How to fix Complexity

Complexity

Duplication Side-by-Side

Filter issues like