Total Complexity | 47 |
Total Lines | 125 |
Duplicated Lines | 26.4 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like GoeaResults often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
1 | """Manages GO Term fill colors and bordercolors.""" |
||
class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Holds GOEA result records keyed by their ``GO`` attribute and derives,
    for each GO ID, the study text and a fill color chosen from the P-value
    attribute named by ``pval_name``.
    """

    # Keyword names read by __init__ from **kws.
    kws_set = set(['id2symbol', 'study_items', 'items_p_line', 'pval_name'])

    dflt_items_p_line = 5  # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # Ascending P-value thresholds; the first threshold a P-value fits under
    # determines the color.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results and plotting options.

        Args:
            goea_results: non-empty, indexable sequence of result records,
                each having a ``GO`` attribute.
            **kws: optional ``pval_name``, ``study_items``, ``items_p_line``
                and ``id2symbol`` (see ``kws_set``).
        """
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        # Results keyed by GO ID
        self.go2res = {r.GO: r for r in goea_results}
        # Records carrying "_fldsdefprt" are treated as result objects;
        # anything else is treated as a namedtuple.
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns the formatted study items when the result has them, the
        study-count text otherwise, and None when goid has no GOEA result.
        """
        if goid not in self.go2res:
            return None
        res = self.go2res[goid]
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        Mutates goid2color in place; GO IDs already present keep their color.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        alpha2col = self.alpha2col
        for goid, res in self.go2res.items():
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in alpha2col.items():
                if pval <= alpha and res.study_count != 0:
                    if goid not in goid2color:
                        goid2color[goid] = color
                    # Thresholds ascend, so later ones would match too but
                    # could never overwrite the color chosen here.
                    break

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value."""
        go2color = {}
        self.set_goid2color_pval(go2color)
        # GO IDs left uncolored get the color of the 1.000 threshold.
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            if goid not in go2color:
                go2color[goid] = color_dflt
        return go2color

    def _get_item_str(self, res):
        """Return the sorted study items of res as text.

        The form depends on ``study_items_max``:
          1. None:              "7) Bcl2, Cd81, Cdkn1a, Mif, Ptprc..." (all items)
          2. count <= max:      "Bcl2, Cd81, Cdkn1a, Mif, Ptprc"      (all items)
          3. count > max:       "7 genes; Bcl2, Cd81..."     (first max items)
        Items are wrapped at ``items_p_line`` per line.
        """
        prt_items = sorted(self._get_genestr(itemid) for itemid in res.study_items)
        num_items = len(prt_items)
        max_items = self.study_items_max
        if max_items is None:
            return "{N}) {GENES}".format(N=num_items, GENES=self._fmt_item_lines(prt_items))
        if num_items <= max_items:
            return self._fmt_item_lines(prt_items)
        short_str = self._fmt_item_lines(prt_items[:max_items])
        return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])

    def _fmt_item_lines(self, items):
        """Join items with ", ", breaking onto a new line every items_p_line items."""
        ipl = self.items_p_line
        chunks = [items[i:i+ipl] for i in range(0, len(items), ipl)]
        return "\n".join(", ".join(str(e) for e in chunk) for chunk in chunks)

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        if itemid in self.id2symbol:
            symbol = self.id2symbol[itemid]
            if symbol is not None:
                return symbol
        # No usable symbol: fall back to the ID, stringified if it is an int
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        Uses kws['pval_name'] when given. Otherwise derives the name from the
        first stored result; returns None if no suitable field is found.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        first_res = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=first_res.get_method_name())
        # If go2res contains GO namedtuples: first 'p_*' field other than
        # the uncorrected P-value
        for fld in first_res._fields:
            if fld[:2] == 'p_' and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns the int given as ``study_items``; None (no limit) when
        ``study_items`` is None, True, or not an int.
        """
        items = self.study_items
        # True is an int subclass, so it must be excluded explicitly.
        if items is True or not isinstance(items, int):
            return None
        return items
||
135 | |||
137 |