| Total Complexity | 47 | 
| Total Lines | 125 | 
| Duplicated Lines | 26.4 % | 
| Changes | 1 | ||
| Bugs | 0 | Features | 0 | 
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like GoeaResults often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | """Manages GO Term fill colors and bordercolors.""" | ||
class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Wraps a sequence of GOEA result records (objects or namedtuples that
    expose ``GO``, ``study_items`` and ``study_count``) and provides:
      * the text describing the study items of a GO term, and
      * fill colors chosen from the P-value of each GO term.
    """

    # Keyword arguments understood by __init__
    kws_set = {'id2symbol', 'study_items', 'items_p_line', 'pval_name'}

    dflt_items_p_line = 5  # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # P-value thresholds in ascending order; the first threshold that a
    # P-value does not exceed determines the color.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results keyed by GO ID and read optional settings.

        Args:
            goea_results: non-empty, indexable sequence of GOEA results.
            **kws: optional settings:
                pval_name: name of the attribute holding the P-value.
                study_items: None/True for no limit, or an int giving the
                    maximum number of study items printed per GO term.
                items_p_line: number of study items printed per line.
                id2symbol: dict mapping item IDs to display symbols.
        """
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        # GOATOOLs results as objects (WAS: Kws goea_results go2nt)
        self.go2res = {res.GO: res for res in goea_results}
        # Result objects carry "_fldsdefprt"; namedtuples do not
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        # Must follow go2res and is_goterm: both are read when inferring the name
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns None if the GO ID has no GOEA result. Otherwise returns the
        formatted study items, or the study count when the result holds no
        study items.
        """
        res = self.go2res.get(goid)
        if res is None:
            return None
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        Updates ``goid2color`` in place. GO IDs that already have a color
        are left untouched, as are results with no P-value or with a study
        count of zero.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        for goid, res in self.go2res.items():
            if goid in goid2color:
                continue
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in self.alpha2col.items():
                if pval <= alpha and res.study_count != 0:
                    # Thresholds ascend, so the first match is the tightest
                    goid2color[goid] = color
                    break

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value."""
        go2color = {}
        self.set_goid2color_pval(go2color)
        # GO terms which received no color get the color of the last threshold
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            go2color.setdefault(goid, color_dflt)
        return go2color

    def _get_item_str(self, res):
        """Return the sorted study items of a result as display text.

        The text takes one of these forms:
          1. No item limit:            "7) Bcl2, Cd81, ..." (count, then all items)
          2. Items within the limit:   "Bcl2, Cd81, ..."    (all items)
          3. More items than limit:    "7 genes; Bcl2, Cd81..." (truncated)
        Items are broken into lines of ``items_p_line`` items each.
        """
        prt_items = sorted(self._get_genestr(itemid) for itemid in res.study_items)
        num_items = len(prt_items)
        max_items = self.study_items_max
        if max_items is None:
            return "{N}) {GENES}".format(N=num_items, GENES=self._join_items(prt_items))
        if num_items <= max_items:
            return self._join_items(prt_items)
        short_str = self._join_items(prt_items[:max_items])
        return "".join(["{N} genes; ".format(N=num_items), short_str, "..."])

    def _join_items(self, items):
        """Join items with ", ", placing ``items_p_line`` items on each line."""
        ipl = self.items_p_line
        lines = [items[i:i + ipl] for i in range(0, len(items), ipl)]
        return "\n".join(", ".join(str(e) for e in line) for line in lines)

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        symbol = self.id2symbol.get(itemid)
        if symbol is not None:
            return symbol
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        A user-provided ``pval_name`` wins. Otherwise the name is derived
        from the first stored result. Returns None if no name is found.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        res = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=res.get_method_name())
        # If go2res contains GO namedtuples: first "p_*" field other than p_uncorrected
        for fld in res._fields:
            if fld.startswith('p_') and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns None (no limit) unless ``study_items`` is an int.
        """
        study_items = self.study_items
        if study_items is None or study_items is True:
            return None
        # NOTE: bool is a subclass of int, so False is returned as is
        if isinstance(study_items, int):
            return study_items
        return None
| 135 | |||
| 137 |