| Total Complexity | 47 |
| Total Lines | 125 |
| Duplicated Lines | 26.4 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like GoeaResults often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common way to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | """Manages GO Term fill colors and bordercolors.""" |
||
class GoeaResults(object):
    """Manages GOEA Results for plotting.

    Holds GOEA result records keyed by GO ID and provides the text and
    fill color to show for each GO term.
    """

    # Keyword arguments read by __init__ (directly or via _init_pval_name).
    kws_set = {'id2symbol', 'study_items', 'items_p_line', 'pval_name'}

    dflt_items_p_line = 5  # study items (e.g., genes) per line on GO Terms
    fmtres = "{study_count} genes"

    # P-value thresholds in ascending order: the first threshold that a
    # P-value does not exceed determines the color.
    alpha2col = cx.OrderedDict([
        # Enriched GOEA GO terms that are significant
        (0.005, 'mistyrose'),
        (0.010, 'moccasin'),
        (0.050, 'lemonchiffon1'),
        # GOEA GO terms that are not significant
        (1.000, 'grey95'),
    ])

    def __init__(self, goea_results, **kws):
        """Store GOEA results and display options.

        Args:
            goea_results: non-empty sequence of result records. Each record
                must have a ``GO`` attribute. Records having a
                ``_fldsdefprt`` attribute are treated as result objects
                (``get_method_name()`` is used); otherwise they are treated
                as namedtuples (``_fields`` is used).
            **kws: optional settings
                pval_name: name of the record attribute holding the P-value.
                study_items: None/True to list all study items, or an int
                    limiting how many items are listed per GO term.
                items_p_line: number of study items printed per line.
                id2symbol: dict mapping a study item ID to a symbol.

        Raises:
            AssertionError: if goea_results is empty.
        """
        assert goea_results, "NO GOEA RESULTS IN GoeaResults INPUTS"
        # GOATOOLs results, keyed by GO ID
        self.go2res = {r.GO: r for r in goea_results}
        self.is_goterm = hasattr(goea_results[0], "_fldsdefprt")
        # _init_pval_name reads go2res and is_goterm, so it must come after them
        self.pval_name = self._init_pval_name(**kws)
        self.study_items = kws.get('study_items', None)
        self.study_items_max = self._init_study_items_max()
        self.items_p_line = kws.get('items_p_line', self.dflt_items_p_line)
        self.id2symbol = kws.get('id2symbol', {})

    def prt_summary(self, prt=sys.stdout):
        """Print summary of GOEA plotting object."""
        desc = "NtGoeaResults" if self.is_goterm else "namedtuple"
        prt.write("{N} GOEA results from {O}. P-values stored in {P}.\n".format(
            N=len(self.go2res), O=desc, P=self.pval_name))

    def get_study_txt(self, goid):
        """Get GO text from GOEA study.

        Returns the formatted study items if the result for goid has any,
        the study count text if its study_items is None, or None if goid
        has no GOEA result.
        """
        res = self.go2res.get(goid)
        if res is None:
            return None
        if res.study_items is not None:
            return self._get_item_str(res)
        return self.fmtres.format(study_count=res.study_count)

    def set_goid2color_pval(self, goid2color):
        """Fill missing colors based on p-value of an enriched GO term.

        Colors already present in goid2color are never overwritten. GO terms
        with no P-value or with a study_count of 0 are left uncolored.
        """
        pval_name = self.pval_name
        if pval_name is None:
            return
        for goid, res in self.go2res.items():
            if goid in goid2color:
                continue
            pval = getattr(res, pval_name, None)
            if pval is None:
                continue
            for alpha, color in self.alpha2col.items():
                if pval <= alpha and res.study_count != 0:
                    goid2color[goid] = color
                    # Thresholds ascend, so the first match is the one to keep
                    break

    def get_goid2color_pval(self):
        """Return a go2color dict containing GO colors determined by P-value.

        GO terms left uncolored by set_goid2color_pval get the color of the
        1.000 threshold.
        """
        go2color = {}
        self.set_goid2color_pval(go2color)
        color_dflt = self.alpha2col[1.000]
        for goid in self.go2res:
            if goid not in go2color:
                go2color[goid] = color_dflt
        return go2color

    @staticmethod
    def _join_item_lines(items, items_p_line):
        """Return items as comma-separated lines of at most items_p_line items."""
        lines = [items[i:i + items_p_line] for i in range(0, len(items), items_p_line)]
        return "\n".join(", ".join(str(e) for e in line) for line in lines)

    def _get_item_str(self, res):
        """Return the sorted study items of a result in one of these formats:
            1. No item limit (count prefix, then all items):
                   7) Bcl2, Cd81, Cdkn1a, Mif, Ptprc
                   Sash3, Tnfrsf4
            2. Item limit set and not exceeded (all items, no prefix):
                   Bcl2, Cd81, Cdkn1a, Mif, Ptprc
            3. Item limit exceeded (count, then the first items):
                   7 genes; Bcl2, Cd81, Cdkn1a, Mif, Ptprc...
        Items are shown as symbols when id2symbol provides one.
        """
        ipl = self.items_p_line
        items = sorted(self._get_genestr(itemid) for itemid in res.study_items)
        num_items = len(items)
        max_items = self.study_items_max
        if max_items is None:
            return "{N}) {GENES}".format(
                N=num_items, GENES=self._join_item_lines(items, ipl))
        if num_items <= max_items:
            return self._join_item_lines(items, ipl)
        shown = self._join_item_lines(items[:max_items], ipl)
        return "".join(["{N} genes; ".format(N=num_items), shown, "..."])

    def _get_genestr(self, itemid):
        """Given a geneid, return the string geneid or a gene symbol."""
        symbol = self.id2symbol.get(itemid)
        if symbol is not None:
            return symbol
        # No usable symbol: int IDs become strings; other IDs pass through as-is
        if isinstance(itemid, int):
            return str(itemid)
        return itemid

    def _init_pval_name(self, **kws):
        """Initialize pvalue attribute name.

        Uses kws['pval_name'] if given. Otherwise the name is derived from
        the first stored result: "p_<method>" for result objects, or the
        first namedtuple field starting with "p_" other than
        "p_uncorrected". Returns None if no such field exists.
        """
        if 'pval_name' in kws:
            return kws['pval_name']
        first_res = next(iter(self.go2res.values()))
        # If go2res contains GO Terms
        if self.is_goterm:
            return "p_{M}".format(M=first_res.get_method_name())
        # If go2res contains GO namedtuples
        for fld in first_res._fields:
            if fld[:2] == 'p_' and fld != 'p_uncorrected':
                return fld
        return None

    def _init_study_items_max(self):
        """User can limit the number of genes printed in a GO term.

        Returns the int limit, or None (no limit) when study_items is None,
        True, or not an int.
        """
        study_items = self.study_items
        # True is an int in Python, so rule it out before the int test
        if study_items is None or study_items is True:
            return None
        if isinstance(study_items, int):
            return study_items
        return None
||
| 135 | |||
| 137 |