| Conditions | 13 |
| Total Lines | 155 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 3 | ||
| Bugs | 0 | Features | 0 |
Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.
For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.
Commonly applied refactorings include:
If many parameters/temporary variables are present:
Complex classes like test_example() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python |
||
| 43 | @pytest.mark.skip(reason="requires pydot - works in py2.7 but not py3.4 and 3.5") |
||
| 44 | def test_example(log=sys.stdout): |
||
| 45 | """Run Gene Ontology Enrichment Analysis (GOEA) on Nature data.""" |
||
| 46 | # -------------------------------------------------------------------- |
||
| 47 | # -------------------------------------------------------------------- |
||
| 48 | # Gene Ontology Enrichment Analysis (GOEA) |
||
| 49 | # -------------------------------------------------------------------- |
||
| 50 | # -------------------------------------------------------------------- |
||
| 51 | taxid = 10090 # Mouse study |
||
| 52 | # Load ontologies, associations, and population ids |
||
| 53 | geneids_pop = GeneID2nt_mus.keys() |
||
| 54 | geneids_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx") |
||
| 55 | goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid) |
||
| 56 | # Run GOEA on study |
||
| 57 | #keep_if = lambda nt: getattr(nt, "p_fdr_bh" ) < 0.05 # keep if results are significant |
||
| 58 | goea_results_all = goeaobj.run_study(geneids_study) |
||
| 59 | goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05] |
||
| 60 | compare_results(goea_results_all) |
||
| 61 | geneids = get_study_items(goea_results_sig) |
||
| 62 | # Print GOEA results to files |
||
| 63 | goeaobj.wr_xlsx("nbt3102.xlsx", goea_results_sig) |
||
| 64 | goeaobj.wr_txt("nbt3102_sig.txt", goea_results_sig) |
||
| 65 | goeaobj.wr_txt("nbt3102_all.txt", goea_results_all) |
||
| 66 | # Plot all significant GO terms w/annotated study info (large plots) |
||
| 67 | #plot_results("nbt3102_{NS}.png", goea_results_sig) |
||
| 68 | #plot_results("nbt3102_{NS}_sym.png", goea_results_sig, study_items=5, items_p_line=2, id2symbol=geneids_study) |
||
| 69 | |||
| 70 | |||
| 71 | |||
| 72 | # -------------------------------------------------------------------- |
||
| 73 | # -------------------------------------------------------------------- |
||
| 74 | # Further examination of GOEA results... |
||
| 75 | # -------------------------------------------------------------------- |
||
| 76 | # -------------------------------------------------------------------- |
||
| 77 | obo = goeaobj.obo_dag |
||
| 78 | dpi = 150 # For review: Figures can be saved in .jpg, .gif, .tif or .eps, at 150 dpi |
||
| 79 | |||
| 80 | |||
| 81 | # -------------------------------------------------------------------- |
||
| 82 | # Item 1) Words in GO names associated with large numbers of study genes |
||
| 83 | # -------------------------------------------------------------------- |
||
| 84 | # What GO term words are associated with the largest number of study genes? |
||
| 85 | prt_word2genecnt("nbt3102_genecnt_GOword.txt", goea_results_sig, log) |
||
| 86 | # Curated selection of GO words associated with large numbers of study genes |
||
| 87 | freq_seen = ['RNA', 'translation', 'mitochondr', 'ribosom', # 'ribosomal', 'ribosome', |
||
| 88 | 'adhesion', 'endoplasmic', 'nucleotide', 'apoptotic', 'myelin'] |
||
| 89 | # Collect the GOs which contains the chosen frequently seen words |
||
| 90 | word2NS2gos = get_word2NS2gos(freq_seen, goea_results_sig) |
||
| 91 | go2res = {nt.GO:nt for nt in goea_results_sig} |
||
| 92 | # Print words of interest, the sig GO terms which contain that word, and study genes. |
||
| 93 | prt_word_GO_genes("nbt3102_GO_word_genes.txt", word2NS2gos, go2res, geneids_study, log) |
||
| 94 | # Plot each set of GOs along w/study gene info |
||
| 95 | for word, NS2gos in word2NS2gos.items(): |
||
| 96 | for NS in ['BP', 'MF', 'CC']: |
||
| 97 | if NS in NS2gos: |
||
| 98 | gos = NS2gos[NS] |
||
| 99 | goid2goobj = {go:go2res[go].goterm for go in gos} |
||
| 100 | # dpi: 150 for review, 1200 for publication |
||
| 101 | #dpis = [150, 1200] if word == "RNA" else [150] |
||
| 102 | dpis = [150] |
||
| 103 | for dpi in dpis: |
||
| 104 | fmts = ['png', 'tif', 'eps'] if word == "RNA" else ['png'] |
||
| 105 | for fmt in fmts: |
||
| 106 | plot_goid2goobj( |
||
| 107 | "nbt3102_{WORD}_{NS}_dpi{DPI}.{FMT}".format(WORD=word, NS=NS, DPI=dpi, FMT=fmt), |
||
| 108 | goid2goobj, # source GOs and their GOTerm object |
||
| 109 | items_p_line=3, |
||
| 110 | study_items=6, # Max number of gene symbols to print in each GO term |
||
| 111 | id2symbol=geneids_study, # Contains GeneID-to-Symbol |
||
| 112 | goea_results=goea_results_all, # pvals used for GO Term coloring |
||
| 113 | dpi=dpi) |
||
| 114 | |||
| 115 | |||
| 116 | # -------------------------------------------------------------------- |
||
| 117 | # Item 2) Explore findings of Nature paper: |
||
| 118 | # |
||
| 119 | # Gene Ontology (GO) enrichment analysis showed that the |
||
| 120 | # differentially expressed genes contained statistically |
||
| 121 | # significant enrichments of genes involved in |
||
| 122 | # glycolysis, |
||
| 123 | # cellular response to IL-4 stimulation and |
||
| 124 | # positive regulation of B-cell proliferation |
||
| 125 | # -------------------------------------------------------------------- |
||
| 126 | goid_subset = [ |
||
| 127 | 'GO:0006096', # BP 4.24e-12 10 glycolytic process |
||
| 128 | 'GO:0071353', # BP 7.45e-06 5 cellular response to interleukin-4 |
||
| 129 | 'GO:0030890', # BP 8.22e-07 7 positive regulation of B cell proliferation |
||
| 130 | ] |
||
| 131 | plot_gos("nbt3102_GOs.png", goid_subset, obo, dpi=dpi) |
||
| 132 | plot_gos("nbt3102_GOs_genecnt.png", goid_subset, obo, goea_results=goea_results_all, dpi=dpi) |
||
| 133 | plot_gos("nbt3102_GOs_genelst.png", goid_subset, obo, |
||
| 134 | study_items=True, goea_results=goea_results_all, dpi=dpi) |
||
| 135 | plot_gos("nbt3102_GOs_symlst.png", goid_subset, obo, |
||
| 136 | study_items=True, id2symbol=geneids_study, goea_results=goea_results_all, dpi=dpi) |
||
| 137 | plot_gos("nbt3102_GOs_symlst_trunc.png", goid_subset, obo, |
||
| 138 | study_items=5, id2symbol=geneids_study, goea_results=goea_results_all, dpi=dpi) |
||
| 139 | plot_gos("nbt3102_GOs_GO0005743.png", ["GO:0005743"], obo, |
||
| 140 | items_p_line=2, study_items=6, |
||
| 141 | id2symbol=geneids_study, goea_results=goea_results_all, dpi=dpi) |
||
| 142 | |||
| 143 | # -------------------------------------------------------------------- |
||
| 144 | # Item 3) Create one GO sub-plot per significant GO term from study |
||
| 145 | # -------------------------------------------------------------------- |
||
| 146 | for rec in goea_results_sig: |
||
| 147 | png = "nbt3102_{NS}_{GO}.png".format(GO=rec.GO.replace(':', '_'), NS=rec.NS) |
||
| 148 | goid2goobj = {rec.GO:rec.goterm} |
||
| 149 | plot_goid2goobj(png, |
||
| 150 | goid2goobj, # source GOs and their GOTerm object |
||
| 151 | study_items=15, # Max number of gene symbols to print in each GO term |
||
| 152 | id2symbol=geneids_study, # Contains GeneID-to-Symbol |
||
| 153 | goea_results=goea_results_all, # pvals used for GO Term coloring |
||
| 154 | dpi=dpi) |
||
| 155 | |||
| 156 | # -------------------------------------------------------------------- |
||
| 157 | # Item 4) Explore using manually curated lists of GO terms |
||
| 158 | # -------------------------------------------------------------------- |
||
| 159 | goid_subset = [ |
||
| 160 | 'GO:0030529', # CC D03 intracellular ribonucleoprotein complex (42 genes) |
||
| 161 | 'GO:0015934', # CC D05 large ribosomal subunit (4 genes) |
||
| 162 | 'GO:0015935', # CC D05 small ribosomal subunit (13 genes) |
||
| 163 | 'GO:0022625', # CC D06 cytosolic large ribosomal subunit (16 genes) |
||
| 164 | 'GO:0022627', # CC D06 cytosolic small ribosomal subunit (19 genes) |
||
| 165 | 'GO:0036464', # CC D06 cytoplasmic ribonucleoprotein granule (4 genes) |
||
| 166 | 'GO:0005840', # CC D05 ribosome (35 genes) |
||
| 167 | 'GO:0005844', # CC D04 polysome (6 genes) |
||
| 168 | ] |
||
| 169 | plot_gos("nbt3102_CC_ribosome.png", goid_subset, obo, |
||
| 170 | study_items=6, id2symbol=geneids_study, items_p_line=3, |
||
| 171 | goea_results=goea_results_sig, dpi=dpi) |
||
| 172 | |||
| 173 | goid_subset = [ |
||
| 174 | 'GO:0003723', # MF D04 RNA binding (32 genes) |
||
| 175 | 'GO:0044822', # MF D05 poly(A) RNA binding (86 genes) |
||
| 176 | 'GO:0003729', # MF D06 mRNA binding (11 genes) |
||
| 177 | 'GO:0019843', # MF D05 rRNA binding (6 genes) |
||
| 178 | 'GO:0003746', # MF D06 translation elongation factor activity (5 genes) |
||
| 179 | ] |
||
| 180 | plot_gos("nbt3102_MF_RNA_genecnt.png", |
||
| 181 | goid_subset, |
||
| 182 | obo, |
||
| 183 | goea_results=goea_results_all, dpi=150) |
||
| 184 | for dpi in [150, 1200]: # 150 for review, 1200 for publication |
||
| 185 | plot_gos("nbt3102_MF_RNA_dpi{DPI}.png".format(DPI=dpi), |
||
| 186 | goid_subset, |
||
| 187 | obo, |
||
| 188 | study_items=6, id2symbol=geneids_study, items_p_line=3, |
||
| 189 | goea_results=goea_results_all, dpi=dpi) |
||
| 190 | |||
| 191 | # -------------------------------------------------------------------- |
||
| 192 | # Item 5) Are any significant geneids related to cell cycle? |
||
| 193 | # -------------------------------------------------------------------- |
||
| 194 | import test_genes_cell_cycle as CC |
||
| 195 | genes_cell_cycle = CC.get_genes_cell_cycle(taxid, log=log) |
||
| 196 | genes_cell_cycle_sig = genes_cell_cycle.intersection(geneids) |
||
| 197 | CC.prt_genes("nbt3102_cell_cycle.txt", genes_cell_cycle_sig, taxid, log=None) |
||
| 198 | |||
| 363 |