| 1 |  |  | """Semantic Similarity test for Issue #86. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | https://github.com/tanghaibao/goatools/issues/86 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | semantic_similarity & resnik_sim work for a few entities but give an error for others: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |     return max(common_parent_go_ids(terms, go), key=lambda t: go[t].depth) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |         ValueError: max() arg is an empty sequence | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | It issues this error when there is no common parent in both provided | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  | entities/genes. Here is one example producing this error | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |     semantic_similarity(GO:0003676, GO:0007516, godag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  | """ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | import os | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | import sys | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | from goatools.base import get_godag | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  | from goatools.associations import dnld_assc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | from goatools.semantic import semantic_distance, semantic_similarity, TermCounts | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | from goatools.semantic import resnik_sim, lin_sim | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  | def test_top_parent(prt=sys.stdout): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     """Semantic Similarity test for Issue #86.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |     fin_obo = "data/i86.obo" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     branch_dist = 5 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |     repo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     godag = get_godag(os.path.join(repo, fin_obo)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |     # Get all the annotations from arabidopsis. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |     # Calculate the semantic distance and semantic similarity: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |     _test_path_same(godag, prt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     _test_path_parallel(godag, prt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     _test_path_bp_mf(branch_dist, godag, prt) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |     sys.stdout.write("TESTS PASSed: similarity_top_parent\n") | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 36 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 37 |  |  | def _test_path_bp_mf(branch_dist, godag, prt): | 
            
                                                                        
                            
            
                                    
            
            
                | 38 |  |  |     """Test distances between BP branch and MF branch.""" | 
            
                                                                        
                            
            
                                    
            
            
                | 39 |  |  |     go_mf = 'GO:0003676' # level-03 depth-03 nucleic acid binding [molecular_function] | 
            
                                                                        
                            
            
                                    
            
            
                | 40 |  |  |     go_bp = 'GO:0007516' # level-04 depth-05 hemocyte development [biological_process] | 
            
                                                                        
                            
            
                                    
            
            
                | 41 |  |  |     dst_none = semantic_distance(go_mf, go_bp, godag) | 
            
                                                                        
                            
            
                                    
            
            
                | 42 |  |  |     sim_none = semantic_similarity(go_mf, go_bp, godag) | 
            
                                                                        
                            
            
                                    
            
            
                | 43 |  |  |     assc = dnld_assc("gene_association.tair", godag) | 
            
                                                                        
                            
            
                                    
            
            
                | 44 |  |  |     termcounts = TermCounts(godag, assc) | 
            
                                                                        
                            
            
                                    
            
            
                | 45 |  |  |     fmt = '({GO1}, {GO2}) {TYPE:6} score = {VAL}\n' | 
            
                                                                        
                            
            
                                    
            
            
                | 46 |  |  |     sim_r = resnik_sim(go_mf, go_bp, godag, termcounts) | 
            
                                                                        
                            
            
                                    
            
            
                | 47 |  |  |     sim_l = lin_sim(go_mf, go_bp, godag, termcounts) | 
            
                                                                        
                            
            
                                    
            
            
                | 48 |  |  |     if prt is not None: | 
            
                                                                        
                            
            
                                    
            
            
                | 49 |  |  |         prt.write(fmt.format(TYPE='semantic distance', GO1=go_mf, GO2=go_bp, VAL=dst_none)) | 
            
                                                                        
                            
            
                                    
            
            
                | 50 |  |  |         prt.write(fmt.format(TYPE='semantic similarity', GO1=go_mf, GO2=go_bp, VAL=sim_none)) | 
            
                                                                        
                            
            
                                    
            
            
                | 51 |  |  |         prt.write(fmt.format(TYPE='Resnik similarity', GO1=go_mf, GO2=go_bp, VAL=sim_r)) | 
            
                                                                        
                            
            
                                    
            
            
                | 52 |  |  |         prt.write(fmt.format(TYPE='Lin similarity', GO1=go_mf, GO2=go_bp, VAL=sim_l)) | 
            
                                                                        
                            
            
                                    
            
            
                | 53 |  |  |     assert dst_none is None | 
            
                                                                        
                            
            
                                    
            
            
                | 54 |  |  |     assert sim_none is None | 
            
                                                                        
                            
            
                                    
            
            
                | 55 |  |  |     assert sim_r is None | 
            
                                                                        
                            
            
                                    
            
            
                | 56 |  |  |     assert sim_l is None | 
            
                                                                        
                            
            
                                    
            
            
                | 57 |  |  |     sim_d = semantic_distance(go_mf, go_bp, godag, branch_dist) | 
            
                                                                        
                            
            
                                    
            
            
                | 58 |  |  |     if prt is not None: | 
            
                                                                        
                            
            
                                    
            
            
                | 59 |  |  |         prt.write(fmt.format(TYPE='semantic distance', GO1=go_mf, GO2=go_bp, VAL=sim_d)) | 
            
                                                                        
                            
            
                                    
            
            
                | 60 |  |  |     assert sim_d == godag[go_mf].depth + godag[go_bp].depth + branch_dist | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  | def _test_path_parallel(godag, prt): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |     """Test distances between GO IDs on parallel branches.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |     goid_bottom = 'GO:0007516' # BP level-04 depth-05 hemocyte development | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |     # Test distances up a parallel branch | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |     goids = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |         'GO:0044763',  # BP level-02 depth-02 single-organism cellular process | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |         'GO:0008219',  # BP level-03 depth-03 cell death | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |         'GO:0070997',  # BP level-04 depth-04 neuron death | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 |  |  |         'GO:0036475',  # BP level-05 depth-05 neuron death in response to oxidative stress | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 |  |  |         'GO:0036476']  # BP level-06 depth-06 neuron death in response to hydrogen peroxide | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |     fmt = '{DST} semantic_distance between {GO1} and {GO2} on parallel branches\n' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 |  |  |     for dst_exp, goid in enumerate(goids, 3): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 |  |  |         dst_act = semantic_distance(goid_bottom, goid, godag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 |  |  |         if prt is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |             prt.write(fmt.format(DST=dst_act, GO1=goid_bottom, GO2=goid)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 |  |  |         assert dst_act == dst_exp | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  | def _test_path_same(godag, prt): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |     """Test distances btween GO IDs on the same path.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 |  |  |     goid_bottom = 'GO:0007516' # level-04 depth-05 hemocyte development [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |     # Test distances up the same branch | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 |  |  |     goids_bp = [ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 |  |  |         'GO:0008150', # level-00 depth-00 biological_process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |         'GO:0009987', # level-01 depth-01 cellular process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |         'GO:0044763', # level-02 depth-02 single-organism cellular process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |         'GO:0048869', # level-03 depth-03 cellular developmental process [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 |  |  |         'GO:0048468'] # level-03 depth-04 cell development [biological_process] | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 |  |  |     fmt = '{DST} semantic_distance for {GO1} and {GO2} on the same branch\n' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 |  |  |     for dst_exp, goid in enumerate(reversed(goids_bp), 1): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |         dst_act = semantic_distance(goid_bottom, goid, godag) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |         if prt is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |             prt.write(fmt.format(DST=dst_act, GO1=goid_bottom, GO2=goid)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 |  |  |         assert dst_act == dst_exp | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  | if __name__ == '__main__': | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |     PRT = None if len(sys.argv) != 1 else sys.stdout | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 99 |  |  |     test_top_parent(PRT) | 
            
                                                        
            
                                    
            
            
                | 100 |  |  |  |