Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

_test_path_bp_mf()   B

Complexity

Conditions 8

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
dl 0
loc 24
rs 7.3333
c 0
b 0
f 0
1
"""Semantic Similarity test for Issue #86.
2
3
https://github.com/tanghaibao/goatools/issues/86
4
5
semantic_similarity & resnik_sim work for a few entities but give an error for others:
6
    return max(common_parent_go_ids(terms, go), key=lambda t: go[t].depth)
7
        ValueError: max() arg is an empty sequence
8
9
It issues this error when there is no common parent in both provided
10
entities/genes. Here is one example producing this error
11
    semantic_similarity(GO:0003676, GO:0007516, godag)
12
13
"""
14
15
import os
16
import sys
17
from goatools.base import get_godag
18
from goatools.associations import dnld_assc
19
from goatools.semantic import semantic_distance, semantic_similarity, TermCounts
20
from goatools.semantic import resnik_sim, lin_sim
21
22
23
def test_top_parent(prt=sys.stdout):
    """Run every Issue #86 semantic-distance/similarity check.

    The GO DAG is read from ``data/i86.obo``, resolved relative to the
    parent directory of the directory holding this file.

    Args:
        prt: Stream handed to the helper checks for their per-comparison
            report lines; the helpers skip reporting when it is None.

    The closing "TESTS PASSed" line always goes to ``sys.stdout``,
    regardless of ``prt``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.join(here, "..")
    dag = get_godag(os.path.join(repo_root, "data/i86.obo"))

    # GO terms on one path, then on parallel branches of one namespace.
    _test_path_same(dag, prt)
    _test_path_parallel(dag, prt)
    # Finally across namespaces (BP vs. MF), with a branch distance of 5.
    _test_path_bp_mf(5, dag, prt)

    sys.stdout.write("TESTS PASSed: similarity_top_parent\n")
36
37
def _test_path_bp_mf(branch_dist, godag, prt):
    """Check scores between a BP term and an MF term.

    Without ``branch_dist`` the semantic distance, semantic similarity,
    Resnik similarity and Lin similarity are each asserted to be None.
    With ``branch_dist`` the semantic distance is asserted to equal the
    depth of both terms plus ``branch_dist``.

    Args:
        branch_dist: Value passed as the fourth argument of
            ``semantic_distance`` and added to the expected distance.
        godag: GO DAG, indexable by GO ID, whose items expose ``depth``.
        prt: Stream for report lines, or None to suppress them.
    """
    mf_id = 'GO:0003676'  # level-03 depth-03 nucleic acid binding [molecular_function]
    bp_id = 'GO:0007516'  # level-04 depth-05 hemocyte development [biological_process]

    # Scores computed without a branch distance.
    distance = semantic_distance(mf_id, bp_id, godag)
    similarity = semantic_similarity(mf_id, bp_id, godag)
    associations = dnld_assc("gene_association.tair", godag)
    counts = TermCounts(godag, associations)
    line = '({GO1}, {GO2}) {TYPE:6} score = {VAL}\n'
    resnik = resnik_sim(mf_id, bp_id, godag, counts)
    lin = lin_sim(mf_id, bp_id, godag, counts)

    if prt is not None:
        report = (
            ('semantic distance', distance),
            ('semantic similarity', similarity),
            ('Resnik similarity', resnik),
            ('Lin similarity', lin),
        )
        for label, score in report:
            prt.write(line.format(TYPE=label, GO1=mf_id, GO2=bp_id, VAL=score))

    assert distance is None
    assert similarity is None
    assert resnik is None
    assert lin is None

    # Same pair again, now supplying the branch distance.
    bridged = semantic_distance(mf_id, bp_id, godag, branch_dist)
    if prt is not None:
        prt.write(line.format(TYPE='semantic distance', GO1=mf_id, GO2=bp_id, VAL=bridged))
    assert bridged == godag[mf_id].depth + godag[bp_id].depth + branch_dist
61
62
def _test_path_parallel(godag, prt):
    """Check semantic distances from one BP term to terms on a parallel branch.

    Args:
        godag: GO DAG passed through to ``semantic_distance``.
        prt: Stream for report lines, or None to suppress them.
    """
    start = 'GO:0007516'  # BP level-04 depth-05 hemocyte development
    # (GO ID on the parallel branch, expected distance from ``start``)
    cases = (
        ('GO:0044763', 3),  # BP level-02 depth-02 single-organism cellular process
        ('GO:0008219', 4),  # BP level-03 depth-03 cell death
        ('GO:0070997', 5),  # BP level-04 depth-04 neuron death
        ('GO:0036475', 6),  # BP level-05 depth-05 neuron death in response to oxidative stress
        ('GO:0036476', 7),  # BP level-06 depth-06 neuron death in response to hydrogen peroxide
    )
    line = '{DST} semantic_distance between {GO1} and {GO2} on parallel branches\n'
    for other, expected in cases:
        actual = semantic_distance(start, other, godag)
        if prt is not None:
            prt.write(line.format(DST=actual, GO1=start, GO2=other))
        assert actual == expected
78
79
80
def _test_path_same(godag, prt):
    """Check semantic distances between GO IDs on the same path.

    Args:
        godag: GO DAG passed through to ``semantic_distance``.
        prt: Stream for report lines, or None to suppress them.
    """
    start = 'GO:0007516'  # level-04 depth-05 hemocyte development [biological_process]
    # Listed nearest-first, so the expected distances are 1, 2, 3, 4, 5.
    nearest_first = [
        'GO:0048468',  # level-03 depth-04 cell development [biological_process]
        'GO:0048869',  # level-03 depth-03 cellular developmental process [biological_process]
        'GO:0044763',  # level-02 depth-02 single-organism cellular process [biological_process]
        'GO:0009987',  # level-01 depth-01 cellular process [biological_process]
        'GO:0008150',  # level-00 depth-00 biological_process [biological_process]
    ]
    line = '{DST} semantic_distance for {GO1} and {GO2} on the same branch\n'
    for expected, other in enumerate(nearest_first, start=1):
        actual = semantic_distance(start, other, godag)
        if prt is not None:
            prt.write(line.format(DST=actual, GO1=start, GO2=other))
        assert actual == expected
96
97
if __name__ == '__main__':
    # Report to stdout only when the script is run with no extra
    # command-line arguments; any argument silences the report lines.
    PRT = sys.stdout if len(sys.argv) == 1 else None
    test_top_parent(PRT)
100