Completed
Push — master ( b3af87...c31a95 )
by
unknown
52s
created

_Run.get_gosubdag_r0()   A

Complexity

Conditions 1

Size

Total Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 9
rs 9.95
1
#!/usr/bin/env python
2
"""Ancestors/Descendants."""
3
4
import os
5
import sys
6
import timeit
7
import numpy as np
8
from numpy.random import shuffle
9
from scipy import stats
10
11
from goatools.base import download_go_basic_obo
12
from goatools.obo_parser import GODag
13
from goatools.test_data.godag_timed import prt_hms
14
from goatools.gosubdag.gosubdag import GoSubDag
15
16
17
def test_go_pools():
18
    """Print a comparison of GO terms from different species in two different comparisons."""
19
    objr = _Run()
20
    # Check that subset GoSubDags have the same ancestors/descendants as Full GoSubDag
21
    for qty in np.random.randint(10, 100, size=10):
22
        print("")
23
        goids = objr.get_goids_rand(qty)
24
        # No relationships loaded; GoSubDag subset equivalent to Full subset?
25
        gosubdag_r0 = objr.get_gosubdag_r0(goids)
26
        for goid in gosubdag_r0.go2obj:
27
            r0_u = gosubdag_r0.rcntobj.go2parents[goid]
28
            r0_d = gosubdag_r0.rcntobj.go2descendants[goid]
29
            assert r0_u == objr.gosubdag_r0.rcntobj.go2parents[goid]
30
            assert r0_d == objr.gosubdag_r0.rcntobj.go2descendants[goid]
31
        # All relationships loaded; GoSubDag(r0) vs. GoSubDag(r1)
32
        gosubdag_r1 = objr.get_gosubdag_r1(goids)
33
        assert gosubdag_r0.go_sources == gosubdag_r1.go_sources
34
        assert set(gosubdag_r0.go2obj).issubset(gosubdag_r1.go2obj)
35
        cnts = {'r0_u':[], 'r1_u':[], 'r0_d':[], 'r1_d':[]}
36
        for goid in gosubdag_r0.go2obj:
37
            r0_u = gosubdag_r0.rcntobj.go2parents[goid]
38
            r0_d = gosubdag_r0.rcntobj.go2descendants[goid]
39
            r1_u = gosubdag_r1.rcntobj.go2parents[goid]
40
            r1_d = gosubdag_r1.rcntobj.go2descendants[goid]
41
            assert r0_d.issubset(r1_d), "R1({}) R0({})".format(len(r1_d), len(r0_d))
42
            assert r0_u.issubset(r1_u), "R1({}) R0({})".format(len(r1_u), len(r0_u))
43
            cnts['r0_u'].append(len(r0_u))
44
            cnts['r1_u'].append(len(r1_u))
45
            cnts['r0_d'].append(len(r0_d))
46
            cnts['r1_d'].append(len(r1_d))
47
        objr.prt_cnts(cnts)
48
49
50
class _Run(object):
51
    """Group entire go-basic.obo"""
52
53
    obo = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../go-basic.obo")
54
55
    def __init__(self):
56
        download_go_basic_obo(self.obo, sys.stdout, loading_bar=None)
57
        self.godag_r0 = GODag("go-basic.obo")
58
        self.godag_r1 = GODag("go-basic.obo", optional_attrs=set(['relationship']))
59
        self.goids = list(set(o.id for o in self.godag_r0.values()))
60
        # GoSubDag (plain)
61
        tic = timeit.default_timer()
62
        self.gosubdag_r0 = GoSubDag(self.goids, self.godag_r0, prt=None)
63
        prt_hms(tic, "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
64
            N=len(self.gosubdag_r0.go2obj), S=len(self.gosubdag_r0.go_sources)))
65
        # GoSubDag with relationships
66
        self.gosubdag_r1 = GoSubDag(self.goids, self.godag_r1, prt=None, relationships=True)
67
        prt_hms(tic, "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
68
            N=len(self.gosubdag_r1.go2obj), S=len(self.gosubdag_r1.go_sources)))
69
70
    def prt_cnts(self, cnts):
71
        """Compare ancestor/descendant counts with relatives=False/True."""
72
        k2v = {k:self.str_stats(v) for k, v in cnts.items()}
73
        print(k2v)
74
75
    @staticmethod
76
    def str_stats(vals):
77
        """Print statistics on values."""
78
        ntd = stats.describe(vals)
79
        std = int(round(np.sqrt(ntd.variance)))
80
        return "({m} {M}) STD={STD:,}".format(m=ntd.minmax[0], M=ntd.minmax[1], STD=std)
81
82
    def get_gosubdag_r0(self, goids):
83
        """Return a GoSubDag with N randomly chosen GO sources."""
84
        tic = timeit.default_timer()
85
        gosubdag = GoSubDag(goids, self.godag_r0, relationships=None,
86
                            #rcntobj=self.gosubdag_r0.rcntobj,
87
                            prt=None)
88
        prt_hms(tic, "GoSubDag r0 {N:4} GOs {S:3} srcs".format(
89
            N=len(gosubdag.go2obj), S=len(gosubdag.go_sources)))
90
        return gosubdag
91
92
    def get_gosubdag_r1(self, goids):
93
        """Return a GoSubDag with N randomly chosen GO sources."""
94
        tic = timeit.default_timer()
95
        gosubdag = GoSubDag(goids, self.godag_r1, relationships=True,
96
                            #rcntobj=self.gosubdag_r1.rcntobj,
97
                            prt=None)
98
        prt_hms(tic, "GoSubDag r1 {N:4} GOs {S:3} srcs".format(
99
            N=len(gosubdag.go2obj), S=len(gosubdag.go_sources)))
100
        return gosubdag
101
102
    def get_goids_rand(self, qty):
103
        """Return N randomly chosen GO IDs."""
104
        shuffle(self.goids)
105
        return self.goids[:qty]
106
107
108
if __name__ == '__main__':
109
    test_go_pools()
110