Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

test_gpad_read()   B

Complexity

Conditions 8

Size

Total Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 8
dl 0
loc 25
rs 7.3333
c 2
b 0
f 0
1
#!/usr/bin/env python
2
"""Test reading GPAD files from Gene Ontology Annotation (GOA) resource http://www.ebi.ac.uk/GOA."""
3
4
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
5
__author__ = "DV Klopfenstein"
6
7
import os
8
import sys
9
import collections as cx
10
from goatools.anno.dnld_ebi_goa import DnldGoa
11
from goatools.anno.gpad_reader import GpadReader
12
13
def test_gpad_read(run_desc="mouse", prt=sys.stdout):
    """Test reading GPAD files from GOA source http://www.ebi.ac.uk/GOA.

    Downloads the GPAD file(s) selected by ``run_desc``, reads each one with
    ``GpadReader`` and writes a report of Annotation Extension usage to ``prt``.

    Args:
        run_desc: Passed to ``_dnld_gpad`` to choose which species to download.
        prt: Writable stream that receives the report (default: ``sys.stdout``).
    """
    objdnld = DnldGoa()
    species2gpad = _dnld_gpad(objdnld, run_desc)
    # Count Annotation Extension Relations across all species
    relations = cx.Counter()
    pat = "{N:8,} of {M:8,} {P:5.2f}% associations have Annotation Extensions in {ORG}\n"
    for org, gpad_file in sorted(species2gpad.items()):
        orgstr = "{ORG} {GPAD}".format(ORG=org, GPAD=os.path.basename(gpad_file))
        prt.write("\n{GPAD}\n".format(GPAD=orgstr))
        objgpad = GpadReader(gpad_file)
        # Single pass: count associations carrying an Extension and accumulate
        # their relation counts (assertions are present in the GPAD reader class).
        num_ext = 0
        for ntgpad in objgpad.associations:
            extension = ntgpad.Extension
            if extension is not None:
                num_ext += 1
            if extension:
                relations += extension.get_relations_cnt()
        # The Extensions field is new in GPAD
        qty = objgpad.qty
        # Guard against a GPAD file with no associations (avoids ZeroDivisionError)
        percent = 100.*num_ext/qty if qty else 0.0
        prt.write(pat.format(N=num_ext, M=qty, P=percent, ORG=org))
        for rel, cnt in objgpad.get_relation_cnt().most_common():
            prt.write("    {C:6,} {R}\n".format(C=cnt, R=rel))

    prt.write("\n{N} Annotation Extensions Relations found among all species:\n".format(
        N=len(relations)))
    for rel, cnt in relations.most_common():
        prt.write("{C:10,} {R}\n".format(C=cnt, R=rel))
38
39
def _dnld_gpad(objdnld, run_desc):
    """Download GPAD files and return a dict mapping each species to its download.

    Args:
        objdnld: Downloader exposing an iterable ``species`` attribute and a
            ``dnld_goa(species, ext, item)`` method.
        run_desc: Selects what to download:
            * a name found in ``objdnld.species`` -> only that species;
            * ``"inc_uniprot"`` -> every species, including 'uniprot';
            * anything else -> every species except 'uniprot'.

    Returns:
        dict: ``{species: value returned by objdnld.dnld_goa(species, 'gpa', None)}``.
    """
    species_cur = set(objdnld.species)
    # Run one species
    if run_desc in species_cur:
        species_cur = {run_desc}
    # Uniprot is large, so skip it unless specifically asked to include it
    elif run_desc != "inc_uniprot":
        # discard, not remove: do not fail if 'uniprot' is not offered at all
        species_cur.discard('uniprot')
    # Download GPAD files for species
    return {species: objdnld.dnld_goa(species, 'gpa', None) for species in species_cur}
53
54
55
if __name__ == '__main__':
    # No command-line arguments: skip the large uniprot file.
    # Any other argument count: include it.
    if len(sys.argv) != 1:
        RUN_DESC = "inc_uniprot"
    else:
        RUN_DESC = "not_uniprot"
    test_gpad_read(RUN_DESC)
58
59
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
60