Completed
Push — master ( ca146f...1b2584 )
by
unknown
53s
created

mapslim()   C

Complexity

Conditions 8

Size

Total Lines 49

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
dl 0
loc 49
rs 6.8024
c 0
b 0
f 0
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
4
""" Maps gene associations to a 'slim' ontology.
5
6
    This roughly implements the functionality of the perl script map2slim.
7
    See: [http://search.cpan.org/~cmungall/go-perl/scripts/map2slim]
8
9
    For a description of GO Slims look here:
10
    http://geneontology.org/GO.slims.shtml
11
12
    For now this does not implement Bucket Terms.
13
"""
14
15
from .obo_parser import GODag
16
17
18
def mapslim(go_term, go_dag, goslim_dag):
    """ Maps a GO term (accession) to its GO slim terms.

        Parameters:
        - go_term: the accession to be mapped to the slim terms
        - go_dag: the (full) Gene Ontology DAG
        - goslim_dag: the GO Slim DAG

        Returns:
            Two sets:
            direct_ancestors, all_ancestors
        - direct_ancestors: The direct ancestors of the given term that are in
                            the GO Slim. Those are the terms that are not
                            covered by earlier ancestors of the GO Slims in
                            _any_ path (from bottom to top).
        - all_ancestors:    All ancestors of the given term that are part of
                            the GO-Slim terms.

        Raises:
        - TypeError:  if go_dag or goslim_dag is not a GODag instance
        - ValueError: if go_term is not contained in go_dag

    """
    # check parameters
    if not isinstance(go_dag, GODag):
        raise TypeError("go_dag must be an instance of GODag")
    if not isinstance(goslim_dag, GODag):
        raise TypeError("goslim_dag must be an instance of GODag")
    if go_term not in go_dag:
        raise ValueError("go_term must be an accession that is in the go_dag")

    all_ancestors = set()
    covered_ancestors = set()

    # examine every path the full DAG reports for the term
    for path in go_dag.paths_to_top(go_term):
        # The walk has to run bottom->up, i.e. from the go_term item towards
        # the root. reversed() yields that order without modifying the list
        # handed out by go_dag (an in-place reverse would alter it for any
        # other holder of the same list).
        got_leaf = False
        for term in reversed(path):
            if term.id in goslim_dag:
                all_ancestors.add(term.id)
                # every slim term after the first one met on this path is
                # already covered by that lower slim term
                if got_leaf:
                    covered_ancestors.add(term.id)
                got_leaf = True

    # get the direct ancestors, i.e. those that are not covered by an earlier
    # ancestor of the GO-Slim in _any_ path (in bottom->top order)
    direct_ancestors = all_ancestors - covered_ancestors
    return direct_ancestors, all_ancestors
67