|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- coding: UTF-8 -*- |
|
3
|
|
|
|
|
4
|
|
|
""" Maps gene associations to a 'slim' ontology. |
|
5
|
|
|
|
|
6
|
|
|
This roughly implements the functionality of the Perl script map2slim.
See: http://search.cpan.org/~cmungall/go-perl/scripts/map2slim

For a description of GO Slims see:
    http://geneontology.org/GO.slims.shtml
|
11
|
|
|
|
|
12
|
|
|
For now this does not implement Bucket Terms. |
|
13
|
|
|
""" |
|
14
|
|
|
|
|
15
|
|
|
from .obo_parser import GODag |
|
16
|
|
|
|
|
17
|
|
|
|
|
18
|
|
|
def mapslim(go_term, go_dag, goslim_dag):
    """Map a GO term (accession) to its GO slim terms.

    Parameters:
        - go_term: the accession to be mapped to the slim terms
        - go_dag: the (full) Gene Ontology DAG
        - goslim_dag: the GO Slim DAG

    Returns:
        A pair of sets ``(direct_ancestors, all_ancestors)``:
        - direct_ancestors: the GO Slim terms reached first when walking
                            from the term towards the root, i.e. those not
                            preceded by another GO Slim term on _any_ path
                            (in bottom->top order).
        - all_ancestors:    every term on any path from the term to the
                            root that is part of the GO Slim. The term
                            itself is included when it is a slim term.

    Raises:
        TypeError: if go_dag or goslim_dag is not a GODag instance.
        ValueError: if go_term is not contained in go_dag.
    """
    # Validate the arguments up front (same order as the checks are listed
    # in the docstring).
    if not isinstance(go_dag, GODag):
        raise TypeError("go_dag must be an instance of GODag")
    if not isinstance(goslim_dag, GODag):
        raise TypeError("goslim_dag must be an instance of GODag")
    if go_term not in go_dag:
        raise ValueError("go_term must be an accession that is in the go_dag")

    slim_hits = set()   # every slim term seen on any path
    shadowed = set()    # slim terms that sit above another slim term

    for path in go_dag.paths_to_top(go_term):
        # The walk has to go bottom->up (from go_term towards the root), so
        # the path is flipped in place before it is scanned.
        path.reverse()

        # Slim terms on this path, in the order they are met on the way up.
        hits_on_path = [term.id for term in path if term.id in goslim_dag]

        slim_hits.update(hits_on_path)
        # Only the first hit on a path is "direct"; everything after it is
        # covered by that earlier slim term.
        shadowed.update(hits_on_path[1:])

    # A term counts as direct only if it is never shadowed on _any_ path.
    return slim_hits - shadowed, slim_hits
|
67
|
|
|
|