1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: UTF-8 -*- |
3
|
|
|
|
4
|
|
|
""" Maps gene associations to a 'slim' ontology. |
5
|
|
|
|
6
|
|
|
This roughly implements the functionality of the perl script map2slim. |
7
|
|
|
See: [http://search.cpan.org/~cmungall/go-perl/scripts/map2slim] |
8
|
|
|
|
9
|
|
|
For a description of GO Slims look here: |
10
|
|
|
http://geneontology.org/GO.slims.shtml |
11
|
|
|
|
12
|
|
|
For now this does not implement Bucket Terms. |
13
|
|
|
""" |
14
|
|
|
|
15
|
|
|
from .obo_parser import GODag |
16
|
|
|
|
17
|
|
|
|
18
|
|
|
def mapslim(go_term, go_dag, goslim_dag):
    """ Maps a GO term (accession) to its GO slim terms.

        Parameters:
        - go_term: the accession to be mapped to the slim terms
        - go_dag: the (full) Gene Ontology DAG
        - goslim_dag: the GO Slim DAG

        Returns:
            Two sets:
            direct_ancestors, all_ancestors
        - direct_ancestors: The direct ancestors of the given term that are in
                            the GO Slim. Those are the terms that are not
                            covered by earlier ancestors of the GO Slims in
                            _any_ path (from bottom to top).
        - all_ancestors:    All ancestors of the given term that are part of
                            the GO-Slim terms.

        Raises:
        - TypeError:  if go_dag or goslim_dag is not a GODag instance.
        - ValueError: if go_term is not contained in go_dag.

    """
    # check parameters
    if not isinstance(go_dag, GODag):
        raise TypeError("go_dag must be an instance of GODag")
    if not isinstance(goslim_dag, GODag):
        raise TypeError("goslim_dag must be an instance of GODag")
    if go_term not in go_dag:
        raise ValueError("go_term must be an accession that is in the go_dag")

    all_ancestors = set()
    covered_ancestors = set()

    # examine every path the full DAG reports for the term
    for path in go_dag.paths_to_top(go_term):
        # The inner loop has to run bottom->up, i.e. from the go_term item to
        # the root. reversed() yields that order without mutating the list
        # handed back by the DAG (an in-place reverse would flip it for any
        # other holder of the same list object).
        got_leaf = False
        for term in reversed(path):
            if term.id in goslim_dag:
                all_ancestors.add(term.id)
                # every slim term after the first (lowest) one on this path
                # is already covered by that lower slim term
                if got_leaf:
                    covered_ancestors.add(term.id)
                got_leaf = True

    # get the direct ancestors, i.e. those that are not covered by an earlier
    # ancestor of the GO-Slim in _any_ path (in bottom->top order)
    direct_ancestors = all_ancestors - covered_ancestors
    return direct_ancestors, all_ancestors
67
|
|
|
|