Completed
Push — master ( 0f596f...821888 )
by
unknown
01:11
created

TopologicalSortRelationships   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 23
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 23
rs 10
wmc 6

3 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 4 1
A _init_sorted_relationship() 0 5 2
A _get_sorted_relationships() 0 8 3
1
"""Tasks for go2obj dicts."""
2
3
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved."
4
__author__ = "DV Klopfenstein"
5
6
import sys
7
import collections as cx
8
from goatools.godag.go_tasks import get_go2parents
9
from goatools.godag.go_tasks import get_go2children
10
11
12
# ------------------------------------------------------------------------------------
13
def get_sorted_relationship(goterms):
    """Return the topologically sorted list of GO Terms w/'relationship's loaded."""
    # The sorter does all of its work in the constructor
    sorter = TopologicalSortRelationships(goterms)
    return sorter.goterms_sorted
16
17
class TopologicalSortRelationships(object):
    """Topological sort of GO Terms w/'relationship's loaded.

    After construction:
      goterms_sorted -- list of every GO Term reached from the given GO Terms
                        through get_goterms_upper(); a GO Term is appended only
                        after the GO Terms above it have been traversed.
      goids_seen     -- set of the GO IDs (goterm.id) that were visited.
    """

    # pylint: disable=too-few-public-methods
    def __init__(self, goterms):
        self.goterms_sorted = []
        self.goids_seen = set()
        self._init_sorted_relationship(goterms)

    def _init_sorted_relationship(self, goterms):
        """Topologically sort GO Terms using 'is_a' parents and 'relationship' GO IDs."""
        # NOTE: GODag must be loaded with 'relationship' to use this function
        for goterm in goterms:
            self._get_sorted_relationships(goterm)

    def _get_sorted_relationships(self, goterm):
        """Traverse GO Terms above the current GO Term. Then add current GO Term to sorted."""
        if goterm.id in self.goids_seen:
            return
        self.goids_seen.add(goterm.id)
        # Explicit stack of (GO Term, iterator over its upper GO Terms) instead of
        # recursion, so a deep hierarchy cannot exceed Python's recursion limit.
        # The visiting order is the same as the recursive depth-first traversal.
        stack = [(goterm, iter(goterm.get_goterms_upper()))]
        while stack:
            goterm_cur, uppers = stack[-1]
            for goterm_upper in uppers:
                if goterm_upper.id not in self.goids_seen:
                    self.goids_seen.add(goterm_upper.id)
                    stack.append((goterm_upper, iter(goterm_upper.get_goterms_upper())))
                    break
            else:
                # Every upper GO Term has been handled: emit the current GO Term
                stack.pop()
                self.goterms_sorted.append(goterm_cur)
40
41
42
# ------------------------------------------------------------------------------------
43
def update_association(assc_gene2gos, go2obj):
    """Add the GO parents of a gene's associated GO IDs to the gene's association.

    Args:
        assc_gene2gos: dict whose values are sets of GO IDs; each set is
            updated in place with the parent GO IDs.
        go2obj: dict keyed by GO ID; only GO IDs present here are processed.

    GO IDs found in the association but not in go2obj are reported on stderr.
    """
    # Replaces update_association in GODag
    if not assc_gene2gos:
        # Nothing to update; set.union with no arguments would raise TypeError
        return
    goids_avail = set(go2obj)
    # Get all assc GO IDs that are current
    goid_sets = assc_gene2gos.values()
    goids_assoc_all = set().union(*goid_sets)
    goids_assoc_cur = goids_assoc_all.intersection(goids_avail)
    # Get the subset of GO objects in the association
    go2obj_assc = {go:go2obj[go] for go in goids_assoc_cur}
    go2parents = get_go2parents_go2obj(go2obj_assc)
    # Update the association: update the GO set for each gene
    for goids_cur in goid_sets:
        parents = set()
        for goid in goids_cur.intersection(goids_avail):
            parents.update(go2parents[goid])
        goids_cur.update(parents)
    goids_bad = goids_assoc_all.difference(goids_avail)
    if goids_bad:
        # Sorted so that the message is the same from run to run
        sys.stderr.write("{N} GO IDs NOT FOUND IN ASSOCIATION: {GOs}\n".format(
            N=len(goids_bad), GOs=" ".join(sorted(goids_bad))))
64
65
# ------------------------------------------------------------------------------------
66
def get_go2obj_unique(go2obj):
    """Return a set holding one GO ID key for each distinct GO Term in go2obj.

    GO Terms are told apart by their 'id' attribute. The main GO ID is chosen
    when it is itself a key in go2obj; otherwise one of the other keys that
    point to the GO Term is chosen.
    """
    # Group the go2obj keys by the main GO ID of the GO Term they point to
    main2keys = {}
    for gokey, goterm in go2obj.items():
        main2keys.setdefault(goterm.id, set()).add(gokey)
    # Pick the main GO ID if it is a key, else any one of the keys
    return set(
        main if main in keys else next(iter(keys))
        for main, keys in main2keys.items())
81
82
# ------------------------------------------------------------------------------------
83
def get_go2parents_go2obj(go2obj):
    """Return go2parents (set of parent GO IDs) for all GO ID keys in go2obj."""
    goterms, alt2goterm = get_goobjs_altgo2goobj(go2obj)
    # add_alt_goids fills the dict in place and hands the same dict back
    return add_alt_goids(get_go2parents(goterms), alt2goterm)
89
90
# ------------------------------------------------------------------------------------
91
def get_go2children_go2obj(go2obj):
    """Return go2children (set of child GO IDs) for all GO ID keys in go2obj."""
    goterms, alt2goterm = get_goobjs_altgo2goobj(go2obj)
    # add_alt_goids fills the dict in place and hands the same dict back
    return add_alt_goids(get_go2children(goterms), alt2goterm)
97
98
# ------------------------------------------------------------------------------------
99
def get_goobjs_altgo2goobj(go2obj):
    """Separate alt GO IDs and key GO IDs.

    Returns a 2-tuple:
      * the set of all GO Term objects found in go2obj
      * a dict of the go2obj entries whose key differs from the GO Term's 'id'
    """
    goterms = set(go2obj.values())
    alt2goterm = {
        gokey: goterm for gokey, goterm in go2obj.items() if gokey != goterm.id}
    return goterms, alt2goterm
108
109
def add_alt_goids(go2values, altgo2goobj):
    """Add alternate source GO IDs.

    For each GO Term in altgo2goobj, every ID in its 'alt_ids' is added to
    go2values, bound to the same value object already stored under the GO
    Term's 'id'. go2values is modified in place and also returned.
    """
    for goterm in altgo2goobj.values():
        shared = go2values[goterm.id]
        # dict.fromkeys binds every alt ID to the one shared value object
        go2values.update(dict.fromkeys(goterm.alt_ids, shared))
    return go2values
116
117
# ------------------------------------------------------------------------------------
118
def fill_main_goids(go2obj, goids):
    """Ensure main GO IDs are included in go2obj."""
    # User GO IDs (goids) may be either main GO IDs or alternate GO IDs.
    for goid_usr in goids:
        goterm = go2obj[goid_usr]
        # When the user GO ID is the main GO ID, the key is already present and
        # this is a no-op; for an ALT GO ID, the main GO ID is added if missing.
        go2obj.setdefault(goterm.id, goterm)
127
128
def fill_altgoids(go2obj):
    """Given a go2obj containing key GO IDs, fill with all alternate GO IDs."""
    # Collect the (alt GO ID, GO Term) pairs first so that go2obj is not
    # modified while its values are being iterated
    alt_pairs = [(goid_alt, goterm)
                 for goterm in go2obj.values()
                 for goid_alt in goterm.alt_ids]
    go2obj.update(alt_pairs)
133
134
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
135
def fill_relationshipobjs(go2obj, relationships):
    """Add GO IDs to go2obj that are involved in relationships.

    Args:
        go2obj: dict of GO ID to GO Term; modified in place.
        relationships: container of the relationship types to follow.
    """
    # Get all GO Term record objects that have relationships
    obj = RelationshipFill(go2obj, relationships)
    # Iterate over a snapshot: the fill methods insert new keys into go2obj, and
    # adding keys while iterating a live dict view raises RuntimeError in Python 3
    for goobj in list(go2obj.values()):
        if goobj.relationship:
            obj.fill_relationshipgo2obj(goobj)
        if goobj.relationship_rev:
            obj.fill_relationshiprevgo2obj(goobj)


class RelationshipFill(object):
    """Fill go2obj with GO IDs in relationships."""

    def __init__(self, go2obj, relationships):
        # This dict shall be augmented with higher parent/relationship GO IDs
        self.go2obj = go2obj
        # A set of relationships we would like to keep
        self.relationships = relationships

    def fill_relationshipgo2obj(self, goobj):
        """Fill go2obj with all relationship key GO IDs and their objects."""
        for reltyp, relgoobjs in goobj.relationship.items():
            if reltyp in self.relationships:
                for relgoobj in relgoobjs:
                    # Recurse only into GO Terms not yet in go2obj
                    if relgoobj.id not in self.go2obj:
                        self.go2obj[relgoobj.id] = relgoobj
                        self.fill_relationshipgo2obj(relgoobj)

    def fill_relationshiprevgo2obj(self, goobj):
        """Fill go2obj with all reverse-relationship key GO IDs and their objects."""
        for reltyp, relgoobjs in goobj.relationship_rev.items():
            if reltyp in self.relationships:
                for relgoobj in relgoobjs:
                    # Recurse only into GO Terms not yet in go2obj
                    if relgoobj.id not in self.go2obj:
                        self.go2obj[relgoobj.id] = relgoobj
                        self.fill_relationshiprevgo2obj(relgoobj)
171
172
# ------------------------------------------------------------------------------------
173
def get_child_objs(parent_obj):
    """Return a new dict filled with all child key and alt GO IDs and their objects."""
    goid2child = dict()
    # First the key GO IDs of the children, then their alternate GO IDs
    fill_childgoid2obj(goid2child, parent_obj)
    fill_altgoids(goid2child)
    return goid2child
179
180
def fill_childgoid2obj(childgoid2obj, parent_obj):
    """Fill childgoid2obj with all child key GO IDs and their objects."""
    # Depth-first walk using a stack of child iterators; a GO Term's children
    # are visited right after the GO Term is added, before its siblings
    pending = [iter(parent_obj.children)]
    while pending:
        for kid in pending[-1]:
            if kid.id not in childgoid2obj:
                childgoid2obj[kid.id] = kid
                pending.append(iter(kid.children))
                break
        else:
            # This level is exhausted: resume the level above
            pending.pop()
186
187
# ------------------------------------------------------------------------------------
188
def get_leaf_children(gos_user, go2obj_arg):
    """Find all the GO descendants under all user GO IDs. Return leaf-level GO IDs."""
    descendants = {}
    for goid in gos_user:
        fill_childgoid2obj(descendants, go2obj_arg[goid])
    # A leaf is a descendant whose own 'children' is empty
    return {goid for goid, goterm in descendants.items() if not goterm.children}
195
196
# ------------------------------------------------------------------------------------
197
def goid_is_valid(goid):
    """Check format of a user-provided GO ID: 'GO:' followed by seven ASCII digits.

    Returns True only for a 10-character string such as 'GO:0008150'.
    """
    # str.isdigit() also accepts non-ASCII digit characters (superscripts and
    # digits from other scripts), so compare against the ASCII digits explicitly
    return (len(goid) == 10 and goid[:3] == "GO:" and
            all(char in "0123456789" for char in goid[3:]))
200
201
def goids_valid(goids):
    """Return True if every user-provided GO ID has the proper format."""
    # all() stops at the first badly formatted GO ID
    return all(goid_is_valid(goid) for goid in goids)
207
208
def chk_goids(goids, msg=None, raise_except=True):
    """Check that all GO IDs have the proper format.

    On the first badly formatted GO ID: raise RuntimeError if raise_except is
    true, otherwise return that GO ID. Return None when every GO ID is valid.
    """
    for goid in goids:
        if goid_is_valid(goid):
            continue
        if not raise_except:
            return goid
        raise RuntimeError("BAD GO({GO}): {MSG}".format(GO=goid, MSG=msg))
    return None
216
217
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved.
218