OBOReader.__iter__()   F
last analyzed

Complexity

Conditions 15

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 15
dl 0
loc 31
rs 2.9998
c 1
b 0
f 0

How to fix   Complexity   

Complexity

Complex classes like OBOReader.__iter__() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# Copyright (C) 2010-2018 by Haibao Tang et al. All rights reserved.
2
#
3
# This code is part of the goatools distribution and goverend by its
4
# license. Please see the LICENSE file included with goatools.
5
6
7
"""Read and store Gene Ontology's obo file."""
8
# -*- coding: UTF-8 -*-
9
from __future__ import print_function
10
11
import sys
12
import os
13
from goatools.godag.obo_optional_attributes import OboOptionalAttrs
14
from goatools.godag.typedef import TypeDef
15
from goatools.godag.typedef import add_to_typedef
16
17
GraphEngines = ("pygraphviz", "pydot")
18
19
__copyright__ = "Copyright (C) 2010-2018, H Tang et al., All rights reserved."
20
__author__ = "various"
21
22
23
#pylint: disable=too-few-public-methods
24
class OBOReader(object):
25
    """Read goatools.org's obo file. Load into this iterable class.
26
27
        Download obo from: http://geneontology.org/ontology/go-basic.obo
28
29
        >>> reader = OBOReader()
30
        >>> for rec in reader:
31
                print(rec)
32
    """
33
34
    # Scalar attributes for Typedefs:
35
    #                    'is_class_level', 'is_metadata_tag',
36
    #                    'is_transitive', 'transitive_over'])
37
38
    def __init__(self, obo_file="go-basic.obo", optional_attrs=None):
39
        """Read obo file. Load dictionary."""
40
        self.optobj = self._init_optional_attrs(optional_attrs)  # OboOptionalAttrs or None
41
        self.format_version = None # e.g., "1.2" of "format-version:" line
42
        self.data_version = None # e.g., "releases/2016-07-07" from "data-version:" line
43
        self.typedefs = {}
44
45
        # True if obo file exists or if a link to an obo file exists.
46
        if os.path.isfile(obo_file):
47
            self.obo_file = obo_file
48
            # GOTerm attributes that are necessary for any operations:
49
        else:
50
            raise Exception("COULD NOT READ({OBO})\n"
51
                            "download obo file first\n "
52
                            "[http://geneontology.org/ontology/"
53
                            "go-basic.obo]".format(OBO=obo_file))
54
55
    def __iter__(self):
56
        """Return one GO Term record at a time from an obo file."""
57
        # Wait to open file until needed. Automatically close file when done.
58
        with open(self.obo_file) as fstream:
59
            rec_curr = None # Stores current GO Term
60
            typedef_curr = None  # Stores current typedef
61
            for line in fstream:
62
                # obo lines start with any of: [Term], [Typedef], /^\S+:/, or /^\s*/
63
                if self.data_version is None:
64
                    self._init_obo_version(line)
65
                if rec_curr is None and line[0:6].lower() == "[term]":
66
                    rec_curr = GOTerm()
67
                    if self.optobj:
68
                        self.optobj.init_datamembers(rec_curr)
69
                elif typedef_curr is None and line[0:9].lower() == "[typedef]":
70
                    typedef_curr = TypeDef()
71
                elif rec_curr is not None or typedef_curr is not None:
72
                    line = line.rstrip()  # chomp
73
                    if line:
74
                        self._add_to_obj(rec_curr, typedef_curr, line)
75
                    else:
76
                        if rec_curr is not None:
77
                            yield rec_curr
78
                            rec_curr = None
79
                        elif typedef_curr is not None:
80
                            # Save typedef.
81
                            self.typedefs[typedef_curr.id] = typedef_curr
82
                            typedef_curr = None
83
            # Return last record, if necessary
84
            if rec_curr is not None:
85
                yield rec_curr
86
87
    def _add_to_obj(self, rec_curr, typedef_curr, line):
88
        """Add information on line to GOTerm or Typedef."""
89
        if rec_curr is not None:
90
            self._add_to_ref(rec_curr, line)
91
        else:
92
            add_to_typedef(typedef_curr, line)
93
94
    def _init_obo_version(self, line):
95
        """Save obo version and release."""
96
        if line[0:14] == "format-version":
97
            self.format_version = line[16:-1]
98
        if line[0:12] == "data-version":
99
            self.data_version = line[14:-1]
100
101
    def _add_to_ref(self, rec_curr, line):
102
        """Add new fields to the current reference."""
103
        # Examples of record lines containing ':' include:
104
        #   id: GO:0000002
105
        #   name: mitochondrial genome maintenance
106
        #   namespace: biological_process
107
        #   def: "The maintenance of ...
108
        #   is_a: GO:0007005 ! mitochondrion organization
109
        if line[:4] == "id: ":
110
            assert not rec_curr.id
111
            rec_curr.id = line[4:]
112
        elif line[:8] == "alt_id: ":
113
            rec_curr.alt_ids.add(line[8:])
114
        elif line[:6] == "name: ":
115
            assert not rec_curr.name
116
            rec_curr.name = line[6:]
117
        elif line[:11] == "namespace: ":
118
            assert not rec_curr.namespace
119
            rec_curr.namespace = line[11:]
120
        elif line[:6] == "is_a: ":
121
            rec_curr._parents.add(line[6:].split()[0])
122
        elif line[:13] == "is_obsolete: " and line[13:] == "true":
123
            rec_curr.is_obsolete = True
124
        elif self.optobj and ':' in line:
125
            self.optobj.update_rec(rec_curr, line)
126
127
    @staticmethod
128
    def _init_optional_attrs(optional_attrs):
129
        """Create OboOptionalAttrs or return None."""
130
        if optional_attrs is None:
131
            return None
132
        opts = OboOptionalAttrs.get_optional_attrs(optional_attrs)
133
        if opts:
134
            return OboOptionalAttrs(opts)
135
136
137
class GOTerm(object):
138
    """
139
    GO term, actually contain a lot more properties than interfaced here
140
    """
141
142
    def __init__(self):
143
        self.id = ""                # GO:NNNNNNN
144
        self.name = ""              # description
145
        self.namespace = ""         # BP, CC, MF
146
        self._parents = set()       # is_a basestring of parents
147
        self.parents = set()        # parent records
148
        self.children = set()       # children records
149
        self.level = None           # shortest distance from root node
150
        self.depth = None           # longest distance from root node
151
        self.is_obsolete = False    # is_obsolete
152
        self.alt_ids = set()        # alternative identifiers
153
154
    def __str__(self):
155
        ret = ['{GO}\t'.format(GO=self.id)]
156
        if self.level is not None:
157
            ret.append('level-{L:>02}\t'.format(L=self.level))
158
        if self.depth is not None:
159
            ret.append('depth-{D:>02}\t'.format(D=self.depth))
160
        ret.append('{NAME} [{NS}]'.format(NAME=self.name, NS=self.namespace))
161
        if self.is_obsolete:
162
            ret.append('obsolete')
163
        return ''.join(ret)
164
165
    def __repr__(self):
166
        """Print GO id and all attributes in GOTerm class."""
167
        ret = ["GOTerm('{ID}'):".format(ID=self.id)]
168
        for key, val in self.__dict__.items():
169
            if isinstance(val, int) or isinstance(val, str):
170
                ret.append("{K}:{V}".format(K=key, V=val))
171
            elif val is not None:
172
                ret.append("{K}: {V} items".format(K=key, V=len(val)))
173
                if len(val) < 10:
174
                    if not isinstance(val, dict):
175
                        for elem in val:
176
                            ret.append("  {ELEM}".format(ELEM=elem))
177
                    else:
178
                        for (typedef, terms) in val.items():
179
                            ret.append("  {TYPEDEF}: {NTERMS} items"
180
                                       .format(TYPEDEF=typedef,
181
                                               NTERMS=len(terms)))
182
                            for term in terms:
183
                                ret.append("    {TERM}".format(TERM=term))
184
            else:
185
                ret.append("{K}: None".format(K=key))
186
        return "\n  ".join(ret)
187
188
    def has_parent(self, term):
189
        """Return True if this GO object has a parent GO ID."""
190
        for parent in self.parents:
191
            if parent.id == term or parent.has_parent(term):
192
                return True
193
        return False
194
195
    def has_child(self, term):
196
        """Return True if this GO object has a child GO ID."""
197
        for parent in self.children:
198
            if parent.id == term or parent.has_child(term):
199
                return True
200
        return False
201
202
    def get_all_parents(self):
203
        """Return all parent GO IDs."""
204
        all_parents = set()
205
        for parent in self.parents:
206
            all_parents.add(parent.id)
207
            all_parents |= parent.get_all_parents()
208
        return all_parents
209
210
    def get_all_upper(self):
211
        """Return all parent GO IDs through both 'is_a' and all relationships."""
212
        all_upper = set()
213
        for upper in self.get_goterms_upper():
214
            all_upper.add(upper.id)
215
            all_upper |= upper.get_all_upper()
216
        return all_upper
217
218
    def get_all_children(self):
219
        """Return all children GO IDs."""
220
        all_children = set()
221
        for parent in self.children:
222
            all_children.add(parent.id)
223
            all_children |= parent.get_all_children()
224
        return all_children
225
226
    def get_all_lower(self):
227
        """Return all parent GO IDs through both reverse 'is_a' and all relationships."""
228
        all_lower = set()
229
        for lower in self.get_goterms_lower():
230
            all_lower.add(lower.id)
231
            all_lower |= lower.get_all_lower()
232
        return all_lower
233
234
    def get_all_parent_edges(self):
235
        """Return tuples for all parent GO IDs, containing current GO ID and parent GO ID."""
236
        all_parent_edges = set()
237
        for parent in self.parents:
238
            all_parent_edges.add((self.id, parent.id))
239
            all_parent_edges |= parent.get_all_parent_edges()
240
        return all_parent_edges
241
242
    def get_all_child_edges(self):
243
        """Return tuples for all child GO IDs, containing current GO ID and child GO ID."""
244
        all_child_edges = set()
245
        for parent in self.children:
246
            all_child_edges.add((parent.id, self.id))
247
            all_child_edges |= parent.get_all_child_edges()
248
        return all_child_edges
249
250
    def get_goterms_upper(self):
251
        """Returns a set containing parents and relationship GO Terms."""
252
        # Requires GODag is created with 'relationship' in optional_attrs argument
253
        # pylint: disable=no-member
254
        return set.union(self.parents, *self.relationship.values())
255
256
    def get_goterms_lower(self):
257
        """Returns a set containing children and reverse-relationship GO Terms."""
258
        # Requires GODag is created with 'relationship' in optional_attrs argument
259
        # pylint: disable=no-member
260
        return set.union(self.children, *self.relationship_rev.values())
261
262
263
####    def write_hier_rec(self, gos_printed, out=sys.stdout,
264
####                       len_dash=1, max_depth=None, num_child=None, short_prt=False,
265
####                       include_only=None, go_marks=None,
266
####                       depth=1, depth_dashes="-"):
267
####        """Write hierarchy for a GO Term record."""
268
####        # Added by DV Klopfenstein
269
####        goid = self.id
270
####        # Shortens hierarchy report by only printing the hierarchy
271
####        # for the sub-set of user-specified GO terms which are connected.
272
####        if include_only is not None and goid not in include_only:
273
####            return
274
####        nrp = short_prt and goid in gos_printed
275
####        if go_marks is not None:
276
####            out.write('{} '.format('>' if goid in go_marks else ' '))
277
####        if len_dash is not None:
278
####            # Default character indicating hierarchy level is '-'.
279
####            # '=' is used to indicate a hierarchical path printed in detail previously.
280
####            letter = '-' if not nrp or not self.children else '='
281
####            depth_dashes = ''.join([letter]*depth)
282
####            out.write('{DASHES:{N}} '.format(DASHES=depth_dashes, N=len_dash))
283
####        if num_child is not None:
284
####            out.write('{N:>5} '.format(N=len(self.get_all_children())))
285
####        out.write('{GO}\tL-{L:>02}\tD-{D:>02}\t{desc}\n'.format(
286
####            GO=self.id, L=self.level, D=self.depth, desc=self.name))
287
####        # Track GOs previously printed only if needed
288
####        if short_prt:
289
####            gos_printed.add(goid)
290
####        # Do not print hierarchy below this turn if it has already been printed
291
####        if nrp:
292
####            return
293
####        depth += 1
294
####        if max_depth is not None and depth > max_depth:
295
####            return
296
####        for child in self.children:
297
####            child.write_hier_rec(gos_printed, out, len_dash, max_depth, num_child, short_prt,
298
####                                 include_only, go_marks,
299
####                                 depth, depth_dashes)
300
301
302
class GODag(dict):
303
    """Holds the GO DAG as a dict."""
304
305
    def __init__(self, obo_file="go-basic.obo", optional_attrs=None, load_obsolete=False, prt=sys.stdout):
306
        super(GODag, self).__init__()
307
        self.version = self.load_obo_file(obo_file, optional_attrs, load_obsolete, prt)
308
309
    def load_obo_file(self, obo_file, optional_attrs, load_obsolete, prt):
310
        """Read obo file. Store results."""
311
        reader = OBOReader(obo_file, optional_attrs)
312
313
        # Save alt_ids and their corresponding main GO ID. Add to GODag after populating GO Terms
314
        alt2rec = {}
315
        for rec in reader:
316
            # Save record if:
317
            #   1) Argument load_obsolete is True OR
318
            #   2) Argument load_obsolete is False and the GO term is "live" (not obsolete)
319
            if load_obsolete or not rec.is_obsolete:
320
                self[rec.id] = rec
321
                for alt in rec.alt_ids:
322
                    alt2rec[alt] = rec
323
324
        # Save the typedefs and parsed optional_attrs
325
        # self.optobj = reader.optobj
326
        self.typedefs = reader.typedefs
327
328
        self._populate_terms(reader.optobj)
329
        self._set_level_depth(reader.optobj)
330
331
        # Add alt_ids to go2obj
332
        for goid_alt, rec in alt2rec.items():
333
            self[goid_alt] = rec
334
        desc = self._str_desc(reader)
335
        if prt is not None:
336
            prt.write("{DESC}\n".format(DESC=desc))
337
        return desc
338
339
    def _str_desc(self, reader):
340
        """String containing information about the current GO DAG."""
341
        data_version = reader.data_version
342
        if data_version is not None:
343
            data_version = data_version.replace("releases/", "")
344
        desc = "{OBO}: fmt({FMT}) rel({REL}) {N:,} GO Terms".format(
345
            OBO=reader.obo_file, FMT=reader.format_version,
346
            REL=data_version, N=len(self))
347
        if reader.optobj:
348
            desc = "{D}; optional_attrs({A})".format(D=desc, A=" ".join(sorted(reader.optobj.optional_attrs)))
349
        return desc
350
351
352
    def _populate_terms(self, optobj):
353
        """Convert GO IDs to GO Term record objects. Populate children."""
354
        has_relationship = optobj is not None and 'relationship' in optobj.optional_attrs
355
        # Make parents and relationships references to the actual GO terms.
356
        for rec in self.values():
357
            # Given parent GO IDs, set parent GO Term objects
358
            rec.parents = set([self[goid] for goid in rec._parents])
359
360
            # For each parent GO Term object, add it's child GO Term to the children data member
361
            for parent_rec in rec.parents:
362
                parent_rec.children.add(rec)
363
364
            if has_relationship:
365
                self._populate_relationships(rec)
366
367
    def _populate_relationships(self, rec_curr):
368
        """Convert GO IDs in relationships to GO Term record objects. Populate children."""
369
        for relationship_type, goids in rec_curr.relationship.items():
370
            parent_recs = set([self[goid] for goid in goids])
371
            rec_curr.relationship[relationship_type] = parent_recs
372
            for parent_rec in parent_recs:
373
                if relationship_type not in parent_rec.relationship_rev:
374
                    parent_rec.relationship_rev[relationship_type] = set([rec_curr])
375
                else:
376
                    parent_rec.relationship_rev[relationship_type].add(rec_curr)
377
378
    def _set_level_depth(self, optobj):
379
        """Set level, depth and add inverted relationships."""
380
        has_relationship = optobj is not None and 'relationship' in optobj.optional_attrs
381
382
        def _init_level(rec):
383
            if rec.level is None:
384
                if rec.parents:
385
                    rec.level = min(_init_level(rec) for rec in rec.parents) + 1
386
                else:
387
                    rec.level = 0
388
            return rec.level
389
390
        def _init_depth(rec):
391
            if rec.depth is None:
392
                if rec.parents:
393
                    rec.depth = max(_init_depth(rec) for rec in rec.parents) + 1
394
                else:
395
                    rec.depth = 0
396
            return rec.depth
397
398
        def _init_reldepth(rec):
399
            if not hasattr(rec, 'reldepth'):
400
                up_terms = rec.get_goterms_upper()
401
                if up_terms:
402
                    rec.reldepth = max(_init_reldepth(rec) for rec in up_terms) + 1
403
                else:
404
                    rec.reldepth = 0
405
            return rec.reldepth
406
407
        for rec in self.values():
408
409
            # Add invert relationships
410
            if has_relationship:
411
                if rec.depth is None:
412
                    _init_reldepth(rec)
413
414
                # print("BBBBBBBBBBB1", rec.id, rec.relationship)
415
                #for (typedef, terms) in rec.relationship.items():
416
                #    invert_typedef = self.typedefs[typedef].inverse_of
417
                #    # print("BBBBBBBBBBB2 {} ({}) ({}) ({})".format(
418
                #    #    rec.id, rec.relationship, typedef, invert_typedef))
419
                #    if invert_typedef:
420
                #        # Add inverted relationship
421
                #        for term in terms:
422
                #            if not hasattr(term, 'relationship'):
423
                #                term.relationship = defaultdict(set)
424
                #            term.relationship[invert_typedef].add(rec)
425
                # print("BBBBBBBBBBB3", rec.id, rec.relationship)
426
427
            if rec.level is None:
428
                _init_level(rec)
429
430
            if rec.depth is None:
431
                _init_depth(rec)
432
433
    def write_dag(self, out=sys.stdout):
434
        """Write info for all GO Terms in obo file, sorted numerically."""
435
        for rec in sorted(self.values()):
436
            print(rec, file=out)
437
438
####    def write_hier_all(self, out=sys.stdout,
439
####                       len_dash=1, max_depth=None, num_child=None, short_prt=False):
440
####        """Write hierarchy for all GO Terms in obo file."""
441
####        # Print: [biological_process, molecular_function, and cellular_component]
442
####        for go_id in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
443
####            self.write_hier(go_id, out, len_dash, max_depth, num_child, short_prt, None)
444
####
445
####    def write_hier(self, go_id, out=sys.stdout,
446
####                   len_dash=1, max_depth=None, num_child=None, short_prt=False,
447
####                   include_only=None, go_marks=None):
448
####        """Write hierarchy for a GO Term."""
449
####        gos_printed = set()
450
####        self[go_id].write_hier_rec(gos_printed, out, len_dash, max_depth, num_child,
451
####                                   short_prt, include_only, go_marks)
452
453
    @staticmethod
454
    def id2int(go_id):
455
        """Given a GO ID, return the int value."""
456
        return int(go_id.replace("GO:", "", 1))
457
458
    def query_term(self, term, verbose=False):
459
        """Given a GO ID, return GO object."""
460
        if term not in self:
461
            sys.stderr.write("Term %s not found!\n" % term)
462
            return
463
464
        rec = self[term]
465
        if verbose:
466
            print(rec)
467
            sys.stderr.write("all parents: {}\n".format(
468
                repr(rec.get_all_parents())))
469
            sys.stderr.write("all children: {}\n".format(
470
                repr(rec.get_all_children())))
471
        return rec
472
473
    def paths_to_top(self, term):
474
        """ Returns all possible paths to the root node
475
476
            Each path includes the term given. The order of the path is
477
            top -> bottom, i.e. it starts with the root and ends with the
478
            given term (inclusively).
479
480
            Parameters:
481
            -----------
482
            - term:
483
                the id of the GO term, where the paths begin (i.e. the
484
                accession 'GO:0003682')
485
486
            Returns:
487
            --------
488
            - a list of lists of GO Terms
489
        """
490
        # error handling consistent with original authors
491
        if term not in self:
492
            sys.stderr.write("Term %s not found!\n" % term)
493
            return
494
495
        def _paths_to_top_recursive(rec):
496
            if rec.level == 0:
497
                return [[rec]]
498
            paths = []
499
            for parent in rec.parents:
500
                top_paths = _paths_to_top_recursive(parent)
501
                for top_path in top_paths:
502
                    top_path.append(rec)
503
                    paths.append(top_path)
504
            return paths
505
506
        go_term = self[term]
507
        return _paths_to_top_recursive(go_term)
508
509
    def label_wrap(self, label):
510
        """Label text for plot."""
511
        wrapped_label = r"%s\n%s" % (label,
512
                                     self[label].name.replace(",", r"\n"))
513
        return wrapped_label
514
515
    def make_graph_pydot(self, recs, nodecolor,
516
                         edgecolor, dpi,
517
                         draw_parents=True, draw_children=True):
518
        """draw AMIGO style network, lineage containing one query record."""
519
        import pydot
520
        grph = pydot.Dot(graph_type='digraph', dpi="{}".format(dpi)) # Directed Graph
521
        edgeset = set()
522
        usr_ids = [rec.id for rec in recs]
523
        for rec in recs:
524
            if draw_parents:
525
                edgeset.update(rec.get_all_parent_edges())
526
            if draw_children:
527
                edgeset.update(rec.get_all_child_edges())
528
529
        rec_id_set = set([rec_id for endpts in edgeset for rec_id in endpts])
530
        nodes = {str(ID):pydot.Node(
531
            self.label_wrap(ID).replace("GO:", ""),  # Node name
532
            shape="box",
533
            style="rounded, filled",
534
            # Highlight query terms in plum:
535
            fillcolor="beige" if ID not in usr_ids else "plum",
536
            color=nodecolor)
537
                 for ID in rec_id_set}
538
539
        # add nodes explicitly via add_node
540
        for rec_id, node in nodes.items():
541
            grph.add_node(node)
542
543
        for src, target in edgeset:
544
            # default layout in graphviz is top->bottom, so we invert
545
            # the direction and plot using dir="back"
546
            grph.add_edge(pydot.Edge(nodes[target], nodes[src],
547
                                     shape="normal",
548
                                     color=edgecolor,
549
                                     label="is_a",
550
                                     dir="back"))
551
552
        return grph
553
554
    def make_graph_pygraphviz(self, recs, nodecolor,
555
                              edgecolor, dpi,
556
                              draw_parents=True, draw_children=True):
557
        """Draw AMIGO style network, lineage containing one query record."""
558
        import pygraphviz as pgv
559
560
        grph = pgv.AGraph(name="GO tree")
561
562
        edgeset = set()
563
        for rec in recs:
564
            if draw_parents:
565
                edgeset.update(rec.get_all_parent_edges())
566
            if draw_children:
567
                edgeset.update(rec.get_all_child_edges())
568
569
        edgeset = [(self.label_wrap(a), self.label_wrap(b))
570
                   for (a, b) in edgeset]
571
572
        # add nodes explicitly via add_node
573
        # adding nodes implicitly via add_edge misses nodes
574
        # without at least one edge
575
        for rec in recs:
576
            grph.add_node(self.label_wrap(rec.id))
577
578
        for src, target in edgeset:
579
            # default layout in graphviz is top->bottom, so we invert
580
            # the direction and plot using dir="back"
581
            grph.add_edge(target, src)
582
583
        grph.graph_attr.update(dpi="%d" % dpi)
584
        grph.node_attr.update(shape="box", style="rounded,filled",
585
                              fillcolor="beige", color=nodecolor)
586
        grph.edge_attr.update(shape="normal", color=edgecolor,
587
                              dir="back", label="is_a")
588
        # highlight the query terms
589
        for rec in recs:
590
            try:
591
                node = grph.get_node(self.label_wrap(rec.id))
592
                node.attr.update(fillcolor="plum")
593
            except:
594
                continue
595
596
        return grph
597
598
    def draw_lineage(self, recs, nodecolor="mediumseagreen",
599
                     edgecolor="lightslateblue", dpi=96,
600
                     lineage_img="GO_lineage.png", engine="pygraphviz",
601
                     gml=False, draw_parents=True, draw_children=True):
602
        """Draw GO DAG subplot."""
603
        assert engine in GraphEngines
604
        grph = None
605
        if engine == "pygraphviz":
606
            grph = self.make_graph_pygraphviz(recs, nodecolor, edgecolor, dpi,
607
                                              draw_parents=draw_parents,
608
                                              draw_children=draw_children)
609
        else:
610
            grph = self.make_graph_pydot(recs, nodecolor, edgecolor, dpi,
611
                                         draw_parents=draw_parents, draw_children=draw_children)
612
613
        if gml:
614
            import networkx as nx  # use networkx to do the conversion
615
            gmlbase = lineage_img.rsplit(".", 1)[0]
616
            obj = nx.from_agraph(grph) if engine == "pygraphviz" else nx.from_pydot(grph)
617
618
            del obj.graph['node']
619
            del obj.graph['edge']
620
            gmlfile = gmlbase + ".gml"
621
            nx.write_gml(self.label_wrap, gmlfile)
622
            sys.stderr.write("GML graph written to {0}\n".format(gmlfile))
623
624
        sys.stderr.write(("lineage info for terms %s written to %s\n" %
625
                          ([rec.id for rec in recs], lineage_img)))
626
627
        if engine == "pygraphviz":
628
            grph.draw(lineage_img, prog="dot")
629
        else:
630
            grph.write_png(lineage_img)
631
632
    def update_association(self, association):
633
        """Add the GO parents of a gene's associated GO IDs to the gene's association."""
634
        bad_goids = set()
635
        # Loop through all sets of GO IDs for all genes
636
        for goids in association.values():
637
            parents = set()
638
            # Iterate thru each GO ID in the current gene's association
639
            for goid in goids:
640
                try:
641
                    parents.update(self[goid].get_all_parents())
642
                except:
643
                    bad_goids.add(goid.strip())
644
            # Add the GO parents of all GO IDs in the current gene's association
645
            goids.update(parents)
646
        if bad_goids:
647
            sys.stdout.write("{N} GO IDs in assc. are not found in the GO-DAG: {GOs}\n".format(
648
                N=len(bad_goids), GOs=" ".join(bad_goids)))
649
650
# Copyright (C) 2010-2018, H Tang et al., All rights reserved.
651