Completed
Push — master ( e9207b...563ee4 )
by Haibao
11s
created

goatools.OBOReader   D

Complexity

Total Complexity 61

Size/Duplication

Total Lines 214
Duplicated Lines 0 %
Metric Value
dl 0
loc 214
rs 4.054
wmc 61

12 Methods

Rating   Name   Duplication   Size   Complexity  
A _add_nested() 0 7 1
A __init__() 0 13 2
F _init_optional_attrs() 0 28 10
F _add_to_ref() 0 32 10
F __iter__() 0 35 14
A _die() 0 4 1
A _init_goterm_ref() 0 6 2
A _init_typedef() 0 6 2
A _init_obo_version() 0 6 3
A _chk_none() 0 5 3
C update_rec() 0 27 7
B _add_to_typedef() 0 21 6

How to fix   Complexity   

Complex Class

Complex classes like goatools.OBOReader often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
# Copyright 2010-2016 by Haibao Tang et al. All rights reserved.
2
#
3
# This code is part of the goatools distribution and governed by its
4
# license. Please see the LICENSE file included with goatools.
5
6
7
"""Read and store Gene Ontology's obo file."""
8
# -*- coding: UTF-8 -*-
9
from __future__ import print_function
10
from collections import defaultdict
11
import sys
12
import os
13
import re
14
15
# Names of the graph-drawing backends; GODag.draw_lineage asserts that its
# `engine` argument is one of these.
GraphEngines = ("pygraphviz", "pydot")
16
17
__copyright__ = "Copyright (C) 2010-2016, H Tang et al., All rights reserved."
18
__author__ = "various"
19
20
class OBOReader(object):
    """Read a Gene Ontology obo file. Load into this iterable class.

    Download obo from: http://geneontology.org/ontology/go-basic.obo

    >>> reader = OBOReader()
    >>> for rec in reader:
    ...     print(rec)

    Iterating yields one GOTerm per ``[Term]`` stanza. ``[Typedef]`` stanzas
    are not yielded; they are stored in ``self.typedefs`` (keyed by typedef
    id) as a side effect of iterating.
    """

    # Matches "<tag>: <value>" lines inside a stanza. Compiled once because it
    # is applied to every field line of the obo file.
    _FIELD_RE = re.compile(r'^(\S+):\s*(\S.*)$')

    def __init__(self, obo_file="go-basic.obo", optional_attrs=None):
        """Remember the obo file to read; the file is not opened until iteration.

        Args:
            obo_file: path to an obo file (or a link to one).
            optional_attrs: None, a str, a list or a set naming optional obo
                fields to load in addition to the always-loaded ones.

        Raises:
            Exception: if `obo_file` does not exist, or if `optional_attrs`
                has an unsupported type.
        """
        self._init_optional_attrs(optional_attrs)
        self.format_version = None
        self.data_version = None
        self.typedefs = {}

        # True if obo file exists or if a link to an obo file exists.
        if not os.path.isfile(obo_file):
            raise Exception("download obo file first\n "
                            "[http://geneontology.org/ontology/"
                            "go-basic.obo]")
        self.obo_file = obo_file

    def __iter__(self):
        """Return one GO Term record at a time from an obo file."""
        # Written by DV Klopfenstein
        # Wait to open file until needed. Automatically close file when done.
        with open(self.obo_file) as fstream:
            rec_curr = None  # Stores current GO Term
            typedef_curr = None  # Stores current typedef
            for lnum, line in enumerate(fstream):
                # obo lines start with any of: [Term], [Typedef], /^\S+:/, or /^\s*/
                if self.data_version is None:
                    self._init_obo_version(line)
                if line[0:6].lower() == "[term]":
                    rec_curr = self._init_goterm_ref(rec_curr, "Term", lnum)
                elif line[0:9].lower() == "[typedef]":
                    # A Term that is still open here was never terminated by
                    # a blank line; keep treating that as fatal.
                    if rec_curr is not None:
                        self._die("PREVIOUS Term WAS NOT TERMINATED AS EXPECTED",
                                  lnum)
                    # Check the *typedef* in progress, not the term.
                    typedef_curr = self._init_typedef(typedef_curr, "Typedef", lnum)
                elif rec_curr is not None or typedef_curr is not None:
                    line = line.rstrip()  # chomp
                    if ":" in line:
                        if rec_curr is not None:
                            self._add_to_ref(rec_curr, line, lnum)
                        else:
                            self._add_to_typedef(typedef_curr, line, lnum)
                    elif line == "":
                        # A blank line terminates the current stanza.
                        if rec_curr is not None:
                            yield rec_curr
                            rec_curr = None
                        elif typedef_curr is not None:
                            # Save typedef.
                            self.typedefs[typedef_curr.id] = typedef_curr
                            typedef_curr = None
                    else:
                        self._die("UNEXPECTED LINE CONTENT: {L}".format(L=line), lnum)
            # Save last typedef, if the file ended without a blank line.
            if typedef_curr is not None:
                self.typedefs[typedef_curr.id] = typedef_curr
            # Return last record, if necessary
            if rec_curr is not None:
                yield rec_curr

    def _init_obo_version(self, line):
        """Save obo version and release."""
        # Strip trailing whitespace instead of blindly dropping the last
        # character, so a line without a newline (or with "\r\n") is kept intact.
        if line[0:14] == "format-version":
            self.format_version = line[16:].rstrip()
        if line[0:12] == "data-version":
            self.data_version = line[14:].rstrip()

    def _init_goterm_ref(self, rec_curr, name, lnum):
        """Initialize new reference and perform checks.

        Returns a new GOTerm when no record is in progress; otherwise raises
        through `_die`, because the previous stanza was never terminated.
        """
        if rec_curr is None:
            return GOTerm()
        msg = "PREVIOUS {REC} WAS NOT TERMINATED AS EXPECTED".format(REC=name)
        self._die(msg, lnum)

    def _init_typedef(self, typedef_curr, name, lnum):
        """Initialize new typedef and perform checks.

        Returns a new TypeDef when no typedef is in progress; otherwise raises
        through `_die`, because the previous stanza was never terminated.
        """
        if typedef_curr is None:
            return TypeDef()
        msg = "PREVIOUS {REC} WAS NOT TERMINATED AS EXPECTED".format(REC=name)
        self._die(msg, lnum)

    def _add_to_ref(self, rec_curr, line, lnum):
        """Add new fields to the current reference."""
        # Written by DV Klopfenstein
        # Examples of record lines containing ':' include:
        #   id: GO:0000002
        #   name: mitochondrial genome maintenance
        #   namespace: biological_process
        #   def: "The maintenance of ...
        #   is_a: GO:0007005 ! mitochondrion organization
        mtch = self._FIELD_RE.match(line)
        if not mtch:
            self._die("UNEXPECTED FIELD CONTENT: {L}\n".format(L=line), lnum)
        field_name = mtch.group(1)
        field_value = mtch.group(2)
        if field_name == "id":
            self._chk_none(rec_curr.id, lnum)
            rec_curr.id = field_value
        elif field_name == "alt_id":
            rec_curr.alt_ids.append(field_value)
        elif field_name == "name":
            self._chk_none(rec_curr.name, lnum)
            rec_curr.name = field_value
        elif field_name == "namespace":
            self._chk_none(rec_curr.namespace, lnum)
            rec_curr.namespace = field_value
        elif field_name == "is_a":
            # Keep only the parent id; drop the trailing "! name" comment.
            rec_curr._parents.append(field_value.split()[0])
        elif field_name == "is_obsolete" and field_value == "true":
            rec_curr.is_obsolete = True
        elif field_name in self.optional_attrs:
            self.update_rec(rec_curr, field_name, field_value)

    def update_rec(self, rec, name, value):
        """Update current GOTerm with optional record.

        Scalar attributes may be set once (a second value raises Exception).
        Nested attributes (relationships) accumulate in a
        ``defaultdict(list)`` stored under ``_<name>``. Every other attribute
        accumulates its values in a set.
        """
        # 'def' is a reserved word in python, do not use it as a Class attr.
        if name == "def":
            name = "defn"

        if name in self.attrs_nested:
            # Nested values live under '_<name>' until GODag resolves them.
            # Test for that same attribute, so repeated lines accumulate
            # rather than replacing the dict each time.
            attr = '_{:s}'.format(name)
            if not hasattr(rec, attr):
                setattr(rec, attr, defaultdict(list))
            self._add_nested(rec, attr, value)
        elif name in self.attrs_scalar:
            if hasattr(rec, name):
                raise Exception("ATTR({NAME}) ALREADY SET({VAL})".format(
                    NAME=name, VAL=getattr(rec, name)))
            setattr(rec, name, value)
        elif hasattr(rec, name):
            getattr(rec, name).add(value)
        else:  # Initialize new GOTerm attr
            setattr(rec, name, set([value]))

    def _add_to_typedef(self, typedef_curr, line, lnum):
        """Add new fields to the current typedef."""
        mtch = self._FIELD_RE.match(line)
        if not mtch:
            self._die("UNEXPECTED FIELD CONTENT: {L}\n".format(L=line), lnum)
        field_name = mtch.group(1)
        # Drop the trailing "! comment" part of the value.
        field_value = mtch.group(2).split('!')[0].rstrip()

        if field_name == "id":
            self._chk_none(typedef_curr.id, lnum)
            typedef_curr.id = field_value
        elif field_name == "name":
            self._chk_none(typedef_curr.name, lnum)
            typedef_curr.name = field_value
        elif field_name == "transitive_over":
            typedef_curr.transitive_over.append(field_value)
        elif field_name == "inverse_of":
            self._chk_none(typedef_curr.inverse_of, lnum)
            typedef_curr.inverse_of = field_value
        # Note: there are other tags that aren't imported here.

    def _add_nested(self, rec, name, value):
        """Adds a term's nested attributes."""
        # Remove comments and split term into typedef / target term. Only the
        # first two tokens are used, so extra whitespace or trailing
        # modifiers do not break the unpacking.
        (typedef, target_term) = value.split('!')[0].split()[:2]

        # Save the nested term.
        getattr(rec, name)[typedef].append(target_term)

    def _init_optional_attrs(self, optional_attrs):
        """Prepare to store data from user-desired optional fields.

          Not loading these optional fields by default saves in space and speed.
          But allow the possibility for saving these fields, if the user desires,
            Including:
              comment consider def is_class_level is_metadata_tag is_transitive
              relationship replaced_by subset synonym transitive_over xref

          Sets self.attrs_req, self.attrs_scalar, self.attrs_nested and
          self.optional_attrs. Raises Exception if optional_attrs is not
          None, a str, a list or a set.
        """
        # Written by DV Klopfenstein
        # Required attributes are always loaded. All others are optionally loaded.
        self.attrs_req = ['id', 'alt_id', 'name', 'namespace', 'is_a', 'is_obsolete']
        self.attrs_scalar = ['comment', 'defn',
                             'is_class_level', 'is_metadata_tag',
                             'is_transitive', 'transitive_over']
        self.attrs_nested = frozenset(['relationship'])

        def _obo_name(aopt):
            """Allow user to specify either 'def' or 'defn'.

            'def' is an obo field name, but 'defn' is the legal Python
            attribute name.
            """
            return aopt if aopt != "defn" else "def"

        if optional_attrs is None:
            optional_attrs = []
        elif isinstance(optional_attrs, str):
            if optional_attrs in self.attrs_req:
                optional_attrs = []
            else:
                optional_attrs = [_obo_name(optional_attrs)]
        elif isinstance(optional_attrs, (list, set)):
            optional_attrs = set(_obo_name(f) for f in optional_attrs
                                 if f not in self.attrs_req)
        else:
            raise Exception("optional_attrs arg MUST BE A str, list, or set.")
        self.optional_attrs = optional_attrs

    def _die(self, msg, lnum):
        """Raise an Exception if file read is unexpected."""
        raise Exception("**FATAL {FILE}({LNUM}): {MSG}\n".format(
            FILE=self.obo_file, LNUM=lnum, MSG=msg))

    def _chk_none(self, init_val, lnum):
        """Expect these lines to be uninitialized."""
        # Compare by value: identity against a "" literal is not reliable.
        if init_val is None or init_val == "":
            return
        self._die("FIELD IS ALREADY INITIALIZED", lnum)
234
235
236
237
238
class GOTerm(object):
    """
    GO term, actually contains a lot more properties than interfaced here
    """

    def __init__(self):
        self.id = ""                # GO:NNNNNNN
        self.name = ""              # description
        self.namespace = ""         # BP, CC, MF
        self._parents = []          # ids of is_a parents, as read from file
        self.parents = []           # parent records
        self.children = []          # children records
        self.level = None           # shortest distance from root node
        self.depth = None           # longest distance from root node
        self.is_obsolete = False    # is_obsolete
        self.alt_ids = []           # alternative identifiers

    def __str__(self):
        """Return a one-line, tab-separated summary of this term.

        The level/depth fields are left out while they are still None, so a
        term can be printed before its level and depth have been assigned.
        """
        obsolete = "obsolete" if self.is_obsolete else ""
        fields = [self.id]
        if self.level is not None:
            fields.append("level-%02d" % self.level)
        if self.depth is not None:
            fields.append("depth-%02d" % self.depth)
        fields.append("%s [%s] %s" % (self.name, self.namespace, obsolete))
        return "\t".join(fields)

    def __repr__(self):
        """Print GO id and all attributes in GOTerm class."""
        ret = ["GOTerm('{ID}'):".format(ID=self.id)]
        for key, val in self.__dict__.items():
            # None (unset level/depth) is printed like a scalar; it has no len().
            if val is None or isinstance(val, (int, str)):
                ret.append("{K}:{V}".format(K=key, V=val))
                continue
            ret.append("{K}: {V} items".format(K=key, V=len(val)))
            # Only small containers are listed element by element.
            if len(val) >= 10:
                continue
            if not isinstance(val, dict):
                for elem in val:
                    ret.append("  {ELEM}".format(ELEM=elem))
            else:
                for (typedef, terms) in val.items():
                    ret.append("  {TYPEDEF}: {NTERMS} items"
                               .format(TYPEDEF=typedef,
                                       NTERMS=len(terms)))
                    for t in terms:
                        ret.append("    {TERM}".format(TERM=t))
        return "\n  ".join(ret)

    def has_parent(self, term):
        """Return True if the id `term` is a direct or indirect parent."""
        for p in self.parents:
            if p.id == term or p.has_parent(term):
                return True
        return False

    def has_child(self, term):
        """Return True if the id `term` is a direct or indirect child."""
        for p in self.children:
            if p.id == term or p.has_child(term):
                return True
        return False

    def get_all_parents(self):
        """Return the set of ids of all direct and indirect parents."""
        all_parents = set()
        for p in self.parents:
            all_parents.add(p.id)
            all_parents |= p.get_all_parents()
        return all_parents

    def get_all_children(self):
        """Return the set of ids of all direct and indirect children."""
        all_children = set()
        for p in self.children:
            all_children.add(p.id)
            all_children |= p.get_all_children()
        return all_children

    def get_all_parent_edges(self):
        """Return the set of (child id, parent id) edges above this term."""
        all_parent_edges = set()
        for p in self.parents:
            all_parent_edges.add((self.id, p.id))
            all_parent_edges |= p.get_all_parent_edges()
        return all_parent_edges

    def get_all_child_edges(self):
        """Return the set of (child id, parent id) edges below this term."""
        all_child_edges = set()
        for p in self.children:
            all_child_edges.add((p.id, self.id))
            all_child_edges |= p.get_all_child_edges()
        return all_child_edges

    def write_hier_rec(self, gos_printed, out=sys.stdout,
                       len_dash=1, max_depth=None, num_child=None, short_prt=False,
                       include_only=None, go_marks=None,
                       depth=1, dp="-"):
        """Write hierarchy for a GO Term record.

        Writes one line for this term to `out`, then recurses into its
        children with `depth` increased by one.

        Args:
            gos_printed: set of GO ids already written; updated only when
                `short_prt` is true.
            out: writable stream.
            len_dash: minimum width of the dash prefix; None omits the prefix.
            max_depth: stop descending once `depth` would exceed this value.
            num_child: when not None, also write the number of descendants.
            short_prt: when true, a term's sub-hierarchy is written only once.
            include_only: when not None, terms whose id is not in it are skipped.
            go_marks: when not None, ids in it are marked with '>'.
            depth: current recursion depth; sets the number of dashes.
            dp: dash prefix; recomputed here whenever `len_dash` is not None.
        """
        # Added by DV Klopfenstein
        GO_id = self.id
        # Shortens hierarchy report by only printing the hierarchy
        # for the sub-set of user-specified GO terms which are connected.
        if include_only is not None and GO_id not in include_only:
            return
        nrp = short_prt and GO_id in gos_printed
        if go_marks is not None:
            out.write('{} '.format('>' if GO_id in go_marks else ' '))
        if len_dash is not None:
            # Default character indicating hierarchy level is '-'.
            # '=' is used to indicate a hierarchical path printed in detail previously.
            letter = '-' if not nrp or not self.children else '='
            dp = ''.join([letter]*depth)
            out.write('{DASHES:{N}} '.format(DASHES=dp, N=len_dash))
        if num_child is not None:
            out.write('{N:>5} '.format(N=len(self.get_all_children())))
        out.write('{GO}\tL-{L:>02}\tD-{D:>02}\t{desc}\n'.format(
            GO=self.id, L=self.level, D=self.depth, desc=self.name))
        # Track GOs previously printed only if needed
        if short_prt:
            gos_printed.add(GO_id)
        # Do not print hierarchy below this term if it has already been printed
        if nrp:
            return
        depth += 1
        if max_depth is not None and depth > max_depth:
            return
        for p in self.children:
            p.write_hier_rec(gos_printed, out, len_dash, max_depth, num_child, short_prt,
                             include_only, go_marks,
                             depth, dp)
358
359
360
class TypeDef(object):
    """
        TypeDef term. These contain more tags than included here, but these
        are the most important.
    """

    def __init__(self):
        self.id = ""                # typedef identifier
        self.name = ""              # description
        self.transitive_over = []   # List of other typedefs
        self.inverse_of = ""        # Name of inverse typedef.

    def __str__(self):
        """Return a multi-line, human-readable summary of this typedef."""
        inverse = self.inverse_of if self.inverse_of else "None"
        lines = [
            "Typedef - {} ({}):".format(self.id, self.name),
            "  Inverse of: {}".format(inverse),
        ]
        # The "Transitive over" section appears only when there are entries.
        if self.transitive_over:
            lines.append("  Transitive over:")
            lines.extend("    - {}".format(other)
                         for other in self.transitive_over)
        return "\n".join(lines)
382
383
384
class GODag(dict):
    """Dictionary of GO Term records, keyed by GO id and by alternative id.

    After loading, every record has its `parents`, `children`, `level` and
    `depth` attributes filled in; see `populate_terms`.
    """

    def __init__(self, obo_file="go-basic.obo", optional_attrs=None):
        """Read `obo_file` and build the DAG; see `load_obo_file`."""
        self.load_obo_file(obo_file, optional_attrs)

    def load_obo_file(self, obo_file, optional_attrs):
        """Read all GO Terms from `obo_file` into this dict and link them.

        Progress messages go to stderr; the obo version line goes to stdout.
        """
        print("load obo file %s" % obo_file, file=sys.stderr)
        reader = OBOReader(obo_file, optional_attrs)
        for rec in reader:
            self[rec.id] = rec
            # Alternative ids refer to the very same record object.
            for alt in rec.alt_ids:
                self[alt] = rec

        print("{OBO}: format-version({FMT}) data-version({REL})".format(
            OBO=obo_file, FMT=reader.format_version, REL=reader.data_version))

        # Save the typedefs and parsed optional_attrs
        self.typedefs = reader.typedefs
        self.optional_attrs = reader.optional_attrs

        self.populate_terms()
        print(len(self), "nodes imported", file=sys.stderr)

    def populate_terms(self):
        """Turn id references into record references; set level and depth.

        For every record this resolves `_parents` into `parents`, resolves
        `_relationship` (when present) into `relationship`, fills in
        `children`, adds inverse relationships for typedefs that declare an
        `inverse_of`, and computes `level` and `depth`.
        """

        def _init_level(term):
            """Shortest distance from a term without parents (memoised)."""
            if term.level is None:
                if not term.parents:
                    term.level = 0
                else:
                    term.level = min(_init_level(par) for par in term.parents) + 1
            return term.level

        def _init_depth(term):
            """Longest distance from a term without parents (memoised)."""
            if term.depth is None:
                if not term.parents:
                    term.depth = 0
                else:
                    term.depth = max(_init_depth(par) for par in term.parents) + 1
            return term.depth

        # Make parents and relationships references to the actual GO terms.
        for rec in self.values():
            rec.parents = [self[x] for x in rec._parents]

            if hasattr(rec, '_relationship'):
                rec.relationship = defaultdict(set)
                for (typedef, terms) in rec._relationship.items():
                    rec.relationship[typedef].update(set([self[x] for x in terms]))
                delattr(rec, '_relationship')

        # populate children, levels and add inverted relationships
        for rec in self.values():
            # A record is visited once per id it is stored under, hence the
            # membership test before appending.
            for p in rec.parents:
                if rec not in p.children:
                    p.children.append(rec)

            # Add invert relationships
            if hasattr(rec, 'relationship'):
                # Iterate over snapshots: adding the inverse may modify this
                # very dict/set when a term is related to itself.
                for (typedef, terms) in list(rec.relationship.items()):
                    # A relationship whose typedef was not declared has no
                    # known inverse; skip it instead of raising KeyError.
                    typedef_rec = self.typedefs.get(typedef)
                    if typedef_rec is None:
                        continue
                    invert_typedef = typedef_rec.inverse_of
                    if invert_typedef:
                        # Add inverted relationship
                        for t in list(terms):
                            if not hasattr(t, 'relationship'):
                                t.relationship = defaultdict(set)
                            t.relationship[invert_typedef].add(rec)

            if rec.level is None:
                _init_level(rec)

            if rec.depth is None:
                _init_depth(rec)

    def write_dag(self, out=sys.stdout):
        """Write info for all GO Terms in obo file, sorted by GO id."""
        for rec_id in sorted(self):
            print(self[rec_id], file=out)

    def write_hier_all(self, out=sys.stdout,
                       len_dash=1, max_depth=None, num_child=None, short_prt=False):
        """Write hierarchy for all GO Terms in obo file."""
        # Print: [biological_process, molecular_function, and cellular_component]
        for go_id in ['GO:0008150', 'GO:0003674', 'GO:0005575']:
            self.write_hier(go_id, out, len_dash, max_depth, num_child, short_prt, None)

    def write_hier(self, GO_id, out=sys.stdout,
                   len_dash=1, max_depth=None, num_child=None, short_prt=False,
                   include_only=None, go_marks=None):
        """Write hierarchy for a GO Term."""
        gos_printed = set()
        self[GO_id].write_hier_rec(gos_printed, out, len_dash, max_depth, num_child,
                                   short_prt, include_only, go_marks)

    @staticmethod
    def id2int(GO_id):
        """Return the numeric part of a GO id, e.g. 'GO:0000012' -> 12."""
        return int(GO_id.replace("GO:", "", 1))

    def query_term(self, term, verbose=False):
        """Return the record for `term`, or None (with a message) if unknown."""
        if term not in self:
            print("Term %s not found!" % term, file=sys.stderr)
            return

        rec = self[term]
        if verbose:
            print(rec)
            print("all parents:", rec.get_all_parents(), file=sys.stderr)
            print("all children:", rec.get_all_children(), file=sys.stderr)
        return rec

    def paths_to_top(self, term):
        """ Returns all possible paths to the root node

            Each path includes the term given. The order of the path is
            top -> bottom, i.e. it starts with the root and ends with the
            given term (inclusively).

            Parameters:
            -----------
            - term:
                the id of the GO term, where the paths begin (i.e. the
                accession 'GO:0003682')

            Returns:
            --------
            - a list of lists of GO Terms
        """
        # error handling consistent with original authors
        if term not in self:
            print("Term %s not found!" % term, file=sys.stderr)
            return

        def _paths_to_top_recursive(rec):
            if rec.level == 0:
                return [[rec]]
            paths = []
            for parent in rec.parents:
                for top_path in _paths_to_top_recursive(parent):
                    top_path.append(rec)
                    paths.append(top_path)
            return paths

        return _paths_to_top_recursive(self[term])

    def _label_wrap(self, label):
        """Return a graph label: the id, then the term name split at commas."""
        wrapped_label = r"%s\n%s" % (label,
                                     self[label].name.replace(",", r"\n"))
        return wrapped_label

    def make_graph_pydot(self, recs, nodecolor,
                         edgecolor, dpi,
                         draw_parents=True, draw_children=True):
        """draw AMIGO style network, lineage containing one query record."""
        import pydot
        G = pydot.Dot(graph_type='digraph', dpi="{}".format(dpi))  # Directed Graph
        edgeset = set()
        usr_ids = set(rec.id for rec in recs)
        for rec in recs:
            if draw_parents:
                edgeset.update(rec.get_all_parent_edges())
            if draw_children:
                edgeset.update(rec.get_all_child_edges())

        lw = self._label_wrap
        rec_id_set = set([rec_id for endpts in edgeset for rec_id in endpts])
        nodes = {str(ID): pydot.Node(
            lw(ID).replace("GO:", ""),  # Node name
            shape="box",
            style="rounded, filled",
            # Highlight query terms in plum:
            fillcolor="beige" if ID not in usr_ids else "plum",
            color=nodecolor)
                 for ID in rec_id_set}

        # add nodes explicitly via add_node
        for node in nodes.values():
            G.add_node(node)

        for src, target in edgeset:
            # default layout in graphviz is top->bottom, so we invert
            # the direction and plot using dir="back"
            G.add_edge(pydot.Edge(nodes[target], nodes[src],
                                  shape="normal",
                                  color=edgecolor,
                                  label="is_a",
                                  dir="back"))

        return G

    def make_graph_pygraphviz(self, recs, nodecolor,
                              edgecolor, dpi,
                              draw_parents=True, draw_children=True):
        """draw AMIGO style network, lineage containing one query record."""
        import pygraphviz as pgv

        G = pgv.AGraph(name="GO tree")
        edgeset = set()
        for rec in recs:
            if draw_parents:
                edgeset.update(rec.get_all_parent_edges())
            if draw_children:
                edgeset.update(rec.get_all_child_edges())

        edgeset = [(self._label_wrap(a), self._label_wrap(b))
                   for (a, b) in edgeset]

        # add nodes explicitly via add_node
        # adding nodes implicitly via add_edge misses nodes
        # without at least one edge
        for rec in recs:
            G.add_node(self._label_wrap(rec.id))

        for src, target in edgeset:
            # default layout in graphviz is top->bottom, so we invert
            # the direction and plot using dir="back"
            G.add_edge(target, src)

        G.graph_attr.update(dpi="%d" % dpi)
        G.node_attr.update(shape="box", style="rounded,filled",
                           fillcolor="beige", color=nodecolor)
        G.edge_attr.update(shape="normal", color=edgecolor,
                           dir="back", label="is_a")
        # highlight the query terms
        for rec in recs:
            try:
                q = G.get_node(self._label_wrap(rec.id))
            except KeyError:
                # Node is not in the graph; nothing to highlight.
                continue
            q.attr.update(fillcolor="plum")

        return G

    def draw_lineage(self, recs, nodecolor="mediumseagreen",
                     edgecolor="lightslateblue", dpi=96,
                     lineage_img="GO_lineage.png", engine="pygraphviz",
                     gml=False, draw_parents=True, draw_children=True):
        """Draw the lineage of `recs` to the image file `lineage_img`.

        `engine` must be one of GraphEngines. With `gml` true, the graph is
        also written next to the image as '<image name without extension>.gml'.
        """
        assert engine in GraphEngines
        if engine == "pygraphviz":
            G = self.make_graph_pygraphviz(recs, nodecolor, edgecolor, dpi,
                                           draw_parents=draw_parents,
                                           draw_children=draw_children)
        else:
            G = self.make_graph_pydot(recs, nodecolor, edgecolor, dpi,
                                      draw_parents=draw_parents,
                                      draw_children=draw_children)

        if gml:
            import networkx as nx  # use networkx to do the conversion
            pf = lineage_img.rsplit(".", 1)[0]
            # NOTE(review): newer networkx releases may expose these converters
            # only as nx.nx_agraph.from_agraph / nx.nx_pydot.from_pydot --
            # confirm against the networkx version in use.
            NG = nx.from_agraph(G) if engine == "pygraphviz" else nx.from_pydot(G)

            del NG.graph['node']
            del NG.graph['edge']
            gmlfile = pf + ".gml"
            nx.write_gml(NG, gmlfile)
            print("GML graph written to {0}".format(gmlfile), file=sys.stderr)

        print(("lineage info for terms %s written to %s" %
               ([rec.id for rec in recs], lineage_img)), file=sys.stderr)

        if engine == "pygraphviz":
            G.draw(lineage_img, prog="dot")
        else:
            G.write_png(lineage_img)

    def update_association(self, association):
        """Add the ids of all parents to each term set in `association`.

        `association` maps a key to a set of GO ids; the sets are updated in
        place. Ids that are not in this DAG are reported once on stderr.
        """
        bad_terms = set()
        for terms in list(association.values()):
            parents = set()
            for term in terms:
                try:
                    rec = self[term]
                except KeyError:
                    # Unknown GO id: remember it for the report below.
                    bad_terms.add(term.strip())
                    continue
                parents.update(rec.get_all_parents())
            terms.update(parents)
        if bad_terms:
            print("terms not found: %s" % (bad_terms,), file=sys.stderr)
661
662
# Copyright (C) 2010-2016, H Tang et al., All rights reserved.
663