Completed
Push — master ( 6ca8c9...3d9366 )
by
unknown
57s
created

GetGOs._update_ret()   A

Complexity

Conditions 4

Size

Total Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
c 1
b 0
f 0
dl 0
loc 8
rs 9.2
1
"""Command-line script to create GO term diagrams
2
3
Usage:
4
  go_plot.py [GO ...] [options]
5
  go_plot.py [GO ...] [--obo=<file.obo>] [--outfile=<file.png>] [--title=<title>]
6
             [--go_file=<file.txt>]
7
             [--relationship]
8
             [--sections=<sections.txt>]
9
             [--gaf=<file.gaf>]
10
             [--gene2go=<gene2go>] [--taxid=<Taxonomy_number>]
11
             [--shorten]
12
             [--parentcnt] [--childcnt] [--mark_alt_id]
13
             [--go_aliases=<go_aliases.txt>]
14
             [--draw-children]
15
             [--norel]
16
  go_plot.py [GO ...] [--obo=<file.obo>] [-o <file.png>] [-t <title>]
17
             [--shorten] [-p] [-c]
18
  go_plot.py [GO ...] [-o <file.png>] [--draw-children]
19
  go_plot.py [GO ...] [-o <file.png>] [--draw-children] [--shorten]
20
  go_plot.py [--obo=<file.obo>]
21
  go_plot.py [--obo=<file.obo>] [--outfile=<file.png>]
22
  go_plot.py [GO ...]
23
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>]
24
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] [--shorten]
25
  go_plot.py [GO ...] [-o <file.png>] [-t <title>]
26
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt]
27
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--childcnt]
28
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] [--childcnt]
29
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p]
30
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] [-c]
31
32
Options:
33
  -h --help                            show this help message and exit
34
  -i --go_file=<file.txt>              GO IDs in an ASCII file
35
  -o <file.png>, --outfile=<file.png>  Plot file name [default: go_plot.png]
36
  -r --relationship                    Plot all relationships
37
  -s <sections.txt> --sections=<sections.txt>  Sections file for grouping
38
  -S <sections module str>             Sections file for grouping
39
40
  --gaf=<file.gaf>                     Annotations from a gaf file
41
  --gene2go=<gene2go>                  Annotations from a gene2go file downloaded from NCBI
42
43
  --obo=<file.obo>                     Ontologies in obo file [default: go-basic.obo].
44
45
  -t <title>, --title=<title>          Title string to place in image
46
  -p --parentcnt                       Include parent count in each GO term
47
  -c --childcnt                        Include child count in each GO term
48
  --shorten                            Shorten the GO name on plots
49
  --mark_alt_id                        Add 'a' if GO ID is an alternate ID: GO:0007582a
50
  --draw-children                      Draw children. By default, they are not drawn.
51
  --go_aliases=<go_aliases.txt>        ASCII file containing letter alias
52
53
  --norel                              Don't load relationship from the GO DAG
54
"""
55
56
from __future__ import print_function
57
58
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
59
__author__ = "DV Klopfenstein"
60
61
62
import re
63
import os
64
import sys
65
66
from goatools.obo_parser import GODag
67
from goatools.associations import get_tcntobj
68
from goatools.godag.obo_optional_attributes import OboOptionalAttrs
69
70
from goatools.cli.docopt_parse import DocOptParse
71
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
72
from goatools.gosubdag.plot.go2color import Go2Color
73
from goatools.gosubdag.gosubdag import GoSubDag
74
from goatools.gosubdag.go_tasks import get_go2obj_unique
75
from goatools.gosubdag.go_tasks import get_leaf_children
76
from goatools.gosubdag.rpt.wr_xlsx import read_d1_letter
77
# COMING SOON: Plotting using GOATOOLS grouping:
78
#   from goatools.gosubdag.rpt.read_goids import read_sections
79
#   from goatools.grouper.grprdflts import GrouperDflts
80
#   from goatools.grouper.hdrgos import HdrgosSections
81
#   from goatools.grouper.grprobj import Grouper
82
#   from goatools.grouper.colors import GrouperColors
83
#   from goatools.grouper.grprplt import GrouperPlot
84
85
86
# pylint: disable=too-few-public-methods
87
class GetGOs(object):
    """Collect the source GO IDs, and any user-chosen colors, to be plotted.

    GO IDs can come from command-line arguments, from a text file, or -- when
    the user names none and the GO DAG is small -- from the childless terms
    found in the GO DAG itself.
    """

    exp_color_chars = set('ABCDEFabcdef0123456789')
    exp_kws_dct = set(['GO', 'go_file'])
    exp_kws_set = set(['draw-children'])
    # Maximum number of source GO IDs: if the user gives no GO IDs, the whole
    # DAG is only used as a source when it holds fewer entries than this.
    max_gos = 200

    def __init__(self, go2obj):
        """Store the GO ID -> GO term mapping and compile the file-parsing patterns.

        Args:
            go2obj: mapping from GO ID to GO term object (the CLI passes a GODag).
        """
        self.go2obj = go2obj
        # Used with findall(), so each match yields one captured GO ID / color
        self.re_goids = re.compile(r"(GO:\d{7})+?")
        self.re_color = re.compile(r"(#[0-9a-fA-F]{6})+?")

    def get_go_color(self, **kws):
        """Return the source GO IDs and the user-specified GO colors.

        Recognized keys in kws: 'GO' (list of GO arguments), 'go_file' (path to
        a text file) and 'draw-children'. Only the *presence* of
        'draw-children' is tested, not its value. Other keys are ignored.

        Returns:
            A two-item list: [set of GO IDs, dict mapping GO ID -> color].

        Raises:
            RuntimeError: if no GO IDs were given and the GO DAG holds
                max_gos or more entries.

        NOTE(review): a GO ID which is not a key of self.go2obj makes the
        self.go2obj[go] lookup below fail (KeyError for a plain dict).
        """
        # kws: GO go_file draw-children
        ret = {'GOs':set(), 'go2color':{}}
        if 'GO' in kws:
            self._goargs(ret, kws['GO'])
        if 'go_file' in kws:
            self._rdtxt_gos(ret, kws['go_file'])
        if 'draw-children' in kws:
            self._add_gochildleaf(ret)
        # If there have been no GO IDs explicitly specified by the user
        if not ret['GOs']:
            # If the GO-DAG is sufficiently small, print all GO IDs
            if len(self.go2obj) < self.max_gos:
                # Main IDs of terms which are also reachable through another key
                main_gos = set(o.id for go, o in self.go2obj.items() if go != o.id)
                # Keys whose term has no children
                go_leafs = set(go for go, o in self.go2obj.items() if not o.children)
                ret['GOs'] = go_leafs.difference(main_gos)
            else:
                raise RuntimeError("GO IDs NEEDED")
        go2obj = {go:self.go2obj[go] for go in ret['GOs']}
        ret['GOs'] = set(get_go2obj_unique(go2obj))
        return [ret['GOs'], ret['go2color']]

    def _goargs(self, ret, go_args):
        """Get GO IDs and colors for GO IDs from the GO ID runtime arguments.

        Each argument is either a GO ID ("GO:0008150") or a GO ID immediately
        followed by a hex color ("GO:0008150#ff0000"). Arguments that do not
        start with a GO ID are reported on stdout and skipped.

        Args:
            ret: dict with 'GOs' (set) and 'go2color' (dict); updated in place.
            go_args: iterable of argument strings.
        """
        goids = set()
        go2color = {}
        # Match on "GO ID" or "GO ID and color"
        re_gocolor = re.compile(r'(GO:\d{7})((?:#[0-9a-fA-F]{6})?)')
        for go_arg in go_args:
            mtch = re_gocolor.match(go_arg)
            if mtch:
                goid, color = mtch.groups()
                goids.add(goid)
                # color is the empty string when no color followed the GO ID
                if color:
                    go2color[goid] = color
            else:
                print("WARNING: UNRECOGNIZED ARG({})".format(go_arg))
        self._update_ret(ret, goids, go2color)

    def _rdtxt_gos(self, ret, go_file):
        """Read GO IDs, and optional colors, from a text file.

        Every GO ID found on a line is collected. Colors on a line are used
        only when the line has exactly as many colors as GO IDs, in which case
        they are paired in order; otherwise the line's colors are dropped and
        the line is reported on stdout.

        Args:
            ret: dict with 'GOs' (set) and 'go2color' (dict); updated in place.
            go_file: path of the text file to read.

        Raises:
            RuntimeError: if go_file does not exist.
        """
        if not os.path.exists(go_file):
            raise RuntimeError("CAN NOT READ: {FILE}\n".format(FILE=go_file))
        goids = set()
        go2color = {}
        with open(go_file) as ifstrm:
            for line in ifstrm:
                goids_found = self.re_goids.findall(line)
                if goids_found:
                    goids.update(goids_found)
                    colors = self.re_color.findall(line)
                    if colors:
                        if len(goids_found) == len(colors):
                            for goid, color in zip(goids_found, colors):
                                go2color[goid] = color
                        else:
                            # The line still carries its own terminator: strip
                            # it so that exactly one newline is printed.
                            print("IGNORING: {L}".format(L=line.rstrip("\r\n")))
        self._update_ret(ret, goids, go2color)

    def _add_gochildleaf(self, ret):
        """Add leaf-level GO children to GO list colored uniquely.

        The GO IDs returned by get_leaf_children for the current source GO IDs
        are added to ret['GOs']. Each one receives the 'go_leafchild' color
        unless it already has a color in ret['go2color'].
        """
        leaf_gos = get_leaf_children(ret['GOs'], self.go2obj)
        if leaf_gos:
            ret['GOs'].update(leaf_gos)
            leaf_go_color = Go2Color.key2col['go_leafchild']
            go2color = ret['go2color']
            for goid in leaf_gos:
                # Colors chosen earlier (e.g. by the user) take precedence
                if goid not in go2color:
                    go2color[goid] = leaf_go_color

    @staticmethod
    def _update_ret(ret, goids, go2color):
        """Update 'GOs' and 'go2color' in dict with goids and go2color.

        Args:
            ret: dict with 'GOs' (set) and 'go2color' (dict); updated in place.
            goids: GO IDs to add; may be empty or None.
            go2color: GO ID -> color entries to add, overwriting existing
                entries for the same GO ID; may be empty or None.
        """
        if goids:
            ret['GOs'].update(goids)
        if go2color:
            for goid, color in go2color.items():
                ret['go2color'][goid] = color
179
180
181
class PlotCli(object):
    """Command-line driver which turns parsed arguments into a GO term plot."""

    # Names handed to DocOptParse as its dict-style keys
    kws_dict = {'GO', 'outfile', 'go_file', 'sections', 'S',
                'gaf', 'gene2go', 'taxid',
                'title',
                'obo',
                'go_aliases'}
    # Names handed to DocOptParse as its set-style keys
    kws_set = {'relationship',
               'parentcnt', 'childcnt', 'mark_alt_id', 'shorten',
               'draw-children',
               'norel'}
    dflt_outfile = "go_plot.png"
    kws_plt = {'parentcnt', 'childcnt', 'mark_alt_id', 'shorten'}

    def __init__(self, gosubdag=None):
        """Build the docopt parser from the module docstring; keep gosubdag if given."""
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self):
        """Run the go_plot command line: load the GO DAG, pick GO IDs, plot.

        Returns whatever the selected plotting method returns: the image file
        name for a plain plot, None for the not-yet-available grouped plot.
        """
        docargs = self.get_docargs(prt=None)
        opt_attrs = self._get_optional_attrs(docargs)
        godag = GODag(docargs['obo'], opt_attrs)
        # GO docargs: GO go_file draw-children
        goids, go2color = GetGOs(godag).get_go_color(**docargs)
        load_rel = 'relationship' in opt_attrs
        kws_dag = self._get_kwsdag(goids, godag, **docargs)
        self.gosubdag = GoSubDag(goids, godag, load_rel, **kws_dag)

        # A sections file requests the grouped plot; otherwise plot the sub-DAG
        plot = self._plt_gogrouped if 'sections' in docargs else self._plt_gosubdag
        return plot(goids, go2color, **docargs)

    # pylint: disable=unused-argument,no-self-use
    def _plt_gogrouped(self, goids, go2color_usr, **kws):
        """Placeholder for grouped plotting: only prints a notice."""
        print("Plotting with GOATOOLS grouping coming soon...")
   #      fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
   #      sections = read_sections(kws['sections'], exclude_ungrouped=True)
   #      # kws_plt = {k:v for k, v in kws.items if k in self.kws_plt}
   #      grprobj_cur = self._get_grprobj(goids, sections)
   #      # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
   #      # BORDER: Black=hdr Blu=hdr&usr
   #      grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
   #      grp_go2color = grpcolor.get_go2color_users()
   #      grp_go2bordercolor = grpcolor.get_bordercolor()
   #      for goid, color in go2color_usr.items():
   #          grp_go2color[goid] = color
   #      objcolor = Go2Color(self.gosubdag, objgoea=None,
   #                          go2color=grp_go2color, go2bordercolor=grp_go2bordercolor)
   #      go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
   #      objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
   #      objplt.prt_goids(sys.stdout)
   #      objplt.plt_dag(fout_img)
   #      sys.stdout.write("{N:>6} sections read\n".format(
   #          N="NO" if sections is None else len(sections)))
   #      return fout_img

   #  def _get_grprobj(self, goids, sections):
   #      """Get Grouper, given GO IDs and sections."""
   #      grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
   #      hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
   #      return Grouper("sections", goids, hdrobj, self.gosubdag)

    def _plt_gosubdag(self, goids, go2color, **kws):
        """Plot the GO sub-DAG to an image file and return that file's name."""
        print("PLOTTING KWS", kws)
        with_rel = 'relationship' in kws
        png = self.get_outfile(kws['outfile'], goids, with_rel)
        colorer = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
        plotter = GoSubDagPlot(self.gosubdag, Go2Color=colorer, **kws)
        plotter.prt_goids(sys.stdout)
        plotter.plt_dag(png)
        return png

    def _get_kwsdag(self, goids, go2obj, **kws_all):
        """Return the keyword args for a GoSubDag.

        The result holds 'go2letter' only when a 'go_aliases' file was named,
        exists, and yields a non-empty mapping; otherwise it is empty.
        """
        # GO letters specified by the user
        if 'go_aliases' not in kws_all:
            return {}
        fin_aliases = kws_all['go_aliases']
        if not os.path.exists(fin_aliases):
            return {}
        letters = read_d1_letter(fin_aliases)
        return {'go2letter': letters} if letters else {}

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Return a TermCounts object when an annotation file is named, else None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' not in kws and 'gene2go' not in kws:
            return None
        return get_tcntobj(go2obj, **kws)  # TermCounts

    def get_docargs(self, args=None, prt=None):
        """Parse the command line, sanity-check the options, and return them."""
        # docargs = self.objdoc.get_docargs(args, exp_letters=set(['o', 't', 'p', 'c']))
        parsed = self.objdoc.get_docargs(args, prt)
        self._chk_docopts(parsed)
        return parsed

    def _chk_docopts(self, kws):
        """Report common command-line mistakes through _err."""
        outfile = kws['outfile']
        # Nothing but the two default values present: the user named no GO IDs
        only_defaults = len(kws) == 2 and \
            os.path.basename(kws['obo']) == "go-basic.obo" and \
            outfile == self.dflt_outfile
        if only_defaults:
            self._err("NO GO IDS SPECFIED", err=False)
        # Any outfile containing the substring 'obo' is rejected
        if 'obo' in outfile:
            self._err("BAD outfile({O})".format(O=outfile))
        has_gene2go = 'gene2go' in kws
        if has_gene2go and 'gaf' in kws:
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
        if has_gene2go and 'taxid' not in kws:
            self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")

    def _err(self, msg, err=True):
        """Show usage, the user's command line, and msg; then raise or exit.

        With err true a RuntimeError carrying the text is raised. Otherwise
        the text is written to stdout and the process exits with status 0.
        """
        cmdline = "  % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:]))
        verdict = "**{SEV}: {MSG}\n".format(SEV="FATAL" if err else "NOTE", MSG=msg)
        txt = "".join((self.objdoc.doc, "User's command-line:\n\n", cmdline, verdict))
        if err:
            raise RuntimeError(txt)
        sys.stdout.write(txt)
        sys.exit(0)

    def get_outfile(self, outfile, goids=None, b_rel=False):
        """Return the name of the image file for the GO term plot.

        A non-default outfile is returned untouched. Otherwise a single GO ID
        gives a name built from the GO ID and its term name, and anything else
        gives the default name; "_r1" is inserted when b_rel is true.
        """
        # 1. A file name chosen by the user always wins
        if outfile != self.dflt_outfile:
            return outfile
        # 2. Exactly one GO term: build the name from its ID and term name
        if goids is not None and len(goids) == 1:
            goid = next(iter(goids))
            term = self.gosubdag.go2obj[goid]
            stem = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=term.name)
            stem = re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', stem)
            return stem + ("_r1" if b_rel else "") + '.png'
        # 3. Fall back to the default name
        if b_rel:
            return self.dflt_outfile.replace('.png', '_r1.png')
        return self.dflt_outfile

    @staticmethod
    def _get_optional_attrs(kws):
        """Return the optional attributes to load into the GODag for these options."""
        attrs = OboOptionalAttrs.attributes.intersection(kws.keys())
        # 'norel' overrides everything; otherwise 'sections' forces relationships
        if 'norel' in kws:
            attrs.discard('relationship')
        elif 'sections' in kws:
            attrs.add('relationship')
        return attrs
337
338
339
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
340