Completed
Push — master ( ab12c1...e91310 )
by
unknown
52s
created

PlotCli._plt_gogrouped()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
c 1
b 0
f 0
dl 0
loc 3
rs 10
1
"""Command-line script to create GO term diagrams
2
3
Usage:
4
  go_plot.py [GO ...] [options]
5
  go_plot.py [GO ...] [--obo=<file.obo>] [--outfile=<file.png>] [--title=<title>]
6
             [--go_file=<file.txt>]
7
             [--relationship]
8
             [--sections=<sections.txt>]
9
             [--gaf=<file.gaf>]
10
             [--gene2go=<gene2go>] [--taxid=<Taxonomy_number>]
11
             [--shorten]
12
             [--parentcnt] [--childcnt] [--mark_alt_id]
13
             [--go_aliases=<go_aliases.txt>]
14
             [--draw-children]
15
             [--norel]
16
  go_plot.py [GO ...] [--obo=<file.obo>] [-o <file.png>] [-t <title>]
17
             [--shorten] [-p] [-c]
18
  go_plot.py [GO ...] [-o <file.png>] [--draw-children]
19
  go_plot.py [GO ...] [-o <file.png>] [--draw-children] [--shorten]
20
  go_plot.py [--obo=<file.obo>]
21
  go_plot.py [--obo=<file.obo>] [--outfile=<file.png>]
22
  go_plot.py [GO ...]
23
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>]
24
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] [--shorten]
25
  go_plot.py [GO ...] [-o <file.png>] [-t <title>]
26
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt]
27
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--childcnt]
28
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] [--childcnt]
29
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p]
30
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] [-c]
31
32
Options:
33
  -h --help                            show this help message and exit
34
  -i --go_file=<file.txt>              GO IDs in an ASCII file
35
  -o <file.png>, --outfile=<file.png>  Plot file name [default: go_plot.png]
36
  -r --relationship                    Plot all relationships
37
  -s <sections.txt> --sections=<sections.txt>  Sections file for grouping
38
  -S <sections module str>             Sections file for grouping
39
40
  --gaf=<file.gaf>                     Annotations from a gaf file
41
  --gene2go=<gene2go>                  Annotations from a gene2go file downloaded from NCBI
42
43
  --obo=<file.obo>                     Ontologies in obo file [default: go-basic.obo].
44
45
  -t <title>, --title=<title>          Title string to place in image
46
  -p --parentcnt                       Include parent count in each GO term
47
  -c --childcnt                        Include child count in each GO term
48
  --shorten                            Shorten the GO name on plots
49
  --mark_alt_id                        Add 'a' if GO ID is an alternate ID: GO:0007582a
50
  --draw-children                      Draw children. By default, they are not drawn.
51
  --go_aliases=<go_aliases.txt>        ASCII file containing letter alias
52
53
  --norel                              Don't load relationship from the GO DAG
54
"""
55
56
from __future__ import print_function
57
58
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
59
__author__ = "DV Klopfenstein"
60
61
62
import re
63
import os
64
import sys
65
66
from goatools.obo_parser import GODag
67
from goatools.associations import get_tcntobj
68
from goatools.godag.obo_optional_attributes import OboOptionalAttrs
69
70
from goatools.cli.docopt_parse import DocOptParse
71
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
72
from goatools.gosubdag.plot.go2color import Go2Color
73
from goatools.gosubdag.gosubdag import GoSubDag
74
from goatools.gosubdag.go_tasks import get_go2obj_unique
75
from goatools.gosubdag.go_tasks import get_leaf_children
76
from goatools.gosubdag.rpt.wr_xlsx import read_d1_letter
77
# COMING SOON: Plotting using GOATOOLS grouping:
78
#   from goatools.gosubdag.rpt.read_goids import read_sections
79
#   from goatools.grouper.grprdflts import GrouperDflts
80
#   from goatools.grouper.hdrgos import HdrgosSections
81
#   from goatools.grouper.grprobj import Grouper
82
#   from goatools.grouper.colors import GrouperColors
83
#   from goatools.grouper.grprplt import GrouperPlot
84
85
86
# pylint: disable=too-few-public-methods
87
class GetGOs(object):
88
    """Return a list of GO IDs for plotting."""
89
90
    exp_color_chars = set('ABCDEFabcdef0123456789')
91
    exp_kws_dct = set(['GO', 'go_file'])
92
    exp_kws_set = set(['draw-children'])
93
    max_gos = 200  # Maximum number of source GO IDs
94
95
    def __init__(self, go2obj):
96
        self.go2obj = go2obj
97
        self.re_goids = re.compile(r"(GO:\d{7})+?")
98
        self.re_color = re.compile(r"(#[0-9a-fA-F]{6})+?")
99
100
    def get_go_color(self, **kws):
101
        """Return source GO IDs ."""
102
        ret = {'GOs':set(), 'go2color':{}}
103
        if 'GO' in kws:
104
            self._goargs(ret, kws['GO'])
105
        if 'go_file' in kws:
106
            self._rdtxt_gos(ret, kws['go_file'])
107
        if 'draw-children' in kws:
108
            ret['GOs'].update(get_leaf_children(ret['GOs'], self.go2obj))
109
        # If there have been no GO IDs explicitly specified by the user
110
        if not ret['GOs']:
111
            # If the GO-DAG is sufficiently small, print all GO IDs
112
            if len(self.go2obj) < self.max_gos:
113
                main_gos = set(o.id for go, o in self.go2obj.items() if go != o.id)
114
                go_leafs = set(go for go, o in self.go2obj.items() if not o.children)
115
                ret['GOs'] = go_leafs.difference(main_gos)
116
            else:
117
                raise RuntimeError("GO IDs NEEDED")
118
        go2obj = {go:self.go2obj[go] for go in ret['GOs']}
119
        ret['GOs'] = set(get_go2obj_unique(go2obj))
120
        return [ret['GOs'], ret['go2color']]
121
122 View Code Duplication
    def _goargs(self, ret, go_args):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
123
        """Get GO IDs and colors for GO IDs from the GO ID runtime arguments."""
124
        goids = set()
125
        go2color = {}
126
        # Match on "GO ID" or "GO ID and color"
127
        re_gocolor = re.compile(r'(GO:\d{7})((?:#[0-9a-fA-F]{6})?)')
128
        for go_arg in go_args:
129
            mtch = re_gocolor.match(go_arg)
130
            if mtch:
131
                goid, color = mtch.groups()
132
                goids.add(goid)
133
                if color:
134
                    go2color[goid] = color
135
            else:
136
                print("WARNING: UNRECOGNIZED ARG({})".format(go_arg))
137
        self._update_ret(ret, goids, go2color)
138
139
    def _rdtxt_gos(self, ret, go_file):
140
        """Read GO IDs from a file."""
141
        if not os.path.exists(go_file):
142
            raise RuntimeError("CAN NOT READ: {FILE}\n".format(FILE=go_file))
143
        goids = set()
144
        go2color = {}
145
        with open(go_file) as ifstrm:
146
            for line in ifstrm:
147
                goids_found = self.re_goids.findall(line)
148
                if goids_found:
149
                    goids.update(goids_found)
150
                    colors = self.re_color.findall(line)
151
                    if colors:
152
                        if len(goids_found) == len(colors):
153
                            for goid, color in zip(goids_found, colors):
154
                                go2color[goid] = color
155
                        else:
156
                            print("IGNORING: {L}".format(L=line),)
157
        self._update_ret(ret, goids, go2color)
158
159
    @staticmethod
160
    def _update_ret(ret, goids, go2color):
161
        """Update 'GOs' and 'go2color' in dict with goids and go2color."""
162
        if goids:
163
            ret['GOs'].update(goids)
164
        if go2color:
165
            for goid, color in go2color.items():
166
                ret['go2color'][goid] = color
167
168
169
class PlotCli(object):
170
    """Class for command-line interface for creating GO term diagrams"""
171
172
    kws_dict = set(['GO', 'outfile', 'go_file', 'sections', 'S',
173
                    'gaf', 'gene2go', 'taxid',
174
                    'title',
175
                    'obo',
176
                    'go_aliases'])
177
    kws_set = set(['relationship',
178
                   'parentcnt', 'childcnt', 'mark_alt_id', 'shorten',
179
                   'draw-children',
180
                   'norel'])
181
    dflt_outfile = "go_plot.png"
182
    kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten'])
183
184
    def __init__(self, gosubdag=None):
185
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
186
        self.gosubdag = None if gosubdag is None else gosubdag
187
188
    def cli(self):
189
        """Command-line interface for go_draw script."""
190
        kws_all = self.get_docargs(prt=None)
191
        optional_attrs = self._get_optional_attrs(kws_all)
192
        go2obj = GODag(kws_all['obo'], optional_attrs)
193
        # GO kws_all: GO go_file draw-children
194
        goids, go2color = GetGOs(go2obj).get_go_color(**kws_all)
195
        relationships = 'relationship' in optional_attrs
196
        kws_dag = self._get_kwsdag(goids, go2obj, **kws_all)
197
        self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag)
198
199
        if 'sections' in kws_all:
200
            return self._plt_gogrouped(goids, go2color, **kws_all)
201
        else:
202
            return self._plt_gosubdag(goids, go2color, **kws_all)
203
204
    # pylint: disable=unused-argument,no-self-use
205
    def _plt_gogrouped(self, goids, go2color_usr, **kws):
206
        """Plot grouped GO IDs."""
207
        print("Plotting with GOATOOLS grouping coming soon...")
208
   #      fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
209
   #      sections = read_sections(kws['sections'], exclude_ungrouped=True)
210
   #      # kws_plt = {k:v for k, v in kws.items if k in self.kws_plt}
211
   #      grprobj_cur = self._get_grprobj(goids, sections)
212
   #      # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
213
   #      # BORDER: Black=hdr Blu=hdr&usr
214
   #      grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
215
   #      grp_go2color = grpcolor.get_go2color_users()
216
   #      grp_go2bordercolor = grpcolor.get_bordercolor()
217
   #      for goid, color in go2color_usr.items():
218
   #          grp_go2color[goid] = color
219
   #      objcolor = Go2Color(self.gosubdag, objgoea=None,
220
   #                          go2color=grp_go2color, go2bordercolor=grp_go2bordercolor)
221
   #      go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
222
   #      objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
223
   #      objplt.prt_goids(sys.stdout)
224
   #      objplt.plt_dag(fout_img)
225
   #      sys.stdout.write("{N:>6} sections read\n".format(
226
   #          N="NO" if sections is None else len(sections)))
227
   #      return fout_img
228
229
   #  def _get_grprobj(self, goids, sections):
230
   #      """Get Grouper, given GO IDs and sections."""
231
   #      grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
232
   #      hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
233
   #      return Grouper("sections", goids, hdrobj, self.gosubdag)
234
235
    def _plt_gosubdag(self, goids, go2color, **kws):
236
        """Plot GO IDs."""
237
        print("PLOTTING KWS", kws)
238
        fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
239
        objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
240
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
241
        objplt.prt_goids(sys.stdout)
242
        objplt.plt_dag(fout_img)
243
        return fout_img
244
245
    def _get_kwsdag(self, goids, go2obj, **kws_all):
246
        """Get keyword args for a GoSubDag."""
247
        kws_dag = {}
248
        # GO letters specified by the user
249
        if 'go_aliases' in kws_all:
250
            fin_go_aliases = kws_all['go_aliases']
251
            if os.path.exists(fin_go_aliases):
252
                go2letter = read_d1_letter(fin_go_aliases)
253
                if go2letter:
254
                    kws_dag['go2letter'] = go2letter
255
        return kws_dag
256
257
    @staticmethod
258
    def _get_tcntobj(goids, go2obj, **kws):
259
        """Get a TermCounts object if the user provides an annotation file, otherwise None."""
260
        # kws: gaf (gene2go taxid)
261
        if 'gaf' in kws or 'gene2go' in kws:
262
            return get_tcntobj(go2obj, **kws)  # TermCounts
263
264
    def get_docargs(self, args=None, prt=None):
265
        """Pare down docopt. Return a minimal dictionary and a set containing runtime arg values."""
266
        # docargs = self.objdoc.get_docargs(args, exp_letters=set(['o', 't', 'p', 'c']))
267
        docargs = self.objdoc.get_docargs(args, prt)
268
        self._chk_docopts(docargs)
269
        return docargs
270
271
    def _chk_docopts(self, kws):
272
        """Check for common user command-line errors."""
273
        # outfile should contain .png, .png, etc.
274
        outfile = kws['outfile']
275
        if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \
276
            kws['outfile'] == self.dflt_outfile:
277
            self._err("NO GO IDS SPECFIED", err=False)
278
        if 'obo' in outfile:
279
            self._err("BAD outfile({O})".format(O=outfile))
280
        if 'gaf' in kws and 'gene2go' in kws:
281
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
282
        if 'gene2go' in kws:
283
            if 'taxid' not in kws:
284
                self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")
285
286
    def _err(self, msg, err=True):
287
        """Print useage and error before exiting."""
288
        severity = "FATAL" if err else "NOTE"
289
        txt = "".join([self.objdoc.doc,
290
                       "User's command-line:\n\n",
291
                       "  % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])),
292
                       "**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)])
293
        if err:
294
            raise RuntimeError(txt)
295
        sys.stdout.write(txt)
296
        sys.exit(0)
297
298
    def get_outfile(self, outfile, goids=None, b_rel=False):
299
        """Return output file for GO Term plot."""
300
        # 1. Use the user-specfied output filename for the GO Term plot
301
        if outfile != self.dflt_outfile:
302
            return outfile
303
        rstr = "_r1" if b_rel else ""
304
        # 2. If only plotting 1 GO term, use GO is in plot name
305
        if goids is not None and len(goids) == 1:
306
            goid = next(iter(goids))
307
            goobj = self.gosubdag.go2obj[goid]
308
            fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=goobj.name)
309
            return "".join([re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), rstr, '.png'])
310
        # 3. Return default name
311
        if not b_rel:
312
            return self.dflt_outfile
313
        else:
314
            return self.dflt_outfile.replace('.png', '_r1.png')
315
316
    @staticmethod
317
    def _get_optional_attrs(kws):
318
        """Given keyword args, return optional_attributes to be loaded into the GODag."""
319
        vals = OboOptionalAttrs.attributes.intersection(kws.keys())
320
        if 'sections' in kws:
321
            vals.add('relationship')
322
        if 'norel' in kws:
323
            vals.discard('relationship')
324
        return vals
325
326
327
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
328