Completed
Push — master ( 0f596f...821888 )
by
unknown
01:11
created

PlotCli   A

Complexity

Total Complexity 33

Size/Duplication

Total Lines 151
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 151
rs 9.3999
wmc 33

11 Methods

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 3 2
A get_docargs() 0 6 1
C _chk_docopts() 0 14 9
A _get_optional_attrs() 0 9 3
A get_outfile() 0 13 4
A cli() 0 15 2
A _get_tcntobj() 0 6 3
A _err() 0 11 3
A _get_kwsdag() 0 11 4
A _plt_gosubdag() 0 8 1
A _plt_gogrouped() 0 3 1
1
"""Command-line script to create GO term diagrams
2
3
Usage:
4
  go_plot.py [GO ...] [options]
5
  go_plot.py [GO ...] [--obo=<file.obo>] [--outfile=<file.png>] [--title=<title>]
6
             [--go_file=<file.txt>]
7
             [--relationship]
8
             [--sections=<sections.txt>]
9
             [--gaf=<file.gaf>]
10
             [--gene2go=<gene2go>] [--taxid=<Taxonomy_number>]
11
             [--shorten]
12
             [--parentcnt] [--childcnt] [--mark_alt_id]
13
             [--go_aliases=<go_aliases.txt>]
14
             [--draw-children]
15
             [--norel]
16
  go_plot.py [GO ...] [--obo=<file.obo>] [-o <file.png>] [-t <title>]
17
             [--shorten] [-p] [-c]
18
  go_plot.py [GO ...] [-o <file.png>] [--draw-children]
19
  go_plot.py [GO ...] [-o <file.png>] [--draw-children] [--shorten]
20
  go_plot.py [--obo=<file.obo>]
21
  go_plot.py [--obo=<file.obo>] [--outfile=<file.png>]
22
  go_plot.py [GO ...]
23
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>]
24
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] [--shorten]
25
  go_plot.py [GO ...] [-o <file.png>] [-t <title>]
26
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt]
27
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--childcnt]
28
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] [--childcnt]
29
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p]
30
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] [-c]
31
32
Options:
33
  -h --help                            show this help message and exit
34
  -i --go_file=<file.txt>              GO IDs in an ASCII file
35
  -o <file.png>, --outfile=<file.png>  Plot file name [default: go_plot.png]
36
  -r --relationship                    Plot all relationships
37
  -s <sections.txt> --sections=<sections.txt>  Sections file for grouping
38
  -S <sections module str>             Sections file for grouping
39
40
  --gaf=<file.gaf>                     Annotations from a gaf file
41
  --gene2go=<gene2go>                  Annotations from a gene2go file downloaded from NCBI
42
43
  --obo=<file.obo>                     Ontologies in obo file [default: go-basic.obo].
44
45
  -t <title>, --title=<title>          Title string to place in image
46
  -p --parentcnt                       Include parent count in each GO term
47
  -c --childcnt                        Include child count in each GO term
48
  --shorten                            Shorten the GO name on plots
49
  --mark_alt_id                        Add 'a' if GO ID is an alternate ID: GO:0007582a
50
  --draw-children                      Draw children. By default, they are not drawn.
51
  --go_aliases=<go_aliases.txt>        ASCII file containing letter alias
52
53
  --norel                              Don't load relationship from the GO DAG
54
"""
55
56
from __future__ import print_function
57
58
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
59
__author__ = "DV Klopfenstein"
60
61
62
import re
63
import os
64
import sys
65
66
from goatools.obo_parser import GODag
67
from goatools.associations import get_tcntobj
68
from goatools.godag.obo_optional_attributes import OboOptionalAttrs
69
70
from goatools.cli.docopt_parse import DocOptParse
71
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
72
from goatools.gosubdag.plot.go2color import Go2Color
73
from goatools.gosubdag.gosubdag import GoSubDag
74
from goatools.gosubdag.go_tasks import get_go2obj_unique
75
from goatools.gosubdag.go_tasks import get_leaf_children
76
from goatools.gosubdag.rpt.wr_xlsx import read_d1_letter
77
# COMING SOON: Plotting using GOATOOLS grouping:
78
#   from goatools.gosubdag.rpt.read_goids import read_sections
79
#   from goatools.grouper.grprdflts import GrouperDflts
80
#   from goatools.grouper.hdrgos import HdrgosSections
81
#   from goatools.grouper.grprobj import Grouper
82
#   from goatools.grouper.colors import GrouperColors
83
#   from goatools.grouper.grprplt import GrouperPlot
84
85
86
# pylint: disable=too-few-public-methods
87
class GetGOs(object):
88
    """Return a list of GO IDs for plotting."""
89
90
    exp_color_chars = set('ABCDEFabcdef0123456789')
91
    exp_kws_dct = set(['GO', 'go_file'])
92
    exp_kws_set = set(['draw-children'])
93
    max_gos = 200  # Maximum number of source GO IDs
94
95
    def __init__(self, go2obj):
96
        self.go2obj = go2obj
97
        self.re_goids = re.compile(r"(GO:\d{7})+?")
98
        self.re_color = re.compile(r"(#[0-9a-fA-F]{6})+?")
99
100
    def get_go_color(self, **kws):
101
        """Return source GO IDs ."""
102
        ret = {'GOs':set(), 'go2color':{}}
103
        if 'GO' in kws:
104
            self._goargs(ret, kws['GO'])
105
        if 'go_file' in kws:
106
            self._rdtxt_gos(ret, kws['go_file'])
107
        if 'draw-children' in kws:
108
            ret['GOs'].update(get_leaf_children(ret['GOs'], self.go2obj))
109
        # If there have been no GO IDs explicitly specified by the user
110
        if not ret['GOs']:
111
            # If the GO-DAG is sufficiently small, print all GO IDs
112
            if len(self.go2obj) < self.max_gos:
113
                main_gos = set(o.id for go, o in self.go2obj.items() if go != o.id)
114
                go_leafs = set(go for go, o in self.go2obj.items() if not o.children)
115
                ret['GOs'] = go_leafs.difference(main_gos)
116
            else:
117
                raise RuntimeError("GO IDs NEEDED")
118
        go2obj = {go:self.go2obj[go] for go in ret['GOs']}
119
        ret['GOs'] = set(get_go2obj_unique(go2obj))
120
        return [ret['GOs'], ret['go2color']]
121
122 View Code Duplication
    def _goargs(self, ret, go_args):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
123
        """Get GO IDs and colors for GO IDs from the GO ID runtime arguments."""
124
        goids = set()
125
        go2color = {}
126
        # Match on "GO ID" or "GO ID and color"
127
        re_gocolor = re.compile(r'(GO:\d{7})((?:#[0-9a-fA-F]{6})?)')
128
        for go_arg in go_args:
129
            mtch = re_gocolor.match(go_arg)
130
            if mtch:
131
                goid, color = mtch.groups()
132
                goids.add(goid)
133
                if color:
134
                    go2color[goid] = color
135
            else:
136
                print("WARNING: UNRECOGNIZED ARG({})".format(go_arg))
137
        self._update_ret(ret, goids, go2color)
138
139
    def _rdtxt_gos(self, ret, go_file):
140
        """Read GO IDs from a file."""
141
        if not os.path.exists(go_file):
142
            raise RuntimeError("CAN NOT READ: {FILE}\n".format(FILE=go_file))
143
        goids = set()
144
        go2color = {}
145
        with open(go_file) as ifstrm:
146
            for line in ifstrm:
147
                goids_found = self.re_goids.findall(line)
148
                if goids_found:
149
                    goids.update(goids_found)
150
                    colors = self.re_color.findall(line)
151
                    if colors:
152
                        if len(goids_found) == len(colors):
153
                            for goid, color in zip(goids_found, colors):
154
                                go2color[goid] = color
155
                        else:
156
                            print("IGNORING: {L}".format(L=line),)
157
        self._update_ret(ret, goids, go2color)
158
159
    @staticmethod
160
    def _update_ret(ret, goids, go2color):
161
        """Update 'GOs' and 'go2color' in dict with goids and go2color."""
162
        if goids:
163
            ret['GOs'].update(goids)
164
        if go2color:
165
            for goid, color in go2color.items():
166
                ret['go2color'][goid] = color
167
168
169
class PlotCli(object):
170
    """Class for command-line interface for creating GO term diagrams"""
171
172
    kws_dict = set(['GO', 'outfile', 'go_file', 'sections', 'S',
173
                    'gaf', 'gene2go', 'taxid',
174
                    'title',
175
                    'obo',
176
                    'go_aliases'])
177
    kws_set = set(['relationship',
178
                   'parentcnt', 'childcnt', 'mark_alt_id', 'shorten',
179
                   'draw-children',
180
                   'norel'])
181
    dflt_outfile = "go_plot.png"
182
    kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten'])
183
184
    def __init__(self, gosubdag=None):
185
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
186
        self.gosubdag = None if gosubdag is None else gosubdag
187
188
    def cli(self):
189
        """Command-line interface for go_draw script."""
190
        kws_all = self.get_docargs(prt=None)
191
        optional_attrs = self._get_optional_attrs(kws_all)
192
        go2obj = GODag(kws_all['obo'], optional_attrs)
193
        # GO kws_all: GO go_file draw-children
194
        goids, go2color = GetGOs(go2obj).get_go_color(**kws_all)
195
        relationships = 'relationship' in optional_attrs
196
        kws_dag = self._get_kwsdag(goids, go2obj, **kws_all)
197
        self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag)
198
199
        if 'sections' in kws_all:
200
            return self._plt_gogrouped(goids, go2color, **kws_all)
201
        else:
202
            return self._plt_gosubdag(goids, go2color, **kws_all)
203
204
    # pylint: disable=unused-argument,no-self-use
205
    def _plt_gogrouped(self, goids, go2color_usr, **kws):
206
        """Plot grouped GO IDs."""
207
        print("Plotting with GOATOOLS grouping coming soon...")
208
   #      fout_img = self.get_outfile(kws['outfile'], goids)
209
   #      sections = read_sections(kws['sections'], exclude_ungrouped=True)
210
   #      # kws_plt = {k:v for k, v in kws.items if k in self.kws_plt}
211
   #      grprobj_cur = self._get_grprobj(goids, sections)
212
   #      # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
213
   #      # BORDER: Black=hdr Blu=hdr&usr
214
   #      grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
215
   #      grp_go2color = grpcolor.get_go2color_users()
216
   #      grp_go2bordercolor = grpcolor.get_bordercolor()
217
   #      for goid, color in go2color_usr.items():
218
   #          grp_go2color[goid] = color
219
   #      objcolor = Go2Color(self.gosubdag, objgoea=None,
220
   #                          go2color=grp_go2color, go2bordercolor=grp_go2bordercolor)
221
   #      go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
222
   #      objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
223
   #      objplt.prt_goids(sys.stdout)
224
   #      objplt.plt_dag(fout_img)
225
   #      sys.stdout.write("{N:>6} sections read\n".format(
226
   #          N="NO" if sections is None else len(sections)))
227
   #      return fout_img
228
229
   #  def _get_grprobj(self, goids, sections):
230
   #      """Get Grouper, given GO IDs and sections."""
231
   #      grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
232
   #      hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
233
   #      return Grouper("sections", goids, hdrobj, self.gosubdag)
234
235
    def _plt_gosubdag(self, goids, go2color, **kws):
236
        """Plot GO IDs."""
237
        fout_img = self.get_outfile(kws['outfile'], goids)
238
        objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
239
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
240
        objplt.prt_goids(sys.stdout)
241
        objplt.plt_dag(fout_img)
242
        return fout_img
243
244
    def _get_kwsdag(self, goids, go2obj, **kws_all):
245
        """Get keyword args for a GoSubDag."""
246
        kws_dag = {}
247
        # GO letters specified by the user
248
        if 'go_aliases' in kws_all:
249
            fin_go_aliases = kws_all['go_aliases']
250
            if os.path.exists(fin_go_aliases):
251
                go2letter = read_d1_letter(fin_go_aliases)
252
                if go2letter:
253
                    kws_dag['go2letter'] = go2letter
254
        return kws_dag
255
256
    @staticmethod
257
    def _get_tcntobj(goids, go2obj, **kws):
258
        """Get a TermCounts object if the user provides an annotation file, otherwise None."""
259
        # kws: gaf (gene2go taxid)
260
        if 'gaf' in kws or 'gene2go' in kws:
261
            return get_tcntobj(go2obj, **kws)  # TermCounts
262
263
    def get_docargs(self, args=None, prt=None):
264
        """Pare down docopt. Return a minimal dictionary and a set containing runtime arg values."""
265
        # docargs = self.objdoc.get_docargs(args, exp_letters=set(['o', 't', 'p', 'c']))
266
        docargs = self.objdoc.get_docargs(args, prt)
267
        self._chk_docopts(docargs)
268
        return docargs
269
270
    def _chk_docopts(self, kws):
271
        """Check for common user command-line errors."""
272
        # outfile should contain .png, .png, etc.
273
        outfile = kws['outfile']
274
        if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \
275
            kws['outfile'] == self.dflt_outfile:
276
            self._err("NO GO IDS SPECFIED", err=False)
277
        if 'obo' in outfile:
278
            self._err("BAD outfile({O})".format(O=outfile))
279
        if 'gaf' in kws and 'gene2go' in kws:
280
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
281
        if 'gene2go' in kws:
282
            if 'taxid' not in kws:
283
                self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")
284
285
    def _err(self, msg, err=True):
286
        """Print useage and error before exiting."""
287
        severity = "FATAL" if err else "NOTE"
288
        txt = "".join([self.objdoc.doc,
289
                       "User's command-line:\n\n",
290
                       "  % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])),
291
                       "**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)])
292
        if err:
293
            raise RuntimeError(txt)
294
        sys.stdout.write(txt)
295
        sys.exit(0)
296
297
    def get_outfile(self, outfile, goids=None):
298
        """Return output file for GO Term plot."""
299
        # 1. Use the user-specfied output filename for the GO Term plot
300
        if outfile != self.dflt_outfile:
301
            return outfile
302
        # 2. If only plotting 1 GO term, use GO is in plot name
303
        if goids is not None and len(goids) == 1:
304
            goid = next(iter(goids))
305
            goobj = self.gosubdag.go2obj[goid]
306
            fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=goobj.name)
307
            return ".".join([re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), 'png'])
308
        # 3. Return default name
309
        return self.dflt_outfile
310
311
    @staticmethod
312
    def _get_optional_attrs(kws):
313
        """Given keyword args, return optional_attributes to be loaded into the GODag."""
314
        vals = OboOptionalAttrs.attributes.intersection(kws.keys())
315
        if 'sections' in kws:
316
            vals.add('relationship')
317
        if 'norel' in kws:
318
            vals.discard('relationship')
319
        return vals
320
321
322
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
323