Completed
Push — master ( 3d9366...ca146f )
by
unknown
01:24
created

GetGOs._rdtxt_gos()   C

Complexity

Conditions 8

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 8
c 1
b 0
f 0
dl 0
loc 19
rs 6.6666
1
"""Command-line script to create GO term diagrams
2
3
Usage:
4
  go_plot.py [GO ...] [options]
5
  go_plot.py [GO ...] [--obo=<file.obo>] [--outfile=<file.png>] [--title=<title>]
6
             [--go_file=<file.txt>]
7
             [--relationship]
8
             [--sections=<sections.txt>]
9
             [--gaf=<file.gaf>]
10
             [--gene2go=<gene2go>] [--taxid=<Taxonomy_number>]
11
             [--shorten]
12
             [--parentcnt] [--childcnt] [--mark_alt_id]
13
             [--go_aliases=<go_aliases.txt>]
14
             [--draw-children]
15
             [--norel]
16
  go_plot.py [GO ...] [--obo=<file.obo>] [-o <file.png>] [-t <title>]
17
             [--shorten] [-p] [-c]
18
  go_plot.py [GO ...] [-o <file.png>] [--draw-children]
19
  go_plot.py [GO ...] [-o <file.png>] [--draw-children] [--shorten]
20
  go_plot.py [--obo=<file.obo>]
21
  go_plot.py [--obo=<file.obo>] [--outfile=<file.png>]
22
  go_plot.py [GO ...]
23
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>]
24
  go_plot.py [GO ...] [--outfile=<file.png>] [--title=<title>] [--shorten]
25
  go_plot.py [GO ...] [-o <file.png>] [-t <title>]
26
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt]
27
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--childcnt]
28
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [--parentcnt] [--childcnt]
29
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p]
30
  go_plot.py [GO ...] [-o <file.png>] [-t <title>] [-p] [-c]
31
32
Options:
33
  -h --help                            show this help message and exit
34
  -i --go_file=<file.txt>              GO IDs in an ASCII file
35
  -o <file.png>, --outfile=<file.png>  Plot file name [default: go_plot.png]
36
  -r --relationship                    Plot all relationships
37
  -s <sections.txt> --sections=<sections.txt>  Sections file for grouping
38
  -S <sections module str>             Sections file for grouping
39
40
  --gaf=<file.gaf>                     Annotations from a gaf file
41
  --gene2go=<gene2go>                  Annotations from a gene2go file downloaded from NCBI
42
43
  --obo=<file.obo>                     Ontologies in obo file [default: go-basic.obo].
44
45
  -t <title>, --title=<title>          Title string to place in image
46
  -p --parentcnt                       Include parent count in each GO term
47
  -c --childcnt                        Include child count in each GO term
48
  --shorten                            Shorten the GO name on plots
49
  --mark_alt_id                        Add 'a' if GO ID is an alternate ID: GO:0007582a
50
  --draw-children                      Draw children. By default, they are not drawn.
51
  --go_aliases=<go_aliases.txt>        ASCII file containing letter alias
52
53
  --norel                              Don't load relationship from the GO DAG
54
"""
55
56
from __future__ import print_function
57
58
__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved."
59
__author__ = "DV Klopfenstein"
60
61
62
import re
63
import os
64
import sys
65
66
from goatools.obo_parser import GODag
67
from goatools.associations import get_tcntobj
68
from goatools.godag.obo_optional_attributes import OboOptionalAttrs
69
70
from goatools.cli.docopt_parse import DocOptParse
71
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
72
from goatools.gosubdag.plot.go2color import Go2Color
73
from goatools.gosubdag.gosubdag import GoSubDag
74
from goatools.gosubdag.go_tasks import get_go2obj_unique
75
from goatools.gosubdag.go_tasks import get_leaf_children
76
from goatools.gosubdag.rpt.wr_xlsx import read_d1_letter
77
# COMING SOON: Plotting using GOATOOLS grouping:
78
#   from goatools.gosubdag.rpt.read_goids import read_sections
79
#   from goatools.grouper.grprdflts import GrouperDflts
80
#   from goatools.grouper.hdrgos import HdrgosSections
81
#   from goatools.grouper.grprobj import Grouper
82
#   from goatools.grouper.colors import GrouperColors
83
#   from goatools.grouper.grprplt import GrouperPlot
84
85
86
# pylint: disable=too-few-public-methods
87
class GetGOs(object):
    """Collect the source GO IDs, and any user-chosen colors, to be plotted.

    GO IDs are gathered from runtime arguments and/or a text file. If the
    user supplies none and the GO-DAG is small, leaf-level terms of the
    whole DAG are used instead.
    """

    exp_color_chars = set('ABCDEFabcdef0123456789')
    exp_kws_dct = set(['GO', 'go_file'])
    exp_kws_set = set(['draw-children'])
    # When no GO IDs are given, a GO-DAG with fewer entries than this is
    # plotted from its leaf terms; a larger one raises an error.
    max_gos = 200

    def __init__(self, go2obj):
        """Store the GO-DAG mapping and compile the file-parsing patterns.

        Args:
            go2obj: mapping of GO ID -> GO term object.
        """
        self.go2obj = go2obj
        self.re_goids = re.compile(r"(GO:\d{7})+?")
        self.re_color = re.compile(r"(#[0-9a-fA-F]{6})+?")

    def get_go_color(self, **kws):
        """Return the source GO IDs and the GO-to-color mapping.

        Recognized keys in kws:
            GO:            iterable of "GO:NNNNNNN" or "GO:NNNNNNN#rrggbb" strings
            go_file:       name of a text file containing GO IDs
            draw-children: if the key is present (its value is not examined),
                           leaf-level children are added

        Returns:
            A two-item list: [set of GO IDs, dict of GO ID -> color].

        Raises:
            RuntimeError: no GO IDs were given and the GO-DAG holds
                max_gos or more entries.
            KeyError: a collected GO ID is not a key of self.go2obj.
        """
        ret = {'GOs': set(), 'go2color': {}}
        if 'GO' in kws:
            self._goargs(ret, kws['GO'])
        if 'go_file' in kws:
            self._rdtxt_gos(ret, kws['go_file'])
        if 'draw-children' in kws:
            self._add_gochildleaf(ret)
        # The user did not explicitly specify any GO IDs
        if not ret['GOs']:
            # Only a sufficiently small GO-DAG is plotted in its entirety
            if len(self.go2obj) >= self.max_gos:
                raise RuntimeError("GO IDs NEEDED")
            self._add_all_leafs(ret)
        go2obj = {go: self.go2obj[go] for go in ret['GOs']}
        # NOTE(review): get_go2obj_unique presumably collapses alternate GO IDs
        # onto their main ID -- confirm against goatools.gosubdag.go_tasks
        ret['GOs'] = set(get_go2obj_unique(go2obj))
        return [ret['GOs'], ret['go2color']]

    def _goargs(self, ret, go_args):
        """Get GO IDs and colors for GO IDs from the GO ID runtime arguments.

        Each argument must begin with a GO ID, optionally followed directly
        by a "#rrggbb" color. Arguments that do not match are reported with
        a WARNING on stdout and skipped.
        """
        goids = set()
        go2color = {}
        # Match on "GO ID" or "GO ID and color"
        re_gocolor = re.compile(r'(GO:\d{7})((?:#[0-9a-fA-F]{6})?)')
        for go_arg in go_args:
            mtch = re_gocolor.match(go_arg)
            if not mtch:
                print("WARNING: UNRECOGNIZED ARG({})".format(go_arg))
                continue
            goid, color = mtch.groups()
            goids.add(goid)
            if color:
                go2color[goid] = color
        self._update_ret(ret, goids, go2color)

    def _rdtxt_gos(self, ret, go_file):
        """Read GO IDs, and optional colors, from a text file.

        Every GO ID found on a line is collected. Colors are only used when
        a line holds exactly one color per GO ID; they are then paired in
        order of appearance. Otherwise the line's colors are ignored (its
        GO IDs are still kept) and the line is reported on stdout.

        Raises:
            RuntimeError: go_file does not exist.
        """
        if not os.path.exists(go_file):
            raise RuntimeError("CAN NOT READ: {FILE}\n".format(FILE=go_file))
        goids = set()
        go2color = {}
        with open(go_file) as ifstrm:
            for line in ifstrm:
                goids_found = self.re_goids.findall(line)
                if not goids_found:
                    continue
                goids.update(goids_found)
                colors = self.re_color.findall(line)
                if not colors:
                    continue
                if len(goids_found) == len(colors):
                    go2color.update(zip(goids_found, colors))
                else:
                    # The line still carries its newline; strip it so the
                    # report is not followed by a spurious blank line.
                    print("IGNORING: {L}".format(L=line.rstrip("\n")))
        self._update_ret(ret, goids, go2color)

    def _add_gochildleaf(self, ret):
        """Add leaf-level GO children to the GO list, colored uniquely.

        A color already chosen by the user for a leaf is not overwritten.
        """
        leaf_gos = get_leaf_children(ret['GOs'], self.go2obj)
        if not leaf_gos:
            return
        ret['GOs'].update(leaf_gos)
        leaf_go_color = Go2Color.key2col['go_leafchild']
        go2color = ret['go2color']
        for goid in leaf_gos:
            go2color.setdefault(goid, leaf_go_color)

    def _add_all_leafs(self, ret):
        """Set ret['GOs'] to the childless GO-DAG keys.

        Keys of terms without children are selected. From those, the IDs
        of terms that are also stored under a key differing from their own
        'id' attribute are removed.
        """
        main_gos = set(o.id for go, o in self.go2obj.items() if go != o.id)
        go_leafs = set(go for go, o in self.go2obj.items() if not o.children)
        ret['GOs'] = go_leafs.difference(main_gos)

    @staticmethod
    def _update_ret(ret, goids, go2color):
        """Update 'GOs' and 'go2color' in dict with goids and go2color."""
        if goids:
            ret['GOs'].update(goids)
        if go2color:
            ret['go2color'].update(go2color)
183
184
185
class PlotCli(object):
    """Class for command-line interface for creating GO term diagrams"""

    # Options which carry a value
    kws_dict = set(['GO', 'outfile', 'go_file', 'sections', 'S',
                    'gaf', 'gene2go', 'taxid',
                    'title',
                    'obo',
                    'go_aliases'])
    # Options which are flags
    kws_set = set(['relationship',
                   'parentcnt', 'childcnt', 'mark_alt_id', 'shorten',
                   'draw-children',
                   'norel'])
    dflt_outfile = "go_plot.png"
    kws_plt = set(['parentcnt', 'childcnt', 'mark_alt_id', 'shorten'])

    def __init__(self, gosubdag=None):
        """Create the docopt parser from the module docstring.

        Args:
            gosubdag: optional GoSubDag; replaced whenever cli() is run.
        """
        self.objdoc = DocOptParse(__doc__, self.kws_dict, self.kws_set)
        self.gosubdag = gosubdag

    def cli(self):
        """Command-line interface for go_draw script.

        Parses the command line, loads the GO-DAG, builds the GoSubDag for
        the requested GO IDs and plots it.

        Returns:
            The name of the plot file, or None when 'sections' is given
            (grouped plotting is not implemented yet).
        """
        kws_all = self.get_docargs(prt=None)
        optional_attrs = self._get_optional_attrs(kws_all)
        go2obj = GODag(kws_all['obo'], optional_attrs)
        # GO kws_all: GO go_file draw-children
        goids, go2color = GetGOs(go2obj).get_go_color(**kws_all)
        relationships = 'relationship' in optional_attrs
        kws_dag = self._get_kwsdag(goids, go2obj, **kws_all)
        self.gosubdag = GoSubDag(goids, go2obj, relationships, **kws_dag)

        if 'sections' in kws_all:
            return self._plt_gogrouped(goids, go2color, **kws_all)
        return self._plt_gosubdag(goids, go2color, **kws_all)

    # pylint: disable=unused-argument,no-self-use
    def _plt_gogrouped(self, goids, go2color_usr, **kws):
        """Plot grouped GO IDs (placeholder: only prints a notice)."""
        print("Plotting with GOATOOLS grouping coming soon...")
    #     fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
    #     sections = read_sections(kws['sections'], exclude_ungrouped=True)
    #     # kws_plt = {k:v for k, v in kws.items if k in self.kws_plt}
    #     grprobj_cur = self._get_grprobj(goids, sections)
    #     # GO: purple=hdr-only, green=hdr&usr, yellow=usr-only
    #     # BORDER: Black=hdr Blu=hdr&usr
    #     grpcolor = GrouperColors(grprobj_cur)  # get_bordercolor get_go2color_users
    #     grp_go2color = grpcolor.get_go2color_users()
    #     grp_go2bordercolor = grpcolor.get_bordercolor()
    #     for goid, color in go2color_usr.items():
    #         grp_go2color[goid] = color
    #     objcolor = Go2Color(self.gosubdag, objgoea=None,
    #                         go2color=grp_go2color, go2bordercolor=grp_go2bordercolor)
    #     go2txt = GrouperPlot.get_go2txt(grprobj_cur, grp_go2color, grp_go2bordercolor)
    #     objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, go2txt=go2txt, **kws)
    #     objplt.prt_goids(sys.stdout)
    #     objplt.plt_dag(fout_img)
    #     sys.stdout.write("{N:>6} sections read\n".format(
    #         N="NO" if sections is None else len(sections)))
    #     return fout_img

    # def _get_grprobj(self, goids, sections):
    #     """Get Grouper, given GO IDs and sections."""
    #     grprdflt = GrouperDflts(self.gosubdag, "goslim_generic.obo")
    #     hdrobj = HdrgosSections(self.gosubdag, grprdflt.hdrgos_dflt, sections)
    #     return Grouper("sections", goids, hdrobj, self.gosubdag)

    def _plt_gosubdag(self, goids, go2color, **kws):
        """Plot GO IDs and return the name of the image file written."""
        print("PLOTTING KWS", kws)
        fout_img = self.get_outfile(kws['outfile'], goids, 'relationship' in kws)
        objcolor = Go2Color(self.gosubdag, objgoea=None, go2color=go2color)
        objplt = GoSubDagPlot(self.gosubdag, Go2Color=objcolor, **kws)
        objplt.prt_goids(sys.stdout)
        objplt.plt_dag(fout_img)
        return fout_img

    def _get_kwsdag(self, goids, go2obj, **kws_all):
        """Get keyword args for a GoSubDag.

        Only 'go_aliases' is consulted: if the named file exists and yields
        a non-empty GO-to-letter mapping, it is passed on as 'go2letter'.
        """
        kws_dag = {}
        # GO letters specified by the user
        if 'go_aliases' in kws_all:
            fin_go_aliases = kws_all['go_aliases']
            if os.path.exists(fin_go_aliases):
                go2letter = read_d1_letter(fin_go_aliases)
                if go2letter:
                    kws_dag['go2letter'] = go2letter
        return kws_dag

    @staticmethod
    def _get_tcntobj(goids, go2obj, **kws):
        """Get a TermCounts object if the user provides an annotation file, otherwise None."""
        # kws: gaf (gene2go taxid)
        if 'gaf' in kws or 'gene2go' in kws:
            return get_tcntobj(go2obj, **kws)  # TermCounts
        return None

    def get_docargs(self, args=None, prt=None):
        """Pare down docopt. Return a minimal dictionary and a set containing runtime arg values."""
        # docargs = self.objdoc.get_docargs(args, exp_letters=set(['o', 't', 'p', 'c']))
        docargs = self.objdoc.get_docargs(args, prt)
        self._chk_docopts(docargs)
        return docargs

    def _chk_docopts(self, kws):
        """Check for common user command-line errors.

        Raises:
            RuntimeError: the output file has an .obo extension, both 'gaf'
                and 'gene2go' are given, or 'gene2go' is given without 'taxid'.
            SystemExit: only the default obo and outfile values are present,
                i.e. the user specified no GO IDs (exit status 0).
        """
        outfile = kws['outfile']
        if len(kws) == 2 and os.path.basename(kws['obo']) == "go-basic.obo" and \
            kws['outfile'] == self.dflt_outfile:
            self._err("NO GO IDS SPECFIED", err=False)
        # The plot file must not be an ontology file. Test the extension
        # rather than searching for the substring 'obo', which wrongly
        # rejected legitimate names such as 'robot.png'.
        if os.path.splitext(outfile)[1].lower() == '.obo':
            self._err("BAD outfile({O})".format(O=outfile))
        if 'gaf' in kws and 'gene2go' in kws:
            self._err("SPECIFY ANNOTAIONS FROM ONE FILE")
        if 'gene2go' in kws and 'taxid' not in kws:
            self._err("SPECIFIY taxid WHEN READ NCBI'S gene2go FILE")

    def _err(self, msg, err=True):
        """Report usage plus a message, then stop.

        Args:
            msg: text describing the problem.
            err: True raises RuntimeError carrying the report; False writes
                the report to stdout and exits with status 0.
        """
        severity = "FATAL" if err else "NOTE"
        txt = "".join([self.objdoc.doc,
                       "User's command-line:\n\n",
                       "  % go_plot.py {ARGS}\n\n".format(ARGS=" ".join(sys.argv[1:])),
                       "**{SEV}: {MSG}\n".format(SEV=severity, MSG=msg)])
        if err:
            raise RuntimeError(txt)
        sys.stdout.write(txt)
        sys.exit(0)

    def get_outfile(self, outfile, goids=None, b_rel=False):
        """Return output file for GO Term plot.

        Args:
            outfile: file name from the command line.
            goids: collection of source GO IDs, or None.
            b_rel: True if relationships are plotted; adds "_r1" to
                generated names.
        """
        # 1. Use the user-specified output filename for the GO Term plot
        if outfile != self.dflt_outfile:
            return outfile
        rstr = "_r1" if b_rel else ""
        # 2. If only plotting 1 GO term, use the GO ID and name in the plot name
        if goids is not None and len(goids) == 1:
            goid = next(iter(goids))
            goobj = self.gosubdag.go2obj[goid]
            fout = "GO_{NN}_{NM}".format(NN=goid.replace("GO:", ""), NM=goobj.name)
            # In the character class, ",-." is a range covering ',', '-' and '.'
            return "".join([re.sub(r"[\s#'()+,-./:<=>\[\]_}]", '_', fout), rstr, '.png'])
        # 3. Return default name
        if b_rel:
            return self.dflt_outfile.replace('.png', '_r1.png')
        return self.dflt_outfile

    @staticmethod
    def _get_optional_attrs(kws):
        """Given keyword args, return optional_attributes to be loaded into the GODag.

        'sections' forces 'relationship' on; 'norel' removes it and takes
        precedence because it is applied last.
        """
        vals = OboOptionalAttrs.attributes.intersection(kws.keys())
        if 'sections' in kws:
            vals.add('relationship')
        if 'norel' in kws:
            vals.discard('relationship')
        return vals
341
342
343
# Copyright (C) 2016-2018, DV Klopfenstein, H Tang. All rights reserved.
344