x3DNA.get_torsions()   F
last analyzed

Complexity

Conditions 15

Size

Total Lines 62
Code Lines 45

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 15
eloc 45
nop 2
dl 0
loc 62
rs 2.9998
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like build.rna_tools.tools.rna_x3dna.rna_x3dna.x3DNA.get_torsions() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""Python parser to 3dna <http://x3dna.org/>.
4
5
Installation::
6
7
  # install the code from http://forum.x3dna.org/downloads/3dna-download/
8
  Create a copy of the rna_x3dna_config_local_sample.py (remove "_sample") present in rna-tools/rna_tools/tools/rna_x3dna folder.
9
  Edit this line :
10
  BINARY_PATH = <path to your x3dna-dssr file>
11
  matching the path with the path of your x3dna-dssr file.
12
  e.g. in my case: BINARY_PATH = ~/bin/x3dna-dssr.bin
13
14
For one structure you can run this script as::
15
16
    [mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/1xjr.pdb
17
    test_data/1xjr.pdb
18
    >1xjr nts=47 [1xjr] -- secondary structure derived by DSSR
19
    gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU
20
    ..(((((((...((((.((((.....))..))..))).).)))))))
21
22
For multiple structures in the folder, run the script like this::
23
24
    [mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/*
25
    test_data/1xjr.pdb
26
    >1xjr nts=47 [1xjr] -- secondary structure derived by DSSR
27
    gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU
28
    ..(((((((...((((.((((.....))..))..))).).)))))))
29
    test_data/6TNA.pdb
30
    >6TNA nts=76 [6TNA] -- secondary structure derived by DSSR
31
    GCGGAUUUAgCUCAGuuGGGAGAGCgCCAGAcUgAAgAPcUGGAGgUCcUGUGtPCGaUCCACAGAAUUCGCACCA
32
    (((((((..((((.....[..)))).((((.........)))).....(((((..]....))))))))))))....
33
    test_data/rp2_bujnicki_1_rpr.pdb
34
    >rp2_bujnicki_1_rpr nts=100 [rp2_bujnicki_1_rpr] -- secondary structure derived by DSSR
35
    CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU
36
    [[[[(((.....(((&{{{{))))))&(((((((.....(.(&]]]]).))))&[[[[[[......[[[&))))]]].]]&}}}}(((.....(((&]]]]))))))
37
38
.. warning:: This script should not be used in this given form with Parallel because it process output files from x3dna that are named always in the same way, e.g. dssr-torsions.txt. #TODO
39
40
"""
41
import re
42
import argparse
43
44
from subprocess import Popen, PIPE
45
import os
46
from rna_tools.rna_tools_config import X3DNA, X3DNA_FP
47
# x3dna-dssr-64bit
48
49
50
class x3DNAMissingFile(Exception):
51
    pass
52
53
54
def get_parser():
55
    parser = argparse.ArgumentParser(
56
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
57
    parser.add_argument('-c', '--compact',  action='store_true')
58
    parser.add_argument('-x', '--rerun',  action='store_true')
59
    parser.add_argument('-s', '--show',  action='store_true', help="show results")
60
    parser.add_argument('--pymol',  action='store_true', help='get resi to color code puckers in PyMOL')
61
    parser.add_argument('-l', '--show-log',  action='store_true', help="show full log")
62
    parser.add_argument('-v', '--verbose',  action='store_true', help="show full log")
63
    parser.add_argument('files', help='file', nargs='+')
64
    return parser
65
66
67
class x3DNA(object):
68
69
    """
70
    Atributes:
71
72
     **curr_fn**
73
     **report**
74
75
    """
76
77
    def __init__(self, pdbfn, show_log=False):
78
        """Set self.curr_fn based on pdbfn"""
79
        self.curr_fn = pdbfn
80
        self.run_x3dna(show_log)
81
        self.clean_up()
82
83
    def __get_report(self):
84
        """Off right now. Run find_pair
85
86
        ..warning:: To get modification use get_modifications()
87
        """
88
        cmd = X3DNA_FP + ' ' + self.curr_fn + ' stdout | analyze stdin'  # hack
89
        out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True)
90
91
        stdout = out.stdout.read()
92
        outerr = out.stderr.read()
93
94
        text = ''
95
        for l in outerr.split('\n'):
96
            if l.startswith('Time used') or l.startswith('..') or l.startswith('handling') or l.startswith('uncommon residue'):
97
                continue
98
            if l.strip():
99
                text += l + '\n'
100
        return text.strip()
101
102
    def get_modifications(self):
103
        """Run find_pair to find modifications.
104
        """
105
        cmd = X3DNA_FP + ' -p ' + self.curr_fn + ' /tmp/fpout'  # hack
106
        out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True)
107
108
        outerr = out.stderr.read()
109
110
        text = ''
111
        for l in outerr.split('\n'):
112
            if l.startswith('uncommon residue'):
113
                text += l.replace('uncommon residue ', '') + '\n'
114
        return text.strip()
115
116
    def run_x3dna(self, show_log=False, verbose=False):
117
        """
118
        """
119
        cmd = X3DNA + ' -i=' + self.curr_fn
120
        out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True)
121
122
        stdout = str(out.stdout.read().decode())
123
        outerr = str(out.stderr.read().decode())
124
125
        f = open('py3dna.log', 'w')
126
        if verbose: print(f'cmd: {cmd}')
127
        f.write(cmd + '\n' + stdout)
128
129
        if show_log:
130
            print(stdout)
131
        f.close()
132
133
        if outerr.find('does not exist!') > -1:  # not very pretty
134
            raise x3DNAMissingFile
135
        if outerr.find('not found') > -1:  # not very pretty
136
            raise Exception('x3dna not found!')
137
138
        rx = re.compile('no. of DNA/RNA chains:\s+(?P<no_DNARNAchains>\d+)\s').search(stdout)
139
        if rx:
140
            no_of_DNARNA_chains = int(rx.group('no_DNARNAchains'))
141
            msg = 'py3dna::no of DNARNA chains'
142
            self.report = msg + '\n' + msg + '\n'  # hack!
143
        else:
144
            raise Exception('no_of_DNARNA_chains not found')
145
146
        if no_of_DNARNA_chains:
147
            self.report = stdout.strip()
148
149
    def get_ion_water_report(self):
150
        """@todo
151
File name: /tmp/tmp0pdNHS
152
    no. of DNA/RNA chains: 0 []
153
    no. of nucleotides:    174
154
    no. of waters:         793
155
    no. of metals:         33 [Na=29, Mg=1, K=3]
156
        """
157
        pass
158
159
    def clean_up(self, verbose=False):
160
        files_to_remove = [
161
            'dssr-helices.pdb',
162
            'dssr-pairs.pdb',
163
            'dssr-torsions.dat',
164
            'dssr-hairpins.pdb',
165
            'dssr-multiplets.pdb',
166
            'dssr-stems.pdb',
167
            'dssr-Aminors.pdb',
168
            'hel_regions.pdb',
169
            'bp_order.dat',
170
            'bestpairs.pdb',
171
        ]
172
173
        for f in files_to_remove:
174
            try:
175
                os.remove(f)
176
                pass
177
            except OSError:
178
                if verbose:
179
                    print('error: can not remove %s' % f)
180
181
    def get_seq(self):
182
        """Get sequence.
183
184
        Somehow 1bzt_1 x3dna	UCAGACUUUUAAPCUGA, what is P?
185
        P -> u
186
        
187
        """
188
        return self.report.split('\n')[-2].replace('P', 'u').replace('I', 'a')
189
190
    def get_secstruc(self):
191
        """Get secondary structure.
192
        """
193
        hits = re.search("as a whole and per chain.*?\n(?P<ss>.+?)\n\*", self.report, re.DOTALL|re.MULTILINE)
194
        if hits:
195
             return hits.group('ss').strip()
196
        else:
197
            self.report.split('\n')[-1] # tofix
198
199
    def get_torsions(self, outfn) -> str:
200
        """Get torsion angles into 'torsion.csv' file::
201
202
            nt,id,res,alpha,beta,gamma,delta,epsilon,zeta,e-z,chi,phase-angle,sugar-type,ssZp,Dp,splay,bpseq
203
            1,g,A.GTP1,nan,nan,142.1,89.5,-131.0,-78.3,-53(BI),-178.2(anti),358.6(C2'-exo),~C3'-endo,4.68,4.68,29.98,0
204
            2,G,A.G2,-75.8,-167.0,57.2,79.5,-143.4,-69.7,-74(BI),-169.2(anti),5.8(C3'-endo),~C3'-endo,4.68,4.76,25.61,0
205
        
206
        """
207
        angles = ''
208
        save = False
209
210
        c2pendo = []
211
        c3pendo = []
212
        if not os.path.isfile('dssr-torsions.txt'):
213
            
214
            print(f'Problem to get torsion angles for {self.curr_fn}')
215
            return f'Problem to get torsion angles for {self.curr_fn}'
216
            
217
        for l in open('dssr-torsions.txt'):
218
            if 'nt               alpha    beta' in l:
219
                save = True
220
                l = l.replace('nt',  'nt id   res   ')
221
            if '***************' in l and save:
222
                save = False
223
            if save:
224
                if "~C2'-endo" in l:
225
                    c2pendo.append(l.split()[0])
226
                if "~C3'-endo" in l:
227
                    c3pendo.append(l.split()[0])
228
                angles += l
229
230
        c2 = 'color pink, resi ' + '+'.join(c2pendo)
231
        c3 = 'color blue, resi ' + '+'.join(c3pendo)
232
233
        if args.pymol:
0 ignored issues
show
introduced by
The variable args does not seem to be defined in case __name__ == '__main__' on line 263 is False. Are you sure this can never be the case?
Loading history...
234
            print(c2)
235
            print(c3)
236
            return
237
            
238
        import re
239
        nangles = ''
240
        #'9 C    41',
241
        #'10 C     0',
242
        bpseq = ['bpseq'] + [x.strip().split()[2] for x in open('dssr-2ndstrs.bpseq').readlines()]
243
        for i, l in enumerate(angles.split('\n')):
244
            if l.strip():
245
                l = re.sub(r'\s+', ',', l, 0, re.MULTILINE)
246
                if bpseq[i] == '0':
247
                    bpseq[i] = 'no paired'
248
                else:
249
                    bpseq[i] = 'paired'
250
                l = l[1:] + ',' + bpseq[i] + '\n'
251
                nangles += l
252
        nangles = re.sub(r'---', 'nan', nangles, 0, re.MULTILINE)
253
        with open(outfn, 'w') as f:
254
            f.write(nangles.strip())
255
256
        if args.show:
257
            import pandas as pd
258
            df = pd.read_csv(outfn)
259
            print(df)
260
        return nangles.strip()
261
    
262
# name
263
if __name__ == '__main__':
264
    if not X3DNA:
265
        raise Exception(
266
            'Set up BINARY_PATH in rna_x3dna_config_local.py, .e.g "/Users/magnus/work/opt/x3dna/x3dna-dssr"')
267
268
    # get parser and arguments
269
    parser = get_parser()
270
    args = parser.parse_args()
271
272
    # try:
273
    #    compact = sys.argv[2]
274
    #    if compact == '-c':
275
    #       compact = True
276
    #
277
    # except IndexError:
278
    #    compact = False
279
280
    for f in args.files:
281
        if args.compact:
282
            p = x3DNA(f)
283
            print((f, p.get_secstruc()))
284
        else:
285
            print(f'input: {f}')
286
            outfn = os.path.basename(f.replace('.pdb', '')) + '-torsion-paired.csv'
287
            if not args.rerun:
288
                if os.path.isfile(outfn):
289
                    print(f'skip: {f}, use --rerun to run analysis again')
290
                    continue
291
            p = x3DNA(f, args.show_log, args.verbose)
292
            s = p.get_seq()
293
            print(s)
294
            s = p.get_secstruc()
295
            print(s)
296
            s = p.get_torsions(outfn)
297
            if args.verbose: print(s)
298
            p.clean_up(args.verbose)
299