|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- coding: utf-8 -*- |
|
3
|
|
|
"""Python parser to 3dna <http://x3dna.org/>. |
|
4
|
|
|
|
|
5
|
|
|
Installation:: |
|
6
|
|
|
|
|
7
|
|
|
# install the code from http://forum.x3dna.org/downloads/3dna-download/ |
|
8
|
|
|
Create a copy of the rna_x3dna_config_local_sample.py (remove "_sample") present in rna-tools/rna_tools/tools/rna_x3dna folder. |
|
9
|
|
|
Edit this line : |
|
10
|
|
|
BINARY_PATH = <path to your x3dna-dssr file> |
|
11
|
|
|
matching the path with the path of your x3dna-dssr file. |
|
12
|
|
|
e.g. in my case: BINARY_PATH = ~/bin/x3dna-dssr.bin |
|
13
|
|
|
|
|
14
|
|
|
For one structure you can run this script as:: |
|
15
|
|
|
|
|
16
|
|
|
[mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/1xjr.pdb |
|
17
|
|
|
test_data/1xjr.pdb |
|
18
|
|
|
>1xjr nts=47 [1xjr] -- secondary structure derived by DSSR |
|
19
|
|
|
gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU |
|
20
|
|
|
..(((((((...((((.((((.....))..))..))).).))))))) |
|
21
|
|
|
|
|
22
|
|
|
For multiple structures in the folder, run the script like this:: |
|
23
|
|
|
|
|
24
|
|
|
[mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/* |
|
25
|
|
|
test_data/1xjr.pdb |
|
26
|
|
|
>1xjr nts=47 [1xjr] -- secondary structure derived by DSSR |
|
27
|
|
|
gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU |
|
28
|
|
|
..(((((((...((((.((((.....))..))..))).).))))))) |
|
29
|
|
|
test_data/6TNA.pdb |
|
30
|
|
|
>6TNA nts=76 [6TNA] -- secondary structure derived by DSSR |
|
31
|
|
|
GCGGAUUUAgCUCAGuuGGGAGAGCgCCAGAcUgAAgAPcUGGAGgUCcUGUGtPCGaUCCACAGAAUUCGCACCA |
|
32
|
|
|
(((((((..((((.....[..)))).((((.........)))).....(((((..]....)))))))))))).... |
|
33
|
|
|
test_data/rp2_bujnicki_1_rpr.pdb |
|
34
|
|
|
>rp2_bujnicki_1_rpr nts=100 [rp2_bujnicki_1_rpr] -- secondary structure derived by DSSR |
|
35
|
|
|
CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU |
|
36
|
|
|
[[[[(((.....(((&{{{{))))))&(((((((.....(.(&]]]]).))))&[[[[[[......[[[&))))]]].]]&}}}}(((.....(((&]]]])))))) |
|
37
|
|
|
|
|
38
|
|
|
.. warning:: This script should not be used in this given form with Parallel because it process output files from x3dna that are named always in the same way, e.g. dssr-torsions.txt. #TODO |
|
39
|
|
|
|
|
40
|
|
|
""" |
|
41
|
|
|
import re |
|
42
|
|
|
import argparse |
|
43
|
|
|
|
|
44
|
|
|
from subprocess import Popen, PIPE |
|
45
|
|
|
import os |
|
46
|
|
|
from rna_tools.rna_tools_config import X3DNA, X3DNA_FP |
|
47
|
|
|
# x3dna-dssr-64bit |
|
48
|
|
|
|
|
49
|
|
|
|
|
50
|
|
|
class x3DNAMissingFile(Exception): |
|
51
|
|
|
pass |
|
52
|
|
|
|
|
53
|
|
|
|
|
54
|
|
|
def get_parser(): |
|
55
|
|
|
parser = argparse.ArgumentParser( |
|
56
|
|
|
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) |
|
57
|
|
|
parser.add_argument('-c', '--compact', action='store_true') |
|
58
|
|
|
parser.add_argument('-x', '--rerun', action='store_true') |
|
59
|
|
|
parser.add_argument('-s', '--show', action='store_true', help="show results") |
|
60
|
|
|
parser.add_argument('--pymol', action='store_true', help='get resi to color code puckers in PyMOL') |
|
61
|
|
|
parser.add_argument('-l', '--show-log', action='store_true', help="show full log") |
|
62
|
|
|
parser.add_argument('-v', '--verbose', action='store_true', help="show full log") |
|
63
|
|
|
parser.add_argument('files', help='file', nargs='+') |
|
64
|
|
|
return parser |
|
65
|
|
|
|
|
66
|
|
|
|
|
67
|
|
|
class x3DNA(object): |
|
68
|
|
|
|
|
69
|
|
|
""" |
|
70
|
|
|
Atributes: |
|
71
|
|
|
|
|
72
|
|
|
**curr_fn** |
|
73
|
|
|
**report** |
|
74
|
|
|
|
|
75
|
|
|
""" |
|
76
|
|
|
|
|
77
|
|
|
def __init__(self, pdbfn, show_log=False): |
|
78
|
|
|
"""Set self.curr_fn based on pdbfn""" |
|
79
|
|
|
self.curr_fn = pdbfn |
|
80
|
|
|
self.run_x3dna(show_log) |
|
81
|
|
|
self.clean_up() |
|
82
|
|
|
|
|
83
|
|
|
def __get_report(self): |
|
84
|
|
|
"""Off right now. Run find_pair |
|
85
|
|
|
|
|
86
|
|
|
..warning:: To get modification use get_modifications() |
|
87
|
|
|
""" |
|
88
|
|
|
cmd = X3DNA_FP + ' ' + self.curr_fn + ' stdout | analyze stdin' # hack |
|
89
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
|
90
|
|
|
|
|
91
|
|
|
stdout = out.stdout.read() |
|
92
|
|
|
outerr = out.stderr.read() |
|
93
|
|
|
|
|
94
|
|
|
text = '' |
|
95
|
|
|
for l in outerr.split('\n'): |
|
96
|
|
|
if l.startswith('Time used') or l.startswith('..') or l.startswith('handling') or l.startswith('uncommon residue'): |
|
97
|
|
|
continue |
|
98
|
|
|
if l.strip(): |
|
99
|
|
|
text += l + '\n' |
|
100
|
|
|
return text.strip() |
|
101
|
|
|
|
|
102
|
|
|
def get_modifications(self): |
|
103
|
|
|
"""Run find_pair to find modifications. |
|
104
|
|
|
""" |
|
105
|
|
|
cmd = X3DNA_FP + ' -p ' + self.curr_fn + ' /tmp/fpout' # hack |
|
106
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
|
107
|
|
|
|
|
108
|
|
|
outerr = out.stderr.read() |
|
109
|
|
|
|
|
110
|
|
|
text = '' |
|
111
|
|
|
for l in outerr.split('\n'): |
|
112
|
|
|
if l.startswith('uncommon residue'): |
|
113
|
|
|
text += l.replace('uncommon residue ', '') + '\n' |
|
114
|
|
|
return text.strip() |
|
115
|
|
|
|
|
116
|
|
|
def run_x3dna(self, show_log=False, verbose=False): |
|
117
|
|
|
""" |
|
118
|
|
|
""" |
|
119
|
|
|
cmd = X3DNA + ' -i=' + self.curr_fn |
|
120
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
|
121
|
|
|
|
|
122
|
|
|
stdout = str(out.stdout.read().decode()) |
|
123
|
|
|
outerr = str(out.stderr.read().decode()) |
|
124
|
|
|
|
|
125
|
|
|
f = open('py3dna.log', 'w') |
|
126
|
|
|
if verbose: print(f'cmd: {cmd}') |
|
127
|
|
|
f.write(cmd + '\n' + stdout) |
|
128
|
|
|
|
|
129
|
|
|
if show_log: |
|
130
|
|
|
print(stdout) |
|
131
|
|
|
f.close() |
|
132
|
|
|
|
|
133
|
|
|
if outerr.find('does not exist!') > -1: # not very pretty |
|
134
|
|
|
raise x3DNAMissingFile |
|
135
|
|
|
if outerr.find('not found') > -1: # not very pretty |
|
136
|
|
|
raise Exception('x3dna not found!') |
|
137
|
|
|
|
|
138
|
|
|
rx = re.compile('no. of DNA/RNA chains:\s+(?P<no_DNARNAchains>\d+)\s').search(stdout) |
|
139
|
|
|
if rx: |
|
140
|
|
|
no_of_DNARNA_chains = int(rx.group('no_DNARNAchains')) |
|
141
|
|
|
msg = 'py3dna::no of DNARNA chains' |
|
142
|
|
|
self.report = msg + '\n' + msg + '\n' # hack! |
|
143
|
|
|
else: |
|
144
|
|
|
raise Exception('no_of_DNARNA_chains not found') |
|
145
|
|
|
|
|
146
|
|
|
if no_of_DNARNA_chains: |
|
147
|
|
|
self.report = stdout.strip() |
|
148
|
|
|
|
|
149
|
|
|
def get_ion_water_report(self): |
|
150
|
|
|
"""@todo |
|
151
|
|
|
File name: /tmp/tmp0pdNHS |
|
152
|
|
|
no. of DNA/RNA chains: 0 [] |
|
153
|
|
|
no. of nucleotides: 174 |
|
154
|
|
|
no. of waters: 793 |
|
155
|
|
|
no. of metals: 33 [Na=29, Mg=1, K=3] |
|
156
|
|
|
""" |
|
157
|
|
|
pass |
|
158
|
|
|
|
|
159
|
|
|
def clean_up(self, verbose=False): |
|
160
|
|
|
files_to_remove = [ |
|
161
|
|
|
'dssr-helices.pdb', |
|
162
|
|
|
'dssr-pairs.pdb', |
|
163
|
|
|
'dssr-torsions.dat', |
|
164
|
|
|
'dssr-hairpins.pdb', |
|
165
|
|
|
'dssr-multiplets.pdb', |
|
166
|
|
|
'dssr-stems.pdb', |
|
167
|
|
|
'dssr-Aminors.pdb', |
|
168
|
|
|
'hel_regions.pdb', |
|
169
|
|
|
'bp_order.dat', |
|
170
|
|
|
'bestpairs.pdb', |
|
171
|
|
|
] |
|
172
|
|
|
|
|
173
|
|
|
for f in files_to_remove: |
|
174
|
|
|
try: |
|
175
|
|
|
os.remove(f) |
|
176
|
|
|
pass |
|
177
|
|
|
except OSError: |
|
178
|
|
|
if verbose: |
|
179
|
|
|
print('error: can not remove %s' % f) |
|
180
|
|
|
|
|
181
|
|
|
def get_seq(self): |
|
182
|
|
|
"""Get sequence. |
|
183
|
|
|
|
|
184
|
|
|
Somehow 1bzt_1 x3dna UCAGACUUUUAAPCUGA, what is P? |
|
185
|
|
|
P -> u |
|
186
|
|
|
|
|
187
|
|
|
""" |
|
188
|
|
|
return self.report.split('\n')[-2].replace('P', 'u').replace('I', 'a') |
|
189
|
|
|
|
|
190
|
|
|
def get_secstruc(self): |
|
191
|
|
|
"""Get secondary structure. |
|
192
|
|
|
""" |
|
193
|
|
|
hits = re.search("as a whole and per chain.*?\n(?P<ss>.+?)\n\*", self.report, re.DOTALL|re.MULTILINE) |
|
194
|
|
|
if hits: |
|
195
|
|
|
return hits.group('ss').strip() |
|
196
|
|
|
else: |
|
197
|
|
|
self.report.split('\n')[-1] # tofix |
|
198
|
|
|
|
|
199
|
|
|
def get_torsions(self, outfn) -> str: |
|
200
|
|
|
"""Get torsion angles into 'torsion.csv' file:: |
|
201
|
|
|
|
|
202
|
|
|
nt,id,res,alpha,beta,gamma,delta,epsilon,zeta,e-z,chi,phase-angle,sugar-type,ssZp,Dp,splay,bpseq |
|
203
|
|
|
1,g,A.GTP1,nan,nan,142.1,89.5,-131.0,-78.3,-53(BI),-178.2(anti),358.6(C2'-exo),~C3'-endo,4.68,4.68,29.98,0 |
|
204
|
|
|
2,G,A.G2,-75.8,-167.0,57.2,79.5,-143.4,-69.7,-74(BI),-169.2(anti),5.8(C3'-endo),~C3'-endo,4.68,4.76,25.61,0 |
|
205
|
|
|
|
|
206
|
|
|
""" |
|
207
|
|
|
angles = '' |
|
208
|
|
|
save = False |
|
209
|
|
|
|
|
210
|
|
|
c2pendo = [] |
|
211
|
|
|
c3pendo = [] |
|
212
|
|
|
if not os.path.isfile('dssr-torsions.txt'): |
|
213
|
|
|
|
|
214
|
|
|
print(f'Problem to get torsion angles for {self.curr_fn}') |
|
215
|
|
|
return f'Problem to get torsion angles for {self.curr_fn}' |
|
216
|
|
|
|
|
217
|
|
|
for l in open('dssr-torsions.txt'): |
|
218
|
|
|
if 'nt alpha beta' in l: |
|
219
|
|
|
save = True |
|
220
|
|
|
l = l.replace('nt', 'nt id res ') |
|
221
|
|
|
if '***************' in l and save: |
|
222
|
|
|
save = False |
|
223
|
|
|
if save: |
|
224
|
|
|
if "~C2'-endo" in l: |
|
225
|
|
|
c2pendo.append(l.split()[0]) |
|
226
|
|
|
if "~C3'-endo" in l: |
|
227
|
|
|
c3pendo.append(l.split()[0]) |
|
228
|
|
|
angles += l |
|
229
|
|
|
|
|
230
|
|
|
c2 = 'color pink, resi ' + '+'.join(c2pendo) |
|
231
|
|
|
c3 = 'color blue, resi ' + '+'.join(c3pendo) |
|
232
|
|
|
|
|
233
|
|
|
if args.pymol: |
|
|
|
|
|
|
234
|
|
|
print(c2) |
|
235
|
|
|
print(c3) |
|
236
|
|
|
return |
|
237
|
|
|
|
|
238
|
|
|
import re |
|
239
|
|
|
nangles = '' |
|
240
|
|
|
#'9 C 41', |
|
241
|
|
|
#'10 C 0', |
|
242
|
|
|
bpseq = ['bpseq'] + [x.strip().split()[2] for x in open('dssr-2ndstrs.bpseq').readlines()] |
|
243
|
|
|
for i, l in enumerate(angles.split('\n')): |
|
244
|
|
|
if l.strip(): |
|
245
|
|
|
l = re.sub(r'\s+', ',', l, 0, re.MULTILINE) |
|
246
|
|
|
if bpseq[i] == '0': |
|
247
|
|
|
bpseq[i] = 'no paired' |
|
248
|
|
|
else: |
|
249
|
|
|
bpseq[i] = 'paired' |
|
250
|
|
|
l = l[1:] + ',' + bpseq[i] + '\n' |
|
251
|
|
|
nangles += l |
|
252
|
|
|
nangles = re.sub(r'---', 'nan', nangles, 0, re.MULTILINE) |
|
253
|
|
|
with open(outfn, 'w') as f: |
|
254
|
|
|
f.write(nangles.strip()) |
|
255
|
|
|
|
|
256
|
|
|
if args.show: |
|
257
|
|
|
import pandas as pd |
|
258
|
|
|
df = pd.read_csv(outfn) |
|
259
|
|
|
print(df) |
|
260
|
|
|
return nangles.strip() |
|
261
|
|
|
|
|
262
|
|
|
# name |
|
263
|
|
|
if __name__ == '__main__': |
|
264
|
|
|
if not X3DNA: |
|
265
|
|
|
raise Exception( |
|
266
|
|
|
'Set up BINARY_PATH in rna_x3dna_config_local.py, .e.g "/Users/magnus/work/opt/x3dna/x3dna-dssr"') |
|
267
|
|
|
|
|
268
|
|
|
# get parser and arguments |
|
269
|
|
|
parser = get_parser() |
|
270
|
|
|
args = parser.parse_args() |
|
271
|
|
|
|
|
272
|
|
|
# try: |
|
273
|
|
|
# compact = sys.argv[2] |
|
274
|
|
|
# if compact == '-c': |
|
275
|
|
|
# compact = True |
|
276
|
|
|
# |
|
277
|
|
|
# except IndexError: |
|
278
|
|
|
# compact = False |
|
279
|
|
|
|
|
280
|
|
|
for f in args.files: |
|
281
|
|
|
if args.compact: |
|
282
|
|
|
p = x3DNA(f) |
|
283
|
|
|
print((f, p.get_secstruc())) |
|
284
|
|
|
else: |
|
285
|
|
|
print(f'input: {f}') |
|
286
|
|
|
outfn = os.path.basename(f.replace('.pdb', '')) + '-torsion-paired.csv' |
|
287
|
|
|
if not args.rerun: |
|
288
|
|
|
if os.path.isfile(outfn): |
|
289
|
|
|
print(f'skip: {f}, use --rerun to run analysis again') |
|
290
|
|
|
continue |
|
291
|
|
|
p = x3DNA(f, args.show_log, args.verbose) |
|
292
|
|
|
s = p.get_seq() |
|
293
|
|
|
print(s) |
|
294
|
|
|
s = p.get_secstruc() |
|
295
|
|
|
print(s) |
|
296
|
|
|
s = p.get_torsions(outfn) |
|
297
|
|
|
if args.verbose: print(s) |
|
298
|
|
|
p.clean_up(args.verbose) |
|
299
|
|
|
|