1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
"""Python parser to 3dna <http://x3dna.org/>. |
4
|
|
|
|
5
|
|
|
Installation:: |
6
|
|
|
|
7
|
|
|
# install the code from http://forum.x3dna.org/downloads/3dna-download/ |
8
|
|
|
Create a copy of the rna_x3dna_config_local_sample.py (remove "_sample") present in rna-tools/rna_tools/tools/rna_x3dna folder. |
9
|
|
|
Edit this line : |
10
|
|
|
BINARY_PATH = <path to your x3dna-dssr file> |
11
|
|
|
matching the path with the path of your x3dna-dssr file. |
12
|
|
|
e.g. in my case: BINARY_PATH = ~/bin/x3dna-dssr.bin |
13
|
|
|
|
14
|
|
|
For one structure you can run this script as:: |
15
|
|
|
|
16
|
|
|
[mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/1xjr.pdb |
17
|
|
|
test_data/1xjr.pdb |
18
|
|
|
>1xjr nts=47 [1xjr] -- secondary structure derived by DSSR |
19
|
|
|
gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU |
20
|
|
|
..(((((((...((((.((((.....))..))..))).).))))))) |
21
|
|
|
|
22
|
|
|
For multiple structures in the folder, run the script like this:: |
23
|
|
|
|
24
|
|
|
[mm] py3dna$ git:(master) ✗ ./rna_x3dna.py test_data/* |
25
|
|
|
test_data/1xjr.pdb |
26
|
|
|
>1xjr nts=47 [1xjr] -- secondary structure derived by DSSR |
27
|
|
|
gGAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU |
28
|
|
|
..(((((((...((((.((((.....))..))..))).).))))))) |
29
|
|
|
test_data/6TNA.pdb |
30
|
|
|
>6TNA nts=76 [6TNA] -- secondary structure derived by DSSR |
31
|
|
|
GCGGAUUUAgCUCAGuuGGGAGAGCgCCAGAcUgAAgAPcUGGAGgUCcUGUGtPCGaUCCACAGAAUUCGCACCA |
32
|
|
|
(((((((..((((.....[..)))).((((.........)))).....(((((..]....)))))))))))).... |
33
|
|
|
test_data/rp2_bujnicki_1_rpr.pdb |
34
|
|
|
>rp2_bujnicki_1_rpr nts=100 [rp2_bujnicki_1_rpr] -- secondary structure derived by DSSR |
35
|
|
|
CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU&CCGGAGGAACUACUG&CCGGCAGCCU |
36
|
|
|
[[[[(((.....(((&{{{{))))))&(((((((.....(.(&]]]]).))))&[[[[[[......[[[&))))]]].]]&}}}}(((.....(((&]]]])))))) |
37
|
|
|
|
38
|
|
|
.. warning:: This script should not be used in this given form with Parallel because it process output files from x3dna that are named always in the same way, e.g. dssr-torsions.txt. #TODO |
39
|
|
|
|
40
|
|
|
""" |
41
|
|
|
import re |
42
|
|
|
import argparse |
43
|
|
|
|
44
|
|
|
from subprocess import Popen, PIPE |
45
|
|
|
import os |
46
|
|
|
from rna_tools.rna_tools_config import X3DNA, X3DNA_FP |
47
|
|
|
# x3dna-dssr-64bit |
48
|
|
|
|
49
|
|
|
|
50
|
|
|
class x3DNAMissingFile(Exception): |
51
|
|
|
pass |
52
|
|
|
|
53
|
|
|
|
54
|
|
|
def get_parser(): |
55
|
|
|
parser = argparse.ArgumentParser( |
56
|
|
|
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) |
57
|
|
|
parser.add_argument('-c', '--compact', action='store_true') |
58
|
|
|
parser.add_argument('-x', '--rerun', action='store_true') |
59
|
|
|
parser.add_argument('-s', '--show', action='store_true', help="show results") |
60
|
|
|
parser.add_argument('--pymol', action='store_true', help='get resi to color code puckers in PyMOL') |
61
|
|
|
parser.add_argument('-l', '--show-log', action='store_true', help="show full log") |
62
|
|
|
parser.add_argument('-v', '--verbose', action='store_true', help="show full log") |
63
|
|
|
parser.add_argument('files', help='file', nargs='+') |
64
|
|
|
return parser |
65
|
|
|
|
66
|
|
|
|
67
|
|
|
class x3DNA(object): |
68
|
|
|
|
69
|
|
|
""" |
70
|
|
|
Atributes: |
71
|
|
|
|
72
|
|
|
**curr_fn** |
73
|
|
|
**report** |
74
|
|
|
|
75
|
|
|
""" |
76
|
|
|
|
77
|
|
|
def __init__(self, pdbfn, show_log=False): |
78
|
|
|
"""Set self.curr_fn based on pdbfn""" |
79
|
|
|
self.curr_fn = pdbfn |
80
|
|
|
self.run_x3dna(show_log) |
81
|
|
|
self.clean_up() |
82
|
|
|
|
83
|
|
|
def __get_report(self): |
84
|
|
|
"""Off right now. Run find_pair |
85
|
|
|
|
86
|
|
|
..warning:: To get modification use get_modifications() |
87
|
|
|
""" |
88
|
|
|
cmd = X3DNA_FP + ' ' + self.curr_fn + ' stdout | analyze stdin' # hack |
89
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
90
|
|
|
|
91
|
|
|
stdout = out.stdout.read() |
92
|
|
|
outerr = out.stderr.read() |
93
|
|
|
|
94
|
|
|
text = '' |
95
|
|
|
for l in outerr.split('\n'): |
96
|
|
|
if l.startswith('Time used') or l.startswith('..') or l.startswith('handling') or l.startswith('uncommon residue'): |
97
|
|
|
continue |
98
|
|
|
if l.strip(): |
99
|
|
|
text += l + '\n' |
100
|
|
|
return text.strip() |
101
|
|
|
|
102
|
|
|
def get_modifications(self): |
103
|
|
|
"""Run find_pair to find modifications. |
104
|
|
|
""" |
105
|
|
|
cmd = X3DNA_FP + ' -p ' + self.curr_fn + ' /tmp/fpout' # hack |
106
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
107
|
|
|
|
108
|
|
|
outerr = out.stderr.read() |
109
|
|
|
|
110
|
|
|
text = '' |
111
|
|
|
for l in outerr.split('\n'): |
112
|
|
|
if l.startswith('uncommon residue'): |
113
|
|
|
text += l.replace('uncommon residue ', '') + '\n' |
114
|
|
|
return text.strip() |
115
|
|
|
|
116
|
|
|
def run_x3dna(self, show_log=False, verbose=False): |
117
|
|
|
""" |
118
|
|
|
""" |
119
|
|
|
cmd = X3DNA + ' -i=' + self.curr_fn |
120
|
|
|
out = Popen([cmd], stderr=PIPE, stdout=PIPE, shell=True) |
121
|
|
|
|
122
|
|
|
stdout = str(out.stdout.read().decode()) |
123
|
|
|
outerr = str(out.stderr.read().decode()) |
124
|
|
|
|
125
|
|
|
f = open('py3dna.log', 'w') |
126
|
|
|
if verbose: print(f'cmd: {cmd}') |
127
|
|
|
f.write(cmd + '\n' + stdout) |
128
|
|
|
|
129
|
|
|
if show_log: |
130
|
|
|
print(stdout) |
131
|
|
|
f.close() |
132
|
|
|
|
133
|
|
|
if outerr.find('does not exist!') > -1: # not very pretty |
134
|
|
|
raise x3DNAMissingFile |
135
|
|
|
if outerr.find('not found') > -1: # not very pretty |
136
|
|
|
raise Exception('x3dna not found!') |
137
|
|
|
|
138
|
|
|
rx = re.compile('no. of DNA/RNA chains:\s+(?P<no_DNARNAchains>\d+)\s').search(stdout) |
139
|
|
|
if rx: |
140
|
|
|
no_of_DNARNA_chains = int(rx.group('no_DNARNAchains')) |
141
|
|
|
msg = 'py3dna::no of DNARNA chains' |
142
|
|
|
self.report = msg + '\n' + msg + '\n' # hack! |
143
|
|
|
else: |
144
|
|
|
raise Exception('no_of_DNARNA_chains not found') |
145
|
|
|
|
146
|
|
|
if no_of_DNARNA_chains: |
147
|
|
|
self.report = stdout.strip() |
148
|
|
|
|
149
|
|
|
def get_ion_water_report(self): |
150
|
|
|
"""@todo |
151
|
|
|
File name: /tmp/tmp0pdNHS |
152
|
|
|
no. of DNA/RNA chains: 0 [] |
153
|
|
|
no. of nucleotides: 174 |
154
|
|
|
no. of waters: 793 |
155
|
|
|
no. of metals: 33 [Na=29, Mg=1, K=3] |
156
|
|
|
""" |
157
|
|
|
pass |
158
|
|
|
|
159
|
|
|
def clean_up(self, verbose=False): |
160
|
|
|
files_to_remove = [ |
161
|
|
|
'dssr-helices.pdb', |
162
|
|
|
'dssr-pairs.pdb', |
163
|
|
|
'dssr-torsions.dat', |
164
|
|
|
'dssr-hairpins.pdb', |
165
|
|
|
'dssr-multiplets.pdb', |
166
|
|
|
'dssr-stems.pdb', |
167
|
|
|
'dssr-Aminors.pdb', |
168
|
|
|
'hel_regions.pdb', |
169
|
|
|
'bp_order.dat', |
170
|
|
|
'bestpairs.pdb', |
171
|
|
|
] |
172
|
|
|
|
173
|
|
|
for f in files_to_remove: |
174
|
|
|
try: |
175
|
|
|
os.remove(f) |
176
|
|
|
pass |
177
|
|
|
except OSError: |
178
|
|
|
if verbose: |
179
|
|
|
print('error: can not remove %s' % f) |
180
|
|
|
|
181
|
|
|
def get_seq(self): |
182
|
|
|
"""Get sequence. |
183
|
|
|
|
184
|
|
|
Somehow 1bzt_1 x3dna UCAGACUUUUAAPCUGA, what is P? |
185
|
|
|
P -> u |
186
|
|
|
|
187
|
|
|
""" |
188
|
|
|
return self.report.split('\n')[-2].replace('P', 'u').replace('I', 'a') |
189
|
|
|
|
190
|
|
|
def get_secstruc(self): |
191
|
|
|
"""Get secondary structure. |
192
|
|
|
""" |
193
|
|
|
hits = re.search("as a whole and per chain.*?\n(?P<ss>.+?)\n\*", self.report, re.DOTALL|re.MULTILINE) |
194
|
|
|
if hits: |
195
|
|
|
return hits.group('ss').strip() |
196
|
|
|
else: |
197
|
|
|
self.report.split('\n')[-1] # tofix |
198
|
|
|
|
199
|
|
|
def get_torsions(self, outfn) -> str: |
200
|
|
|
"""Get torsion angles into 'torsion.csv' file:: |
201
|
|
|
|
202
|
|
|
nt,id,res,alpha,beta,gamma,delta,epsilon,zeta,e-z,chi,phase-angle,sugar-type,ssZp,Dp,splay,bpseq |
203
|
|
|
1,g,A.GTP1,nan,nan,142.1,89.5,-131.0,-78.3,-53(BI),-178.2(anti),358.6(C2'-exo),~C3'-endo,4.68,4.68,29.98,0 |
204
|
|
|
2,G,A.G2,-75.8,-167.0,57.2,79.5,-143.4,-69.7,-74(BI),-169.2(anti),5.8(C3'-endo),~C3'-endo,4.68,4.76,25.61,0 |
205
|
|
|
|
206
|
|
|
""" |
207
|
|
|
angles = '' |
208
|
|
|
save = False |
209
|
|
|
|
210
|
|
|
c2pendo = [] |
211
|
|
|
c3pendo = [] |
212
|
|
|
if not os.path.isfile('dssr-torsions.txt'): |
213
|
|
|
|
214
|
|
|
print(f'Problem to get torsion angles for {self.curr_fn}') |
215
|
|
|
return f'Problem to get torsion angles for {self.curr_fn}' |
216
|
|
|
|
217
|
|
|
for l in open('dssr-torsions.txt'): |
218
|
|
|
if 'nt alpha beta' in l: |
219
|
|
|
save = True |
220
|
|
|
l = l.replace('nt', 'nt id res ') |
221
|
|
|
if '***************' in l and save: |
222
|
|
|
save = False |
223
|
|
|
if save: |
224
|
|
|
if "~C2'-endo" in l: |
225
|
|
|
c2pendo.append(l.split()[0]) |
226
|
|
|
if "~C3'-endo" in l: |
227
|
|
|
c3pendo.append(l.split()[0]) |
228
|
|
|
angles += l |
229
|
|
|
|
230
|
|
|
c2 = 'color pink, resi ' + '+'.join(c2pendo) |
231
|
|
|
c3 = 'color blue, resi ' + '+'.join(c3pendo) |
232
|
|
|
|
233
|
|
|
if args.pymol: |
|
|
|
|
234
|
|
|
print(c2) |
235
|
|
|
print(c3) |
236
|
|
|
return |
237
|
|
|
|
238
|
|
|
import re |
239
|
|
|
nangles = '' |
240
|
|
|
#'9 C 41', |
241
|
|
|
#'10 C 0', |
242
|
|
|
bpseq = ['bpseq'] + [x.strip().split()[2] for x in open('dssr-2ndstrs.bpseq').readlines()] |
243
|
|
|
for i, l in enumerate(angles.split('\n')): |
244
|
|
|
if l.strip(): |
245
|
|
|
l = re.sub(r'\s+', ',', l, 0, re.MULTILINE) |
246
|
|
|
if bpseq[i] == '0': |
247
|
|
|
bpseq[i] = 'no paired' |
248
|
|
|
else: |
249
|
|
|
bpseq[i] = 'paired' |
250
|
|
|
l = l[1:] + ',' + bpseq[i] + '\n' |
251
|
|
|
nangles += l |
252
|
|
|
nangles = re.sub(r'---', 'nan', nangles, 0, re.MULTILINE) |
253
|
|
|
with open(outfn, 'w') as f: |
254
|
|
|
f.write(nangles.strip()) |
255
|
|
|
|
256
|
|
|
if args.show: |
257
|
|
|
import pandas as pd |
258
|
|
|
df = pd.read_csv(outfn) |
259
|
|
|
print(df) |
260
|
|
|
return nangles.strip() |
261
|
|
|
|
262
|
|
|
# name |
263
|
|
|
if __name__ == '__main__': |
264
|
|
|
if not X3DNA: |
265
|
|
|
raise Exception( |
266
|
|
|
'Set up BINARY_PATH in rna_x3dna_config_local.py, .e.g "/Users/magnus/work/opt/x3dna/x3dna-dssr"') |
267
|
|
|
|
268
|
|
|
# get parser and arguments |
269
|
|
|
parser = get_parser() |
270
|
|
|
args = parser.parse_args() |
271
|
|
|
|
272
|
|
|
# try: |
273
|
|
|
# compact = sys.argv[2] |
274
|
|
|
# if compact == '-c': |
275
|
|
|
# compact = True |
276
|
|
|
# |
277
|
|
|
# except IndexError: |
278
|
|
|
# compact = False |
279
|
|
|
|
280
|
|
|
for f in args.files: |
281
|
|
|
if args.compact: |
282
|
|
|
p = x3DNA(f) |
283
|
|
|
print((f, p.get_secstruc())) |
284
|
|
|
else: |
285
|
|
|
print(f'input: {f}') |
286
|
|
|
outfn = os.path.basename(f.replace('.pdb', '')) + '-torsion-paired.csv' |
287
|
|
|
if not args.rerun: |
288
|
|
|
if os.path.isfile(outfn): |
289
|
|
|
print(f'skip: {f}, use --rerun to run analysis again') |
290
|
|
|
continue |
291
|
|
|
p = x3DNA(f, args.show_log, args.verbose) |
292
|
|
|
s = p.get_seq() |
293
|
|
|
print(s) |
294
|
|
|
s = p.get_secstruc() |
295
|
|
|
print(s) |
296
|
|
|
s = p.get_torsions(outfn) |
297
|
|
|
if args.verbose: print(s) |
298
|
|
|
p.clean_up(args.verbose) |
299
|
|
|
|