1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
"""This module contains functions for computing RNAkb potential |
4
|
|
|
|
5
|
|
|
It seems that this is impossible to run RNAkb in full atom mode. So this works only in 5 pt (5 points/atom per residue) mode. |
6
|
|
|
|
7
|
|
|
https://gromacs.bioexcel.eu/t/fatal-error-an-input-file-contains-a-line-longer-than-4095-characters/1397/2 |
8
|
|
|
""" |
9
|
|
|
import os |
10
|
|
|
import sys |
11
|
|
|
import re |
12
|
|
|
|
13
|
|
|
from math import isnan |
14
|
|
|
from shutil import copyfile |
15
|
|
|
|
16
|
|
|
from rna_tools.tools.mq.lib.wrappers.SubprocessUtils import run_command |
17
|
|
|
from rna_tools.tools.mq.lib.wrappers.base_wrappers import ProgramWrapper |
18
|
|
|
from rna_tools.tools.md.rnakb_utils import make_rna_rnakb_ready, fix_gromacs_gro, prepare_groups, format_score_mdp |
19
|
|
|
from rna_tools.rna_tools_config import GRMLIB_PATH, DB_AA_PATH, DB_5PT_PATH, WRAPPERS_PATH, GROMACS_LD_PATH, GROMACS_PATH_BIN, TMP_PATH |
20
|
|
|
|
21
|
|
|
import tempfile |
22
|
|
|
|
23
|
|
|
class RNAkb(ProgramWrapper): |
24
|
|
|
"""Wrapper class for running RNAkb automatically. |
25
|
|
|
""" |
26
|
|
|
executable = ['/usr/local/gromacs/bin/' + exe for exe in ['pdb2gmx', 'make_ndx', 'editconf', 'grompp', 'mdrun']] |
27
|
|
|
|
28
|
|
|
def __init__(self, job_id='', sandbox=False): |
29
|
|
|
SANDBOX_PATH = tempfile.TemporaryDirectory().name # '.' or './sandbox' for sandbox here |
30
|
|
|
super(RNAkb, self).__init__('', '', job_id=job_id) |
31
|
|
|
|
32
|
|
|
# sandbox q-&-d |
33
|
|
|
if sandbox: |
34
|
|
|
try: |
35
|
|
|
os.mkdir(SANDBOX_PATH) |
36
|
|
|
except: |
37
|
|
|
pass |
38
|
|
|
# os.system('rm ' + SANDBOX_PATH + '/*') |
39
|
|
|
self.sandbox_dir= SANDBOX_PATH |
40
|
|
|
|
41
|
|
|
def log_stdout_stderr(self): |
42
|
|
|
stdout_file = open(self.sandbox_dir + os.sep + 'out.txt') |
43
|
|
|
stderr_file = open(self.sandbox_dir + os.sep + 'err.txt') |
44
|
|
|
stdout = stdout_file.read() |
45
|
|
|
stderr = stderr_file.read() |
46
|
|
|
self.log('STDOUT:\n' + stdout, 'debug') |
47
|
|
|
self.log('STDERR:\n' + stderr, 'debug') |
48
|
|
|
|
49
|
|
|
def _get_result(self, md_file): |
50
|
|
|
"""Format results as a dictionary.""" |
51
|
|
|
log_file = open(md_file) |
52
|
|
|
lines = log_file.readlines() |
53
|
|
|
log_file.close() |
54
|
|
|
res = [] |
55
|
|
|
append = False |
56
|
|
|
headline = None |
57
|
|
|
for l in lines: |
58
|
|
|
if append: |
59
|
|
|
res.append(l) |
60
|
|
|
if 'LJ-SR' in l: |
61
|
|
|
headline = l |
62
|
|
|
append = True |
63
|
|
|
if 'other-other' in l: |
64
|
|
|
break |
65
|
|
|
res_index = headline.split().index('LJ-SR') - 1 |
66
|
|
|
res_dict = {} |
67
|
|
|
for l in res: |
68
|
|
|
res_dict[l.split()[0]] = l.split()[res_index] |
69
|
|
|
return res_dict |
70
|
|
|
|
71
|
|
|
|
72
|
|
|
def run(self, name, potential_type, verbose=False): |
73
|
|
|
"""Compute RNAkb potential for a single file |
74
|
|
|
|
75
|
|
|
Args: |
76
|
|
|
|
77
|
|
|
path (str): name of a PDB file |
78
|
|
|
potential_type (str): '5pt' for 5 point or 'aa' for all atom aa is off |
79
|
|
|
|
80
|
|
|
Returns: |
81
|
|
|
|
82
|
|
|
a list of energies (strings) ['2.57116e+05', '1.62131e+03', '7.82459e+02', '3.00789e-01', '0.00000e+00', '0.00000e+00', '-2.51238e-03', '0.00000e+00', '2.59520e+05', '2.54174e-09', '2.59520e+05'] |
83
|
|
|
|
84
|
|
|
.. warning:: 'aa' does not work because of "a line longer than 4095 characters" |
85
|
|
|
|
86
|
|
|
The function parses:: |
87
|
|
|
|
88
|
|
|
Statistics over 1 steps using 1 frames |
89
|
|
|
Energies (kJ/mol) |
90
|
|
|
Bond Angle Proper Dih. Improper Dih. LJ-14 |
91
|
|
|
2.44111e+05 1.76069e+03 8.12947e+02 1.82656e-01 0.00000e+00 |
92
|
|
|
Coulomb-14 LJ (SR) Coulomb (SR) Potential Kinetic En. |
93
|
|
|
0.00000e+00 -1.94624e-03 0.00000e+00 2.46685e+05 2.43227e-09 |
94
|
|
|
Total Energy Temperature Pressure (bar) |
95
|
|
|
2.46685e+05 6.67884e-10 -5.94232e+04 |
96
|
|
|
Total Virial |
97
|
|
|
|
98
|
|
|
# ['Statistics', 'over', '1', 'steps', 'using', '1', 'frames', 'Energies', '(kJ/mol)', |
99
|
|
|
'Bond', 'Angle', 'Proper', 'Dih.', 'Improper', 'Dih.', 'LJ-14', '2.44111e+05', '1.76069e+03', |
100
|
|
|
'8.12947e+02', '1.82656e-01', '0.00000e+00', 'Coulomb-14', 'LJ', '(SR)', 'Coulomb', '(SR)', |
101
|
|
|
'Potential', 'Kinetic', 'En.', '0.00000e+00', '-1.94624e-03', '0.00000e+00', '2.46685e+05', |
102
|
|
|
'2.43227e-09', 'Total', 'Energy', 'Temperature', 'Pressure', '(bar)', '2.46685e+05', '6.67884e-10', |
103
|
|
|
'-5.94232e+04', 'Total', 'Virial'] |
104
|
|
|
|
105
|
|
|
result[6] is really the RNAkb |
106
|
|
|
|
107
|
|
|
""" |
108
|
|
|
prep = False |
109
|
|
|
|
110
|
|
|
self.sandbox_dir = self.sandbox_dir + os.sep |
111
|
|
|
|
112
|
|
|
self.log('Run %s' % name) |
113
|
|
|
if verbose: print('input: ', name, '\n', '-' * 80) |
114
|
|
|
copyfile(name, self.sandbox_dir + 'query.pdb') |
115
|
|
|
|
116
|
|
|
f = open(name) |
117
|
|
|
pdb_string = f.read() |
118
|
|
|
f.close() |
119
|
|
|
old_pdb = pdb_string |
120
|
|
|
pdb_string = make_rna_rnakb_ready(pdb_string) |
121
|
|
|
if old_pdb != pdb_string: |
122
|
|
|
self.pdb_fixes.append('make_rna_rnakb_ready') |
123
|
|
|
|
124
|
|
|
fn = self.sandbox_dir + 'query.pdb' |
125
|
|
|
if verbose: print('Gromacs ready file created: %s' % fn) |
126
|
|
|
f = open(fn, 'w') |
127
|
|
|
f.write(pdb_string) |
128
|
|
|
f.close() |
129
|
|
|
|
130
|
|
|
try: |
131
|
|
|
old_path = os.getcwd() |
132
|
|
|
except OSError: |
133
|
|
|
pass |
134
|
|
|
|
135
|
|
|
os.chdir(self.sandbox_dir) |
136
|
|
|
my_env = os.environ.copy() |
137
|
|
|
|
138
|
|
|
if potential_type == 'aa': |
139
|
|
|
my_env['GRMLIB'] = GRMLIB_PATH + ':' + DB_AA_PATH |
140
|
|
|
my_env['GMXLIB'] = GRMLIB_PATH + ':' + DB_AA_PATH |
141
|
|
|
elif potential_type == '5pt': |
142
|
|
|
my_env['GRMLIB'] = GRMLIB_PATH + ':' + DB_5PT_PATH |
143
|
|
|
my_env['GMXLIB'] = GRMLIB_PATH + ':' + DB_5PT_PATH |
144
|
|
|
else: |
145
|
|
|
raise Exception('RNAkb -- select aa or 5pt') |
146
|
|
|
|
147
|
|
|
if verbose: |
148
|
|
|
print('ff_aa:', GRMLIB_PATH + ':' + DB_AA_PATH) |
149
|
|
|
print('ff_5pt:', GRMLIB_PATH + ':' + DB_5PT_PATH) |
150
|
|
|
|
151
|
|
|
my_env['GMX_MAXBACKUP'] = '-1' |
152
|
|
|
my_env['LD_LIBRARY_PATH'] = GROMACS_LD_PATH |
153
|
|
|
|
154
|
|
|
# create topology files |
155
|
|
|
self.log('create topology files') |
156
|
|
|
# pdb2gmx |
157
|
|
|
out = run_command(self.executable[0], # '-v', |
158
|
|
|
['-quiet', '-ff', 'amber03', '-water', 'none' , '-f', self.sandbox_dir + 'query.pdb', '-o', self.sandbox_dir + 'query.gro', |
159
|
|
|
'-p', self.sandbox_dir + 'query.top', |
160
|
|
|
], |
161
|
|
|
env=my_env, |
162
|
|
|
cmd_input='1\n6\n', |
163
|
|
|
stdout_file=self.sandbox_dir + os.sep + 'out.txt', |
164
|
|
|
stderr_file=self.sandbox_dir + os.sep + 'err.txt', |
165
|
|
|
verbose=verbose, |
166
|
|
|
) |
167
|
|
|
if out: |
168
|
|
|
print(out) |
169
|
|
|
self.log('Run failed') |
170
|
|
|
self.log_stdout_stderr() |
171
|
|
|
os.chdir(old_path) |
172
|
|
|
return -1 |
173
|
|
|
|
174
|
|
|
# make index files |
175
|
|
|
fix_gromacs_gro(self.sandbox_dir + os.sep + 'query.gro') |
176
|
|
|
self.log('make index files') |
177
|
|
|
command = 'GRMLIB=' + my_env['GRMLIB'] + ' ' |
178
|
|
|
command += 'GMXLIB=' + my_env['GMXLIB'] + ' ' |
179
|
|
|
command += 'LD_LIBRARY_PATH=' + my_env['LD_LIBRARY_PATH'] + ' ' |
180
|
|
|
|
181
|
|
|
|
182
|
|
|
# generate sandbox/groups.txt @groups |
183
|
|
|
gr = self.sandbox_dir + 'groups.txt' |
184
|
|
|
gtxt, energygrps, seq_uniq = prepare_groups(self.sandbox_dir + 'query.pdb', gr, potential=potential_type) |
185
|
|
|
if verbose: print('Groups file created: %s' % gr) |
186
|
|
|
|
187
|
|
|
# generate sandbox/score.mdp @score |
188
|
|
|
fout = self.sandbox_dir + 'score.mdp' |
189
|
|
|
|
190
|
|
|
if potential_type == 'aa': # ugly |
191
|
|
|
#format_score_mdp_aa(fout, energygrps, seq_uniq) |
192
|
|
|
format_score_mdp(fout, energygrps, seq_uniq) |
193
|
|
|
else: |
194
|
|
|
format_score_mdp(fout, energygrps, seq_uniq) |
195
|
|
|
|
196
|
|
|
command += self.executable[1] + ' -f query.gro > /dev/null 2> /dev/null < ' + gr |
197
|
|
|
#command += self.executable[1] + ' -f query.gro <' + gr |
198
|
|
|
bash_script = "#!/bin/bash\n" + command + "\n" |
199
|
|
|
with open(self.sandbox_dir + os.sep + 'make_index.sh', 'w') as f: |
200
|
|
|
f.write(bash_script) |
201
|
|
|
cmd = 'bash ' + self.sandbox_dir + 'make_index.sh'# >/dev/null' |
202
|
|
|
if verbose: print('index cmd:', cmd) |
203
|
|
|
os.system(cmd) |
204
|
|
|
|
205
|
|
|
## energy computation |
206
|
|
|
self.log('prepare energy computation') |
207
|
|
|
run_error = run_command(self.executable[3], # grompp |
208
|
|
|
['-noh', '-c', self.sandbox_dir + 'query.gro', '-p', self.sandbox_dir + 'query.top', |
209
|
|
|
'-time', '1', |
210
|
|
|
'-f', self.sandbox_dir + 'score.mdp', |
211
|
|
|
'-n', self.sandbox_dir + 'index.ndx', |
212
|
|
|
'-o', self.sandbox_dir + 'run.tpr', '-maxwarn', '15'], |
213
|
|
|
env=my_env, |
214
|
|
|
stdout_file=self.sandbox_dir + 'out.txt', |
215
|
|
|
stderr_file=self.sandbox_dir + 'err.txt', |
216
|
|
|
verbose=verbose, |
217
|
|
|
) |
218
|
|
|
if not run_error: |
219
|
|
|
if verbose: print('prepare energy computation: OK', run_error) |
220
|
|
|
else: |
221
|
|
|
if verbose: print('prepare compute energy: FAIL, try to increase the box') |
222
|
|
|
# something went wrong, probably following lines will fix it |
223
|
|
|
f = open(self.sandbox_dir + os.sep + 'query.gro') |
224
|
|
|
box_size = f.readlines()[-1].split() |
225
|
|
|
f.close() |
226
|
|
|
|
227
|
|
|
box_size = [float(i) for i in box_size] |
228
|
|
|
max_size = 10 |
229
|
|
|
|
230
|
|
|
for mult in range(5, max_size): |
231
|
|
|
## increase box size |
232
|
|
|
if verbose: print('> Increase box size %d times' % mult) |
233
|
|
|
self.log('Increase box size %d times' % mult) |
234
|
|
|
|
235
|
|
|
run_error = run_command(self.executable[2], # editconf |
236
|
|
|
['-f', 'query.gro', '-o', 'query_big.gro', |
237
|
|
|
'-box', ' '.join([str(i*mult) for i in box_size])], |
238
|
|
|
env=my_env, |
239
|
|
|
stdout_file=self.sandbox_dir + os.sep + 'out.txt', |
240
|
|
|
stderr_file=self.sandbox_dir + os.sep + 'err.txt' |
241
|
|
|
) |
242
|
|
|
|
243
|
|
|
if run_error: |
244
|
|
|
self.log('Run failed', 'error') |
245
|
|
|
self.log_stdout_stderr() |
246
|
|
|
os.chdir(old_path) |
247
|
|
|
else: |
248
|
|
|
if verbose: print('Increase box size: OK') |
249
|
|
|
|
250
|
|
|
# prepare energy computation again |
251
|
|
|
run_error = run_command(self.executable[3], # grompp |
252
|
|
|
['-c', 'query_big.gro', '-p', 'query.top', |
253
|
|
|
'-f', self.sandbox_dir + 'score.mdp', |
254
|
|
|
'-n', self.sandbox_dir + 'index.ndx', |
255
|
|
|
'-o', 'run.tpr', '-maxwarn', '15'], |
256
|
|
|
env=my_env, |
257
|
|
|
stdout_file=self.sandbox_dir + os.sep + 'out.txt', |
258
|
|
|
stderr_file=self.sandbox_dir + os.sep + 'err.txt', |
259
|
|
|
verbose=verbose) |
260
|
|
|
|
261
|
|
|
if run_error: |
262
|
|
|
if verbose: print('Grompp: FAIL') |
263
|
|
|
with open(self.sandbox_dir + os.sep + 'err.txt') as f: |
264
|
|
|
txt = f.read() |
265
|
|
|
if txt.find('referenced in the') > -1: |
266
|
|
|
if verbose: print(txt) #print 'Group X referenced -- error' |
267
|
|
|
return -2 # leave the loop |
268
|
|
|
elif txt.find('The cut-off length is longer') > -1: |
269
|
|
|
if verbose: print('The cuf-off length -- error') |
270
|
|
|
else: |
271
|
|
|
if verbose: print('unknown -- error, see ' + self.sandbox_dir + 'err.txt') |
272
|
|
|
#sys.exit(1) # don't kill the function |
273
|
|
|
pass |
274
|
|
|
|
275
|
|
|
self.log_stdout_stderr() |
276
|
|
|
else: |
277
|
|
|
if verbose: print('Grompp: OK') |
278
|
|
|
break # leave the loop! |
279
|
|
|
|
280
|
|
|
if not os.path.exists(self.sandbox_dir + os.sep + 'run.tpr'): |
281
|
|
|
self.log('Run failed %s' % name, 'error') |
282
|
|
|
self.log_stdout_stderr() |
283
|
|
|
os.chdir(old_path) |
284
|
|
|
|
285
|
|
|
self.log('compute energy') |
286
|
|
|
run_error = run_command(self.executable[4], |
287
|
|
|
['-table', 'table.xvg', '-tablep', 'tablep.xvg', |
288
|
|
|
'-nt', '1', |
289
|
|
|
'-s', self.sandbox_dir + 'run.tpr', '-e', self.sandbox_dir + 'run.edr'], |
290
|
|
|
env=my_env, |
291
|
|
|
stdout_file=self.sandbox_dir + os.sep + 'out.txt', |
292
|
|
|
stderr_file=self.sandbox_dir + os.sep + 'err.txt', |
293
|
|
|
verbose=verbose, |
294
|
|
|
) |
295
|
|
|
if not run_error: |
296
|
|
|
if verbose: print('compute energy: OK', run_error) |
297
|
|
|
else: |
298
|
|
|
if verbose: print('compute energy: FAIL', run_error) |
299
|
|
|
self.log('Run failed', 'error') |
300
|
|
|
self.log_stdout_stderr() |
301
|
|
|
os.chdir(old_path) |
302
|
|
|
#return 255 |
303
|
|
|
return [str(x) for x in [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]] |
304
|
|
|
try: |
305
|
|
|
os.chdir(old_pwd) |
|
|
|
|
306
|
|
|
except (OSError, NameError): |
307
|
|
|
pass |
308
|
|
|
|
309
|
|
|
# extract result and change format |
310
|
|
|
result = self._get_result(self.sandbox_dir + os.sep + 'md.log') |
311
|
|
|
result_sum = sum([float(result[k]) for k in result]) |
312
|
|
|
|
313
|
|
|
md_txt = open(self.sandbox_dir + os.sep + 'md.log').read() |
314
|
|
|
energies_txt = re.search('Statistics over.*Total Virial', md_txt, flags=re.MULTILINE|re.DOTALL).group(0) |
315
|
|
|
energies = energies_txt.split()[16:21]+energies_txt.split()[29:34]+[energies_txt.split()[39]] |
316
|
|
|
if isnan(result_sum): |
317
|
|
|
self.log('Result is NaN', 'error') |
318
|
|
|
os.chdir(old_path) |
319
|
|
|
return [str(x) for x in [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]] |
320
|
|
|
else: |
321
|
|
|
self.log('Run successfull %s' % name) |
322
|
|
|
os.chdir(old_path) |
323
|
|
|
return energies |
324
|
|
|
#return result_sum != 0 and result_sum or -1 |
325
|
|
|
|
326
|
|
|
if __name__ == '__main__': |
327
|
|
|
wrapper = RNAkb(sandbox=True) |
328
|
|
|
|
329
|
|
|
fn = '1dqf_noCA.pdb' # problem, C is shifted to the right |
330
|
|
|
fn = '1dqf_noCA+ResnShift.pdb' # OK |
331
|
|
|
#fn = '1msy_output4_01-000001_AA.pdb' # OK |
332
|
|
|
#fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clean_noC.pdb' # OK |
333
|
|
|
#fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clean_error_fx.pdb' |
334
|
|
|
#fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clx.pdb' |
335
|
|
|
#fn = 'C.pdb' |
336
|
|
|
|
337
|
|
|
# 5pt |
338
|
|
|
if False: |
339
|
|
|
result = wrapper.run('test_data' |
340
|
|
|
+ os.sep + fn , '5pt', verbose=False) |
341
|
|
|
print(fn, result) |
342
|
|
|
#wrapper.cleanup() |
343
|
|
|
|
344
|
|
|
# all |
345
|
|
|
result = wrapper.run('test_data' + os.sep + fn , '5pt', verbose=False) # or aa |
346
|
|
|
print(fn, result[6]) |
347
|
|
|
#wrapper.cleanup() |
348
|
|
|
|