1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
|
4
|
|
|
"""RNAkb (previous Gromacs) utils. |
5
|
|
|
|
6
|
|
|
A module with different functions needed for Gromacs/RNAkb merriage. |
7
|
|
|
|
8
|
|
|
Marcin Magnus |
9
|
|
|
Albert Bogdanowicz |
10
|
|
|
|
11
|
|
|
(1) prepare groups and then (2) mdp score file. |
12
|
|
|
""" |
13
|
|
|
|
14
|
|
|
import re |
15
|
|
|
import os |
16
|
|
|
import argparse |
17
|
|
|
import rna_tools.tools.pdb_formatix.PDBFile as pf |
18
|
|
|
|
19
|
|
|
LIB_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + os.sep |
20
|
|
|
VERBOSE = False |
21
|
|
|
MDP_TEMPLATE = 'data/score_rnakb_orig.mdp' |
22
|
|
|
|
23
|
|
|
GROMACS_ALLOWED_5 = ("H5T", "O5'", "C5'", "H5'1", "H5'2", "C4'", "H4'", |
24
|
|
|
"O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6", "N6", "H61", |
25
|
|
|
"H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'", "H2'1", |
26
|
|
|
"O2'", "HO'2", "O3'", |
27
|
|
|
# new atoms |
28
|
|
|
"O2", "O4", "O6", "N2", "N4", |
29
|
|
|
'OP1', 'OP2', 'OP3', 'H21', 'H22', "H2'", "H5'", "HO5'", "H5''", "HO2'", |
30
|
|
|
'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41', |
31
|
|
|
) |
32
|
|
|
|
33
|
|
|
GROMACS_ALLOWED_MIDDLE = ("P", "O1P", "O2P", "O5'", "C5'", "H5'1", "H5'2", |
34
|
|
|
"C4'", "H4'", "O4'", "C1'", "H1'", "N9", "C8", "H8", "N7", "C5", "C6", |
35
|
|
|
"N6", "H61", "H62", "N1", "C2", "H2", "N3", "C4", "C3'", "H3'", "C2'", |
36
|
|
|
"H2'1", "O2'", "HO'2", "O3'", |
37
|
|
|
# new atoms |
38
|
|
|
"O2", "O4", "O6", "N2", "N4", |
39
|
|
|
'OP1', 'OP2', 'OP3', 'H21', 'H22', 'O6', "H2'", "H5'", "HO5'", "H5''", "HO2'", |
40
|
|
|
'P', 'H3', 'H1', 'H6', 'H5', 'H42', 'H41', |
41
|
|
|
) |
42
|
|
|
|
43
|
|
|
GROMACS_REQUIRED_GROUPS = ("aP", "aC4s", "aC2", "aC4", "aC6", |
44
|
|
|
"uP", "uC4s", "uC2", "uC4", "uC6", |
45
|
|
|
"gP", "gC4s", "gC2", "gC4", "gC6", |
46
|
|
|
"cP", "cC4s", "cC2", "cC4", "cC6", |
47
|
|
|
"RNA_5pt", "other" |
48
|
|
|
) |
49
|
|
|
|
50
|
|
|
|
51
|
|
|
def get_res_num(line): |
52
|
|
|
"""Extract residue number from a line of PDB file |
53
|
|
|
|
54
|
|
|
Arguments: |
55
|
|
|
* line = ATOM line from a PDB file |
56
|
|
|
|
57
|
|
|
Output: |
58
|
|
|
* residue number as an integer |
59
|
|
|
""" |
60
|
|
|
return int(''.join([x for x in line[22:27] if x.isdigit()])) |
61
|
|
|
|
62
|
|
|
|
63
|
|
|
def get_res_code(line): |
64
|
|
|
"""Get residue code from a line of a PDB file |
65
|
|
|
""" |
66
|
|
|
return line[17:20] |
67
|
|
|
|
68
|
|
|
def set_res_code(line, code): |
69
|
|
|
"""Set residue code from a line of a PDB file |
70
|
|
|
""" |
71
|
|
|
return line[:17] + code.rjust(3) + line[20:] |
72
|
|
|
|
73
|
|
|
def make_rna_gromacs_ready(pdb_string, verbose=VERBOSE): |
74
|
|
|
"""GROMACS has some special requirements for PDB files. |
75
|
|
|
|
76
|
|
|
Arguments: |
77
|
|
|
* pdb_string = contents of PDB file as a string |
78
|
|
|
|
79
|
|
|
Output: |
80
|
|
|
* new PDB returned as a string |
81
|
|
|
|
82
|
|
|
(!!!) # hmm... [ RA5 ] will not be detected based on it (!?) |
83
|
|
|
Hmm.. becase it detects if the structure is already prepared. |
84
|
|
|
""" |
85
|
|
|
#pdb_string = resname_3to1(pdb_string) |
86
|
|
|
#pdb_string = remove_hetatms(pdb_string) |
87
|
|
|
result = [] |
88
|
|
|
pdb_lines = pdb_string.split('\n') |
89
|
|
|
|
90
|
|
|
# find smallest residue number # ugly this does not work for 2 chains |
91
|
|
|
min_res = min(list(map(get_res_num, |
92
|
|
|
[l for l in pdb_lines if l.startswith('ATOM')]))) |
93
|
|
|
max_res = max(list(map(get_res_num, |
94
|
|
|
[l for l in pdb_lines if l.startswith('ATOM')]))) |
95
|
|
|
|
96
|
|
|
for l in pdb_lines: |
97
|
|
|
if l.startswith('ATOM') and l[17:20].strip() in ('A', 'U', 'C', 'G'): # hmm... [ RA5 ] will not be detected based on it (!?) |
98
|
|
|
res = get_res_code(l) |
99
|
|
|
#if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing |
100
|
|
|
# if verbose: |
101
|
|
|
# print('-- already gromacs ready') |
102
|
|
|
# return pdb_string |
103
|
|
|
|
104
|
|
|
if l.startswith('TER'): |
105
|
|
|
continue |
106
|
|
|
|
107
|
|
|
l = l.replace('*', '\'') |
108
|
|
|
l = l.replace('O1P', 'OP1') |
109
|
|
|
l = l.replace('O2P', 'OP2') |
110
|
|
|
|
111
|
|
|
res_num = get_res_num(l) |
112
|
|
|
atom_type = l.split()[2].strip() |
113
|
|
|
|
114
|
|
|
# remove P OP1 OP2 |
115
|
|
|
if res_num == min_res and atom_type == 'P': |
116
|
|
|
continue |
117
|
|
|
if res_num == min_res and atom_type == 'OP1': |
118
|
|
|
continue |
119
|
|
|
if res_num == min_res and atom_type == 'OP2': |
120
|
|
|
continue |
121
|
|
|
|
122
|
|
|
# convert G -> RG5, RG3 |
123
|
|
|
# do this only if res does not start with R |
124
|
|
|
cres = get_res_code(l).strip() |
125
|
|
|
# print(cres) |
126
|
|
|
# aaaaaaaaa |
127
|
|
|
if not cres.startswith('R'): |
128
|
|
|
if res_num == min_res: # RG5 |
129
|
|
|
l = set_res_code(l, 'R' + get_res_code(l).strip() + '5') |
130
|
|
|
elif res_num == max_res: # RG3 |
131
|
|
|
l = set_res_code(l, 'R' + get_res_code(l).strip() + '3') |
132
|
|
|
else: |
133
|
|
|
l = set_res_code(l, ' R' + get_res_code(l).strip()) |
134
|
|
|
|
135
|
|
|
if res_num == min_res: |
136
|
|
|
if atom_type in GROMACS_ALLOWED_5: |
137
|
|
|
result.append(l) |
138
|
|
|
else: |
139
|
|
|
print('Wrong start line: ', l, atom_type) |
140
|
|
|
else: |
141
|
|
|
if atom_type in GROMACS_ALLOWED_MIDDLE: |
142
|
|
|
result.append(l) |
143
|
|
|
else: |
144
|
|
|
print('Wrong middle line: ', l, atom_type) |
145
|
|
|
else: |
146
|
|
|
result.append(l) |
147
|
|
|
return '\n'.join(result) |
148
|
|
|
|
149
|
|
|
def make_rna_rnakb_ready(pdb_string, verbose=VERBOSE): |
150
|
|
|
"""RNAkb read (difference between this function and |
151
|
|
|
make_rna_gromacs_ready is ignoring R5U etc. RNAkb does not treat |
152
|
|
|
them differently so there is no point to distinguish them. |
153
|
|
|
|
154
|
|
|
Arguments: |
155
|
|
|
* pdb_string = contents of PDB file as a string |
156
|
|
|
|
157
|
|
|
Output: |
158
|
|
|
* new PDB returned as a string |
159
|
|
|
""" |
160
|
|
|
#pdb_string = resname_3to1(pdb_string) |
161
|
|
|
#pdb_string = remove_hetatms(pdb_string) |
162
|
|
|
result = [] |
163
|
|
|
pdb_lines = pdb_string.split('\n') |
164
|
|
|
|
165
|
|
|
# find smallest residue number |
166
|
|
|
min_res = min(list(map(get_res_num, |
167
|
|
|
[l for l in pdb_lines if l.startswith('ATOM')]))) |
168
|
|
|
max_res = max(list(map(get_res_num, |
169
|
|
|
[l for l in pdb_lines if l.startswith('ATOM')]))) |
170
|
|
|
|
171
|
|
|
for l in pdb_lines: |
172
|
|
|
if l.startswith('ATOM'):# and l[19] in ('A', 'U', 'C', 'G'): |
173
|
|
|
res = get_res_code(l) |
174
|
|
|
#if res.startswith('R') and res.endswith('5') : # it's RX5 file so skip fixing |
175
|
|
|
# if verbose: |
176
|
|
|
# print '-- already gromacs ready' |
177
|
|
|
# return pdb_string |
178
|
|
|
|
179
|
|
|
l = l.replace('*', '\'') |
180
|
|
|
l = l.replace('O1P', 'OP1') |
181
|
|
|
l = l.replace('O2P', 'OP2') |
182
|
|
|
|
183
|
|
|
res_num = get_res_num(l) |
184
|
|
|
atom_type = l.split()[2].strip() |
185
|
|
|
# remove P OP1 OP2 |
186
|
|
|
if res_num == min_res and atom_type == 'P': |
187
|
|
|
continue |
188
|
|
|
if res_num == min_res and atom_type == 'OP1': |
189
|
|
|
continue |
190
|
|
|
if res_num == min_res and atom_type == 'OP2': |
191
|
|
|
continue |
192
|
|
|
|
193
|
|
|
# convert G -> RG5, RG3 |
194
|
|
|
#if res_num == min_res: # RG5 |
195
|
|
|
# l = set_res_code(l, 'R' + get_res_code(l).strip() + '5') |
196
|
|
|
#elif res_num == max_res: # RG3 |
197
|
|
|
# l = set_res_code(l, 'R' + get_res_code(l).strip() + '3') |
198
|
|
|
#else: |
199
|
|
|
l = set_res_code(l, ' R' + get_res_code(l).strip().replace('R','').replace('3','').replace('5','')) # |
200
|
|
|
if res_num == min_res: |
201
|
|
|
if atom_type in GROMACS_ALLOWED_5: |
202
|
|
|
result.append(l) |
203
|
|
|
else: |
204
|
|
|
print(('Wrong start line: ', l, atom_type)) |
205
|
|
|
else: |
206
|
|
|
if atom_type in GROMACS_ALLOWED_MIDDLE: |
207
|
|
|
result.append(l) |
208
|
|
|
else: |
209
|
|
|
print(('Wrong middle line: ', l, atom_type)) |
210
|
|
|
else: # keep TER, etc. |
211
|
|
|
result.append(l) |
212
|
|
|
return '\n'.join(result) |
213
|
|
|
|
214
|
|
|
def fix_gromacs_gro(path, verbose=False): |
215
|
|
|
"""It's probably a bug in GROMACS, but box coordinates in gro files are |
216
|
|
|
not always separated by spaces. This function guesses how it should be |
217
|
|
|
separated and inserts spaces. |
218
|
|
|
|
219
|
|
|
Arguments: |
220
|
|
|
* path = path to gro file |
221
|
|
|
|
222
|
|
|
Output: |
223
|
|
|
* file is overwritten with a corrected one |
224
|
|
|
""" |
225
|
|
|
f = open(path) |
226
|
|
|
gro = f.read() |
227
|
|
|
f.close() |
228
|
|
|
gro_lines = gro.split('\n') |
229
|
|
|
last_line = gro_lines[-2] |
230
|
|
|
|
231
|
|
|
# check if there are a space |
232
|
|
|
if last_line.find(' ') == -1: |
233
|
|
|
dots = [i.start() for i in re.finditer('\\.', last_line)] |
234
|
|
|
# next 4 lines are a guess, I hope it works |
235
|
|
|
digits = len(last_line[dots[2]:]) |
236
|
|
|
box = [last_line[:dots[0] + digits], |
237
|
|
|
last_line[dots[0] + digits:dots[1] + digits], last_line[dots[1] + digits:]] |
238
|
|
|
gro_lines = gro_lines[:-2] |
239
|
|
|
gro_lines.append(' '.join(box)) |
240
|
|
|
gro_lines.append('') |
241
|
|
|
f = open(path, 'w') |
242
|
|
|
f.write('\n'.join(gro_lines)) |
243
|
|
|
f.close() |
244
|
|
|
|
245
|
|
|
|
246
|
|
|
def fix_gromacs_ndx(path): |
247
|
|
|
"""Sometimes, GROMACS index has some atoms in more than one group, or |
248
|
|
|
doesn't have all the groups grompp requires. This function fixes that. |
249
|
|
|
|
250
|
|
|
Arguments: |
251
|
|
|
* path = path to index file |
252
|
|
|
|
253
|
|
|
Output: |
254
|
|
|
* index is overwritten with a corrected one |
255
|
|
|
""" |
256
|
|
|
f = open(path) |
257
|
|
|
index = f.read() |
258
|
|
|
f.close() |
259
|
|
|
# split file into system group and the rest |
260
|
|
|
system_group = index.split('[ System ]')[1].split('[')[0] |
261
|
|
|
other_groups = ['[' + i for i in |
262
|
|
|
index.split('[ System ]')[1].split('[')[1:]] |
263
|
|
|
# remove duplicate numbers |
264
|
|
|
taken_atoms = [] |
265
|
|
|
for g in range(len(other_groups)): |
266
|
|
|
header = other_groups[g].split('\n')[0] |
267
|
|
|
group = other_groups[g].split('\n')[1].split() |
268
|
|
|
group = [a for a in group if a not in taken_atoms] |
269
|
|
|
taken_atoms.extend(group) |
270
|
|
|
other_groups[g] = header + '\n' + ' '.join(group) + '\n' |
271
|
|
|
# build result, part 1 |
272
|
|
|
result = ['[ System ]' + system_group] |
273
|
|
|
result.extend(other_groups) |
274
|
|
|
# add missing groups, leave them empty |
275
|
|
|
headers = [g.split('\n')[0][2:-2] for g in other_groups] |
276
|
|
|
missing_headers = [h for h in GROMACS_REQUIRED_GROUPS if h not in headers] |
277
|
|
|
result.extend(['[ %s ]\n' % h for h in missing_headers]) |
278
|
|
|
# write result to file |
279
|
|
|
result = ''.join(result) |
280
|
|
|
f = open(path, 'w') |
281
|
|
|
f.write(result) |
282
|
|
|
f.close() |
283
|
|
|
|
284
|
|
|
|
285
|
|
|
def prepare_groups(fn, gr_fn, potential='aa', verbose=False): |
286
|
|
|
""" Prepare an index for fn file. gr_fn is a file where gtxt is saved in. |
287
|
|
|
|
288
|
|
|
Get seq and uniq & sort it. |
289
|
|
|
``['RG5', 'RA', 'RA', 'RA', 'RG', 'RU', 'RA', 'RA', 'RC3'] set(['RU', 'RG', 'RC3', 'RG5', 'RA'])`` |
290
|
|
|
|
291
|
|
|
@todo RG5 etc -- done! |
292
|
|
|
|
293
|
|
|
gtxt:: |
294
|
|
|
|
295
|
|
|
del 1 |
296
|
|
|
r RU & a C1' |
297
|
|
|
name 1 uC1s |
298
|
|
|
r RU & a C2 |
299
|
|
|
name 2 uC2 |
300
|
|
|
r RU & a C2' |
301
|
|
|
name 3 uC2s |
302
|
|
|
... |
303
|
|
|
|
304
|
|
|
return, gtxt (groups_txt), energygrps . The result is saved to g_fn. |
305
|
|
|
energygrps: |
306
|
|
|
['uP', 'uC4s', 'uC2', 'uC4', 'uC6', 'gP', 'gC4s', 'gC2', 'gC4', 'gC6', 'aP', 'aC4s', 'aC2', 'aC4', 'aC6'] |
307
|
|
|
gtxt: |
308
|
|
|
RA |
309
|
|
|
del 1 |
310
|
|
|
r RU & a P |
311
|
|
|
name 1 uP |
312
|
|
|
r RU & a C4s |
313
|
|
|
name 2 uC4s |
314
|
|
|
r RU & a C2 |
315
|
|
|
name 3 uC2 |
316
|
|
|
r RU & a C4 |
317
|
|
|
[..] |
318
|
|
|
r RA & a C6 |
319
|
|
|
name 15 aC6 |
320
|
|
|
1|2|3|4|5|6|7|8|9|10|11|12|13|14|15 |
321
|
|
|
name 16 RNA_5pt |
322
|
|
|
0 & ! 16 |
323
|
|
|
name 17 other |
324
|
|
|
q |
325
|
|
|
""" |
326
|
|
|
p = pf.PDBFile(pdb_path = fn) |
327
|
|
|
seq = p.seq_from_amber_like_pdb().split() |
328
|
|
|
seq_uniq_sorted = set(seq) |
329
|
|
|
if verbose: print(('seq:', seq, seq_uniq_sorted)) |
330
|
|
|
seq_rnakb_order = [] |
331
|
|
|
if 'RA' in seq_uniq_sorted: |
332
|
|
|
seq_rnakb_order.append('RA') |
333
|
|
|
if 'RU' in seq_uniq_sorted: |
334
|
|
|
seq_rnakb_order.append('RU') |
335
|
|
|
if 'RG' in seq_uniq_sorted: |
336
|
|
|
seq_rnakb_order.append('RG') |
337
|
|
|
if 'RC' in seq_uniq_sorted: |
338
|
|
|
seq_rnakb_order.append('RC') |
339
|
|
|
if verbose:print(('seq_rnakb_order', seq_rnakb_order)) |
340
|
|
|
|
341
|
|
|
gtxt = 'del 1\n' |
342
|
|
|
c = 1 |
343
|
|
|
if potential == 'aa': |
344
|
|
|
# rg |
345
|
|
|
rg_atoms = "C3',C5,C4,C6,C8,O2',P,C2',O5',C5',C1',O3',O6,N2,N3,N1,N7,N9,C2,C4',O4',OP2,OP1".split(',') |
346
|
|
|
rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms] |
347
|
|
|
# ru |
348
|
|
|
ug_atoms = "C1',C2,C2',C3',C4,C4',C5,C5',C6,N1,N3,O2,O2',O3',O4,O4',O5',OP1,OP1,P".split(",") |
349
|
|
|
ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms] |
350
|
|
|
# ag |
351
|
|
|
ag_atoms = "O3',O2',N7,N1,N3,N9,C2',O5',N6,C5',C1',C2,C6,C5,C4,O4',C4',C8,C3',P,OP1,OP2".split(',') |
352
|
|
|
ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms] |
353
|
|
|
# cg |
354
|
|
|
cg_atoms = "C2',O2',C5',O5',C4,O2,C3',C2,O3',N4,N3,N1,P,C1',O4',C4',C5,C6,OP1,OP2".split(',') |
355
|
|
|
cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms] |
356
|
|
|
#N1, N3, O4', C5', O3', C2', C4, C1', O5', O1P, C4', C6, C5, C2, C3', P, O2P, O2, O4, O2' |
357
|
|
|
if verbose: print(('len-s:', len(rg_atoms), len(cg_atoms), len(ag_atoms), len(ug_atoms))) |
358
|
|
|
elif potential == '5pt': |
359
|
|
|
# rg |
360
|
|
|
rg_atoms = ["P", "C4'", "C2", "C4", "C6"] |
361
|
|
|
ug_atoms = ["P", "C4'", "C2", "C4", "C6"] |
362
|
|
|
ag_atoms = ["P", "C4'", "C2", "C4", "C6"] |
363
|
|
|
cg_atoms = ["P", "C4'", "C2", "C4", "C6"] |
364
|
|
|
rg_atoms2 = ['g' + a.strip().replace("'", 's') for a in rg_atoms] |
365
|
|
|
ug_atoms2 = ['u' + a.strip().replace("'", 's') for a in ug_atoms] |
366
|
|
|
ag_atoms2 = ['a' + a.strip().replace("'", 's') for a in ag_atoms] |
367
|
|
|
cg_atoms2 = ['c' + a.strip().replace("'", 's') for a in cg_atoms] |
368
|
|
|
else: |
369
|
|
|
raise Exception('Unknown potential, use all or 5pt') |
370
|
|
|
|
371
|
|
|
energygrps = [] |
372
|
|
|
|
373
|
|
|
for r in seq_rnakb_order: |
374
|
|
|
if r == 'RA': |
375
|
|
|
for x, y in zip(ag_atoms, ag_atoms2): |
376
|
|
|
gtxt += 'r RA & a %s\n' % x |
377
|
|
|
gtxt += 'name %i %s\n' % (c, y) |
378
|
|
|
c += 1 |
379
|
|
|
energygrps.extend(ag_atoms2) |
380
|
|
|
|
381
|
|
|
if r == 'RU': |
382
|
|
|
for x, y in zip(ug_atoms, ug_atoms2): |
383
|
|
|
gtxt += 'r RU & a %s\n' % x |
384
|
|
|
gtxt += 'name %i %s\n' % (c, y) |
385
|
|
|
c += 1 |
386
|
|
|
energygrps.extend(ug_atoms2) |
387
|
|
|
|
388
|
|
|
if r == 'RG': |
389
|
|
|
for x, y in zip(rg_atoms, rg_atoms2): |
390
|
|
|
gtxt += 'r RG & a %s\n' % x |
391
|
|
|
gtxt += 'name %i %s\n' % (c, y) |
392
|
|
|
c += 1 |
393
|
|
|
energygrps.extend(rg_atoms2) |
394
|
|
|
|
395
|
|
|
if r == 'RC': |
396
|
|
|
for x, y in zip(cg_atoms, cg_atoms2): |
397
|
|
|
gtxt += 'r RC & a %s\n' % x |
398
|
|
|
gtxt += 'name %i %s\n' % (c, y) |
399
|
|
|
c += 1 |
400
|
|
|
energygrps.extend(cg_atoms2) |
401
|
|
|
|
402
|
|
|
gtxt += '|'.join([str(x) for x in range(1, c)]) |
403
|
|
|
gtxt += '\nname %i RNA_%s' % (c, potential) # @todo |
404
|
|
|
gtxt += '\n0 & ! %i' % (c) |
405
|
|
|
gtxt += '\nname %i other' % (c + 1) |
406
|
|
|
gtxt += '\nq\n' |
407
|
|
|
if verbose: print(gtxt) |
408
|
|
|
|
409
|
|
|
with open(gr_fn, 'w') as f: |
410
|
|
|
f.write(gtxt) |
411
|
|
|
|
412
|
|
|
if verbose: print(('energygrps', energygrps)) |
413
|
|
|
return gtxt, energygrps, seq_rnakb_order |
414
|
|
|
|
415
|
|
|
|
416
|
|
|
def format_score_mdp(mdp_out, energygrps, seq, verbose=False): |
417
|
|
|
"""Get a template score mdp and replace energygrps |
418
|
|
|
(it can be generated with prepare_groups) |
419
|
|
|
and energygrp_table |
420
|
|
|
""" |
421
|
|
|
# load template |
422
|
|
|
with open(LIB_PATH + 'md/' + MDP_TEMPLATE, 'r') as f: |
423
|
|
|
txt = f.readlines() |
424
|
|
|
|
425
|
|
|
with open(LIB_PATH + 'md/data/rnakb_all.txt', 'r') as f: |
426
|
|
|
pairs = [i.strip() for i in f.readlines()] |
427
|
|
|
|
428
|
|
|
nmdp = '' |
429
|
|
|
for l in txt: |
430
|
|
|
if l.startswith('energygrps'): |
431
|
|
|
l = 'energygrps = ' + ' '.join(energygrps) + ' other' |
432
|
|
|
nmdp += l |
433
|
|
|
elif l.startswith('energygrp_table'): |
434
|
|
|
d = '' |
435
|
|
|
for x in energygrps: # ugly :-( |
436
|
|
|
for y in energygrps: |
437
|
|
|
s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path. |
438
|
|
|
s_reverse = '%s_%s' % (y, x) # try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg |
439
|
|
|
if s in pairs: |
440
|
|
|
d += '%s ' % s.replace('_', ' ') # '_' -> ' ' |
441
|
|
|
l = 'energygrp_table = ' + d.strip() |
442
|
|
|
nmdp += l |
443
|
|
|
elif l.startswith('energygrp_excl'): |
444
|
|
|
l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other' |
445
|
|
|
nmdp += l |
446
|
|
|
else: |
447
|
|
|
nmdp += l |
448
|
|
|
if verbose: print(nmdp) |
449
|
|
|
|
450
|
|
|
with open(mdp_out, 'w') as f: |
451
|
|
|
f.write(nmdp) |
452
|
|
|
|
453
|
|
|
|
454
|
|
|
def format_score_mdp_aa2(mdp_out, energygrps, seq, verbose=False): |
455
|
|
|
"""Get a template score mdp and replace energygrps |
456
|
|
|
(it can be generated with prepare_groups) |
457
|
|
|
and energygrp_table |
458
|
|
|
""" |
459
|
|
|
# load template |
460
|
|
|
with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f: |
461
|
|
|
txt = f.readlines() |
462
|
|
|
|
463
|
|
|
with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f: |
464
|
|
|
pairs = [i.strip() for i in f.readlines()] |
465
|
|
|
|
466
|
|
|
nmdp = '' |
467
|
|
|
for l in txt: |
468
|
|
|
if l.startswith('energygrps'): |
469
|
|
|
l = 'energygrps = ' + ' '.join(energygrps) + ' other' |
470
|
|
|
nmdp += l |
471
|
|
|
elif l.startswith('energygrp_table'): |
472
|
|
|
d = '' |
473
|
|
|
c = 1 |
474
|
|
|
used = '' |
475
|
|
|
for x in energygrps: # ugly :-( |
476
|
|
|
for y in energygrps: |
477
|
|
|
s = '%s_%s' % (x, y) # Library file table_uP_aP.xvg not found in current dir nor in your GMXLIB path. |
478
|
|
|
if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s): |
479
|
|
|
print(c, 'ok', s) |
480
|
|
|
else: |
481
|
|
|
s = '%s_%s' % (y, x) |
482
|
|
|
if os.path.isfile('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full/table_%s.xvg' % s): |
483
|
|
|
print(c, 're', s) |
484
|
|
|
print |
485
|
|
|
# try: /home/mqapRNA/mqaprna_env/db/RNA_5pt_full_sc1/table_aP_uP.xvg |
486
|
|
|
if s.replace('_', ' ') not in d: |
487
|
|
|
d += '%s ' % s.replace('_', ' ') # '_' -> ' ' |
488
|
|
|
print(c, '!!', s) |
489
|
|
|
c += 1 |
490
|
|
|
|
491
|
|
|
l = 'energygrp_table = ' + d.strip() |
492
|
|
|
print(len(l.split())) |
493
|
|
|
nmdp += l |
494
|
|
|
elif l.startswith('energygrp_excl'): |
495
|
|
|
l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other' |
496
|
|
|
nmdp += l |
497
|
|
|
else: |
498
|
|
|
nmdp += l |
499
|
|
|
if verbose: print(nmdp) |
500
|
|
|
|
501
|
|
|
with open(mdp_out, 'w') as f: |
502
|
|
|
f.write(nmdp) |
503
|
|
|
|
504
|
|
|
def format_score_mdp_aa(mdp_out, energygrps, seq, potential, verbose=False): |
505
|
|
|
"""Get a template score mdp and replace energygrps |
506
|
|
|
(it can be generated with prepare_groups) |
507
|
|
|
and energygrp_table |
508
|
|
|
""" |
509
|
|
|
# load template |
510
|
|
|
with open(LIB_PATH + 'rnakb_utils/' + MDP_TEMPLATE, 'r') as f: |
511
|
|
|
txt = f.readlines() |
512
|
|
|
|
513
|
|
|
with open(LIB_PATH + 'rnakb_utils/data/rnakb_all.txt', 'r') as f: |
514
|
|
|
pairs = [i.strip() for i in f.readlines()] |
515
|
|
|
|
516
|
|
|
nmdp = '' |
517
|
|
|
c = 1 |
518
|
|
|
for l in txt: |
519
|
|
|
if l.startswith('energygrps'): |
520
|
|
|
energygrps = [] |
521
|
|
|
for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'): |
522
|
|
|
if 'table_' in i: |
523
|
|
|
a, b = i.replace('table_', '').replace('.xvg', '').replace('_', ' ').split() |
524
|
|
|
if a not in energygrps: |
525
|
|
|
energygrps.append(a) |
526
|
|
|
if b not in energygrps: |
527
|
|
|
energygrps.append(b) |
528
|
|
|
l = 'energygrps = ' + ' '.join(energygrps) + ' other' |
529
|
|
|
nmdp += l |
530
|
|
|
elif l.startswith('energygrp_table'): |
531
|
|
|
groups = [] |
532
|
|
|
for i in os.listdir('/Users/magnus/work/papers/mqaprna/misc/mqaprna-db/RNA_aa_full'): |
533
|
|
|
if 'table_' in i: |
534
|
|
|
d = i.replace('table_', '').replace('.xvg', '').replace('_', ' ') |
535
|
|
|
if verbose: print(i.ljust(20), d) |
536
|
|
|
groups.append(d) |
537
|
|
|
nmdp += 'energygrp_table = ' + ' '.join(groups) |
538
|
|
|
elif l.startswith('energygrp_excl'): |
539
|
|
|
l = 'energygrp_excl = other other ' + ' other '.join(energygrps) + ' other' |
540
|
|
|
nmdp += l |
541
|
|
|
else: |
542
|
|
|
nmdp += l |
543
|
|
|
if verbose: print(nmdp) |
544
|
|
|
|
545
|
|
|
with open(mdp_out, 'w') as f: |
546
|
|
|
f.write(nmdp) |
547
|
|
|
|
548
|
|
|
|
549
|
|
|
def get_parser(): |
550
|
|
|
parser = argparse.ArgumentParser( |
551
|
|
|
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) |
552
|
|
|
|
553
|
|
|
#parser.add_argument('-', "--", help="", default="") |
554
|
|
|
parser.add_argument("-v", "--verbose", |
555
|
|
|
action="store_true", help="be verbose") |
556
|
|
|
parser.add_argument("file", help="", default="") |
557
|
|
|
return parser |
558
|
|
|
|
559
|
|
|
# main |
560
|
|
|
if __name__ == '__main__': |
561
|
|
|
#fn = 'test_data/decoy0165_amb_clx.pdb' |
562
|
|
|
fn = 'test_data/1duq.pdb' |
563
|
|
|
fn = 'test_data/1duq_rpr.pdb' |
564
|
|
|
# format_score_mdp('out.txt', '', '') |
565
|
|
|
# print(open('out.txt').read()) |
566
|
|
|
from rna_tools.rna_tools_lib import RNAStructure |
567
|
|
|
r = RNAStructure(fn) |
568
|
|
|
for i in r.get_all_chain_ids(): |
569
|
|
|
chain = r.get_chain(i) |
570
|
|
|
nchain = make_rna_gromacs_ready(chain) |
571
|
|
|
print(nchain).strip() |
572
|
|
|
|