|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
# -*- coding: utf-8 -*- |
|
3
|
|
|
"""mqaprna.py - a script for running all wrapers on each PDB file in a specified directory |
|
4
|
|
|
saves results to a CSV file. |
|
5
|
|
|
|
|
6
|
|
|
ss_agreement is ... |
|
7
|
|
|
|
|
8
|
|
|
ClashScore,AnalyzeGeometry,SimRNA_0,RNAscore,eSCORE,RNAkb,RASP,RNA3DCNN,Dfire,FARNA,FARFAR2,FARNA_hires,FARFAR2_hires |
|
9
|
|
|
|
|
10
|
|
|
The code is full of hacks and tricks (look for the ``# hack`` comments). |
|
11
|
|
|
|
|
12
|
|
|
.. warning:: Uses global variables |
|
13
|
|
|
|
|
14
|
|
|
Install: |
|
15
|
|
|
|
|
16
|
|
|
csvsort |
|
17
|
|
|
|
|
18
|
|
|
Cmd:: |
|
19
|
|
|
|
|
20
|
|
|
# find . -iname 'FARFAR2*.csv' -exec cat {} + > FARFAR2_hires.csv |
|
21
|
|
|
$ rna_mq_collect.py -t FARFAR2_hires -m 4 -f -o FARFAR2_hires.csv -l all.txt x.pdb |
|
22
|
|
|
# fake x.pdb when -l is used, -l gets a list of files |
|
23
|
|
|
x.pdb |
|
24
|
|
|
y.pdb |
|
25
|
|
|
z.pdb |
|
26
|
|
|
|
|
27
|
|
|
88% (49329 of 55689) |############### | Elapsed Time: 0:45:23 ETA: 2 days, 18:42:16 |
|
28
|
|
|
|
|
29
|
|
|
For this python progressbar works:: |
|
30
|
|
|
|
|
31
|
|
|
Python 3.7.10 (default, Feb 26 2021, 18:47:35) |
|
32
|
|
|
[GCC 7.3.0] :: Anaconda, Inc. on linux |
|
33
|
|
|
|
|
34
|
|
|
""" |
|
35
|
|
|
# Developer switches, both off by default.
# DEBUG_MODE: single_run() calls the wrappers without the try/except, so a
#             wrapper failure propagates instead of being stored as 'error'.
# MP_VERBOSE: multiprocessing's own logger is sent to stderr at SUBDEBUG level.
DEBUG_MODE = False
MP_VERBOSE = False
|
37
|
|
|
|
|
38
|
|
|
################################################################################ |
|
39
|
|
|
import sys |
|
40
|
|
|
#sys.path.insert(0, "/Users/magnus/work/src/rna-tools/rna_tools/tools/mq/") # ugly! |
|
41
|
|
|
import progressbar |
|
42
|
|
|
# import mqaprna_score as mqs |
|
43
|
|
|
import time |
|
44
|
|
|
import os |
|
45
|
|
|
import copy |
|
46
|
|
|
from csvsort import csvsort |
|
47
|
|
|
|
|
48
|
|
|
import rna_tools.tools.mq.lib.shellgraphics.shellgraphics as sg |
|
49
|
|
|
sg.color_mode = False |
|
50
|
|
|
from rna_tools.tools.mq.lib.timex import timex |
|
51
|
|
|
#import rna_tools.tools.mq.mqaprna_config as Config |
|
52
|
|
|
import rna_tools.rna_tools_config as Config |
|
53
|
|
|
################################################################################ |
|
54
|
|
|
|
|
55
|
|
|
import rna_tools |
|
56
|
|
|
__version__ = rna_tools.__version__ |
|
57
|
|
|
|
|
58
|
|
|
import os |
|
59
|
|
|
import sys |
|
60
|
|
|
DIRNAME = os.path.dirname(__file__) |
|
61
|
|
|
PARENT_DIRNAME = os.path.abspath(os.path.join(DIRNAME, os.path.pardir)) |
|
62
|
|
|
sys.path.append(DIRNAME) |
|
63
|
|
|
sys.path.append(PARENT_DIRNAME) |
|
64
|
|
|
import csv |
|
65
|
|
|
import imp |
|
66
|
|
|
|
|
67
|
|
|
from optparse import OptionParser, OptionGroup |
|
68
|
|
|
from ctypes import c_int |
|
69
|
|
|
|
|
70
|
|
|
#import lib.rmsd_calc.rmsd_calc as rmsd_calc |
|
71
|
|
|
from multiprocessing import Pool, Lock, Value |
|
72
|
|
|
|
|
73
|
|
|
try: |
|
74
|
|
|
from wrappers.mqap_score.mqap_score import MqapScore |
|
75
|
|
|
except ImportError: |
|
76
|
|
|
pass |
|
77
|
|
|
|
|
78
|
|
|
# Optional, very chatty diagnostics of the multiprocessing machinery.
if MP_VERBOSE:
    import multiprocessing
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

# Load one wrapper module per configured method.  A name such as
# '<wrapper>_<variant>' is keyed (and loaded) under '<wrapper>' only; the
# file is expected at <WRAPPERS_PATH>/<wrapper>/<wrapper>.py.
MODULES = {}
for m in Config.METHOD_LIST:
    if '_' in m:
        m, n = m.split('_')
    wrapper_path = os.path.join(Config.WRAPPERS_PATH, m, m + '.py')
    module = imp.load_source(m, wrapper_path)
    MODULES[m] = module

# Module-level state read (and partly modified) by single_run() and
# RunAllDirectory.run().
cleanup = True                 # call wrapper.cleanup() after every run
methods = Config.METHOD_LIST   # same list object as the config, not a copy
c = 0

# Shared between worker processes: number of files written so far.
counter_lock = Lock()
counter = Value(c_int)
|
100
|
|
|
|
|
101
|
|
|
# ['farna_rna_base_axis', 'farna_rna_backbone_backbone', 'farna_rna_base_stack_axis', 'farna_rna_base_stagger', 'farna_rna_base_stack', 'farna_rna_base_pair', 'farna_rna_repulsive', 'farna_rna_vdw', 'farna_rna_base_backbone', 'farna_score_lowres', 'farna_rna_data_backbone', 'farna_linear_chainbreak', 'farna_rna_rg', 'farna_atom_pair_constraint'], |
|
102
|
|
|
|
|
103
|
|
|
steps = '0'  # step suffix that appears in the QRNA column names

# Building blocks for the column table below (kept private to this module).
_RASP_SCOPES = ('c3', 'bb', 'bbr', 'all')
_RASP_FIELDS = ('pdb_energy', 'no_contacts', 'norm_energy',
                'mean_energy', 'sd_energy', 'zscore')
_RNAKB_TERMS = (
    'rnakb_bond',
    'rnakb_angle',
    'rnakb_proper_dih',
    'rnakb_improper_dih',
    'rnakb_lj14',
    'rnakb_coulomb14',
    'rnakb_lj_sr',
    'rnakb_coulomb_sr',
    'rnakb_potential',
    'rnakb_kinetic_en',
    'rnakb_total_energy',
)

# Method name -> CSV column names contributed by that method, in the order
# in which they are added to the CSV header.
attributes = {
    'QRNA': ['qrna_' + steps + '_electro', 'qrna_' + steps],

    # one group of six columns per RASP scope: c3, bb, bbr, all
    'RASP': ['rasp_' + scope + '_' + field
             for scope in _RASP_SCOPES
             for field in _RASP_FIELDS],

    'SimRNA_0': [
        'simrna_steps',
        'simrna_total_energy',
        'simrna_base_base',
        'simrna_short_stacking',
        'simrna_base_backbone',
        'simrna_local_geometry',
        'simrna_bonds_dist_cp',
        'simrna_bonds_dist_pc',
        'simrna_flat_angles_cpc',
        'simrna_flat_angles_pcp',
        'simrna_tors_eta_theta',
        'simrna_sphere_penalty',
        'simrna_chain_energy',
    ],

    'RNAkb': list(_RNAKB_TERMS),
    # same terms as RNAkb, each with an '_all' suffix
    'RNAkb_all': [term + '_all' for term in _RNAKB_TERMS],

    'RNAscore': ['x3rnascore'],
    'AnalyzeGeometry': ['analyze_geometry'],
    'SSAgreement': ['ss_disagreement'],
    'ClashScore': ['clash_score'],
    'Ernwin_1': ['ernwin_1'],
    'Ernwin_1k': ['ernwin_1k'],
    'eSCORE': ['escore'],
    'RNA3DCNN': ['rna3dcnn'],
    'Dfire': ['dfire'],

    'FARNA': [
        'farna_score_lowres',
        'farna_rna_data_backbone',
        'farna_rna_vdw',
        'farna_rna_base_backbone',
        'farna_rna_backbone_backbone',
        'farna_rna_repulsive',
        'farna_rna_base_pair',
        'farna_rna_base_axis',
        'farna_rna_base_stagger',
        'farna_rna_base_stack',
        'farna_rna_base_stack_axis',
        'farna_rna_rg',
        'farna_atom_pair_constraint',
        'farna_linear_chainbreak',
    ],

    'FARNA_hires': [
        'farna_score_hires',
        'farna_fa_atr',
        'farna_fa_rep',
        'farna_fa_intra_rep',
        'farna_lk_nonpolar',
        'farna_fa_elec_rna_phos_phos',
        'farna_ch_bond',
        'farna_rna_torsion',
        'farna_rna_sugar_close',
        'farna_hbond_sr_bb_sc',
        'farna_hbond_lr_bb_sc',
        'farna_hbond_sc',
        'farna_geom_sol',
        'farna_atom_pair_constraint_hires',
        'farna_linear_chainbreak_hires',
    ],

    # NOTE: the low-resolution FARFAR2 columns reuse the 'farna_' prefix.
    'FARFAR2': [
        'farna_score_lowres',
        'farna_rna_vdw',
        'farna_rna_base_backbone',
        'farna_rna_backbone_backbone',
        'farna_rna_repulsive',
        'farna_rna_base_pair',
        'farna_rna_base_axis',
        'farna_rna_base_stagger',
        'farna_rna_base_stack',
        'farna_rna_base_stack_axis',
        'farna_rna_rg',
        'farna_atom_pair_constraint',
        'farna_linear_chainbreak',
    ],

    'FARFAR2_hires': [
        'ff2_score_hires',
        'ff2_fa_atr',
        'ff2_fa_rep',
        'ff2_fa_intra_rep',
        'ff2_lk_nonpolar',
        'ff2_fa_elec_rna_phos_phos',
        'ff2_rna_torsion',
        'ff2_suiteness_bonus',
        'ff2_rna_sugar_close',
        'ff2_fa_stack',
        'ff2_stack_elec',
        'ff2_geom_sol_fast',
        'ff2_bond_sr_bb_sc',
        'ff2_hbond_lr_bb_sc',
        'ff2_hbond_sc',
        'ff2_ref',
        'ff2_free_suite',
        'ff2_free_2HOprime',
        'ff2_intermol',
        'ff2_other_pose',
        'ff2_loop_close',
        'ff2_linear_chainbreak_hires',
    ],

    'rmsd_all': ['rmsd_all'],
}
|
175
|
|
|
|
|
176
|
|
View Code Duplication |
def single_run(filename):
    """Run every configured method on one file and append the row to the CSV.

    For each name in the global ``methods`` list the matching wrapper class
    is taken from ``MODULES``, instantiated and run on the file.  The results
    (plus, optionally, the RMSD to the native structure and the MqapScore
    meta-score) are then written as a single row through the global
    ``csv_writer`` while the global ``lock`` is held; the shared ``counter``
    and the progress ``bar`` are updated in the same critical section.

    :param filename: a ``(path, total_number_of_files)`` pair, as built by
        ``RunAllDirectory.run``; only the path is used here.
    :returns: None

    [!] Use global cleanup = False to block cleaning up

    .. warning:: The function reads module-level globals that are set up by
       ``RunAllDirectory.run`` and the ``__main__`` block: ``methods``,
       ``verbose``, ``opt``, ``ref_seq``, ``lock``, ``counter``, ``bar``,
       ``csv_writer``, ``csv_file`` (and ``MODULES``, ``Config``,
       ``cleanup``, ``DEBUG_MODE``).  It no longer modifies ``methods``.
    """
    filename, filename_length = filename  # filename_length is not needed here

    all_results = {}

    for m in methods:
        arguments = ''
        # mfull is the key under which the result is stored (the name as
        # configured); m is rewritten below to the wrapper module/class name.
        mfull = m
        if verbose:
            print(m + '...')  # show method 'eSCORE...'

        if m == 'FARNA':
            arguments = [filename, False]
        elif m == 'FARNA_hires':
            m = 'FARNA'
            arguments = [filename, True]
        elif m == 'FARFAR2':
            arguments = [filename, False]
        elif m == 'FARFAR2_hires':
            m = 'FARFAR2'
            arguments = [filename, True]
        elif m == 'RNAkb_all':
            m = 'RNAkb'
            arguments = [filename, 'aa']

        # Generic '<wrapper>_<n>' names: the suffix is passed to the wrapper
        # as its second argument, with 'k'/'m' expanded to zeros.
        if m.find('_') > -1:
            m, n = m.split('_')
            n = n.replace('n', '')  # n_XXX
            n = n.replace('k', '000')
            n = n.replace('m', '000000')
            arguments = [filename, n]

        if not arguments:
            arguments = [filename] + Config.WRAPPER_OPTIONS[m]

        if m == 'escore':
            m = 'eSCORE'
        wrapper = getattr(MODULES[m], m)()

        # NOTE(review): 'NAST_pyro' contains '_' and has already been split
        # into 'NAST' above, so this guard (and the matching release below)
        # looks unreachable.  Kept as in the original; confirm the intent.
        if m == 'NAST_pyro':
            lock.acquire()

        if DEBUG_MODE:
            # let wrapper failures propagate so they can be debugged
            result = wrapper.run(*arguments)
            if verbose:
                print(m, result)  # ClashScore 12.256669
            all_results[mfull] = result
            if cleanup:
                wrapper.cleanup()
        else:
            try:
                result = wrapper.run(*arguments)
                all_results[mfull] = result
                if cleanup:
                    wrapper.cleanup()
            except Exception:
                # Best effort: record the failure and go on with the next
                # method.  KeyboardInterrupt/SystemExit are deliberately not
                # caught, so a killed job is not recorded as an 'error'.
                all_results[mfull] = 'error'
                if cleanup:
                    wrapper.cleanup()

        # all_results now looks like:
        # {'ClashScore': 12.256669, 'AnalyzeGeometry': 32.5581, 'FARNA': [...]}

        if m == 'NAST_pyro':
            lock.release()

    # Columns of this row.  A local copy is used, so the global ``methods``
    # list stays untouched even if something below raises.
    columns = list(methods)

    # get rmsd
    if opt.native_pdb_filename:
        # NOTE(review): the import of rmsd_calc is commented out at the top
        # of the file, so this call raises NameError unless rmsd_calc is
        # provided some other way.
        rmsd = rmsd_calc.get_rmsd(opt.native_pdb_filename,
                                  filename)
        all_results['rmsd'] = rmsd
        columns.append('rmsd')

    # length (of the reference sequence; 0 when no seq/ss file was given)
    all_results['length'] = len(ref_seq)

    if opt.mqapscore:
        # meta-score
        ms = MqapScore(all_results)
        mqap_score = ms.get_score()
        columns.append('SCORE')
        all_results['SCORE'] = mqap_score

    # Counter, progress bar and CSV are shared between worker processes.
    # ``with`` guarantees the lock is released even if writing fails.
    with lock:
        counter.value += 1

        bar.update(counter.value)
        # print results, use --verbose now
        if verbose:
            print(filename.split(os.sep)[-1].ljust(20) + ' ' + str(all_results))

        cells = [counter.value, filename.split(os.sep)[-1]]  # add id
        for name in columns:
            value = all_results[name]
            if isinstance(value, list):
                cells.extend(value)  # multi-column methods
            else:
                cells.append(value)
        csv_writer.writerow(cells)
        csv_file.flush()
|
347
|
|
|
|
|
348
|
|
|
|
|
349
|
|
View Code Duplication |
def option_parser():
    """Get options or show usage msg.

    Positional arguments that do not end with ``.pdb`` are dropped.  When no
    PDB file is left and no ``-l/--list-of-files`` was given, the help text
    and the currently configured methods are printed and the process exits
    with status 1.  (Previously a dummy ``x.pdb`` had to be passed together
    with ``-l``; that still works.)

    :returns: tuple ``(arguments, opt)`` - the list of ``.pdb`` paths from
        the command line and the ``optparse`` options object.
    """
    version = __version__
    # '\\ ' is the literal backslash + space the usage text has always
    # shown; it is written escaped to avoid an invalid-escape warning.
    usage = '\t%prog [-m <number_processes>] [-n <native_pdb_filename>] [-s <seq_ss_filename>] [-g <ignore_pdb_filename>] \\ \n\t -o <output csv> <dir/*> # [!] no .csv! the file will get version of mqaprna \n\t' + __version__
    parser = OptionParser(description=__doc__,
                          version=version,
                          usage=usage)

    parser.add_option("-q", "--mQapscore",
                      action="store_true", default=False, dest="mqapscore", help="calculate mqapscore")

    parser.add_option("-v", "--verbose",
                      action="store_true", default=False, dest="verbose", help="verbose")

    parser.add_option("--force",
                      action="store_true", default=False)

    parser.add_option("-f", "--no-filename-version",
                      action="store_true", default=False, dest="no_filename_version", help="don't add version of tool to csv filename")

    parser.add_option("-n", "--native_pdb_filename",
                      action="store", type="string", dest="native_pdb_filename", help="native structure in PDB format to calculate RMSD")

    parser.add_option("-m", "--multiprocessing",
                      action="store", type="int", dest="number_processes", default=8,
                      help="set a number of processes, default=8, 0 is no multiprocessing")

    group2 = OptionGroup(parser, "Ignore pdbs, don't have empty lines here! Example",
                         "1xjrA_output3-000142_AA.pdb\n"
                         "1xjrA_output3-000208_AA.pdb\n"
                         "1xjrA_output3-000166_AA.pdb")

    group2.add_option("-g", "--ignore-pdbs",
                      action="store", type="string", dest="ignore_pdb_filename")

    group = OptionGroup(parser, "Seq-SS. Example",
                        ">1xjrA\n"
                        "GAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU\n"
                        ".(((((((...((((.((((.....))..))..))).).)))))))")

    group.add_option("-t", "--methods",
                     action="store", type="string", dest="methods", help=', '.join(['RASP', 'SimRNA', 'AnalyzeGeometry','FARNA', 'QRNA', 'NAST_pyro',
                                                                                    'radius_of_gyration', 'SSAgreement', 'ClashScore', 'RNAkb',
                                                                                    'RNAkb_all', 'FARNA_hires', 'FARNA', 'FARFAR2',
                                                                                    'FARFAR2_hires', 'Dfire', 'RNA3DCNN', 'eSCORE']))

    group.add_option("-s", "--seq-ss",
                     action="store", type="string", dest="seq_ss_filename", help="")

    group.add_option("-o", "--output",
                     action="store", type="string", dest="output", help="output csv file")

    group.add_option("-l", "--list-of-files",
                     action="store", type="string", dest="list_of_files", help="list of files")

    parser.add_option_group(group)
    parser.add_option_group(group2)

    (opt, arguments) = parser.parse_args()

    # only PDB files are accepted as positional arguments
    arguments = [f for f in arguments if f.endswith('.pdb')]

    # Nothing to do: neither PDB files on the command line nor a list file.
    if not arguments and not opt.list_of_files:
        parser.print_help()
        print('\n Curr methods: ', ','.join(methods), end=' ')
        sys.exit(1)

    return arguments, opt
|
421
|
|
|
|
|
422
|
|
|
|
|
423
|
|
|
class RunAllDirectory():
    """Class for running wrappers for all files in a directory
    """
    def __init__(self):
        pass

    def run(self, filenames, csv_path, opt):
        """Open csv (with appropriate headers), run methods, print & save csv

        There are two modes of execution:
        * multiprocessing (``opt.number_processes`` > 0, a ``Pool`` is used)
        * single (``opt.number_processes`` == 0, files are run one by one)

        :param filenames: list of input paths.  Backup/temporary names
            (ending with ``~`` or ``.out``, or containing ``._``) are removed
            from this list IN PLACE.
        :param csv_path: path of the CSV file; it is opened in append mode
            and a header row is written on every call.
        :param opt: options object from ``option_parser()``.  Unless
            ``opt.force`` is set, ``opt.ignore_pdb_filename`` is overwritten
            with the list of existing ``*<opt.methods>*.csv`` files and the
            files already listed in them (rows without 'error') are skipped.
        :returns: None

        .. warning:: Works on global variables: ref_seq, ref_ss, verbose,
           methods, lock, c, csv_file, csv_writer, bar (they are read by
           ``single_run``).
        """
        global ref_seq, ref_ss, verbose, methods, lock, c
        global csv_file, csv_writer  # hack
        global bar
        import glob

        if opt.seq_ss_filename:
            with open(opt.seq_ss_filename) as seq_ss_file:
                pdb_id, ref_seq, ref_ss = [x.strip() for x in seq_ss_file.read().strip().split('\n')]
            sg.poptions({'AnalyzeGeometry': True, 'SSAgreement' : True})
            sg.poption('pdb_id', pdb_id)
            sg.poption('ref_seq', ref_seq)
            sg.poption('ref_ss', ref_ss)
        else:
            pdb_id, ref_seq, ref_ss = ['', '', '']
            sg.poptions({'SSAgreement' : True})
            # hack: without a seq/ss file SSAgreement is not run
            try:  # if it's not on the list
                methods.remove('SSAgreement')
            except ValueError:
                pass

        verbose = opt.verbose

        # csv open & add header; newline='' as recommended for csv.writer
        csv_file = open(csv_path, 'a', newline='')
        try:
            csv_writer = csv.writer(csv_file, delimiter=',')
            # make header
            headers = ['id', 'fn']
            for m in methods:
                headers += attributes[m]
            if opt.native_pdb_filename:
                headers += ['RMSDALL']
            if opt.mqapscore:
                headers += ['SCORE']
            csv_writer.writerow(headers)
            csv_file.flush()

            # Remove ~, .out and ._ names.  Done in one pass and in place:
            # the caller's list is updated as before, but a name matching
            # two rules can no longer trigger a second remove() (ValueError).
            filenames[:] = [f for f in filenames
                            if not (f.endswith(('~', '.out')) or '._' in f)]

            # Collect files that were already processed in earlier runs.
            # NOTE(review): a value given with -g is overwritten here and
            # never read - confirm whether -g should be honoured.
            files_to_ignore = []
            if not opt.force:
                opt.ignore_pdb_filename = glob.glob('*' + opt.methods + '*.csv')
                for previous_csv in opt.ignore_pdb_filename:  # do it for the list, that's nice!
                    with open(previous_csv) as previous:
                        rows = previous.read().strip().split('\n')
                    for row in rows:
                        if 'error' in row:
                            # Don't add files with errors, so the program
                            # will be re-run for them.  A real error fails
                            # again quickly; 'errors' caused by a killed job
                            # get a proper second chance.
                            continue
                        if row.find('\t') > -1:
                            row = row.split('\t')[1]  # id, fn
                        if row.find(',') > -1:
                            row = row.split(',')[1]  # id, fn
                        files_to_ignore.append(os.path.basename(row))

            ## files to ignore
            print(' to ignore', len(files_to_ignore), files_to_ignore[:4])

            # Files still to be analyzed: the (filtered) input minus names
            # starting with '_' and minus files already done.
            already_done = set(files_to_ignore)
            filenames = [f for f in filenames
                         if not f.startswith('_')
                         and os.path.basename(f) not in already_done]
            print(' files to analyze: %s' % len(filenames), filenames[:300])

            sg.phr()

            lock = Lock()

            # continue counting (and the progress bar) after the ignored files
            counter.value = len(files_to_ignore)
            c = 1

            filenames_length = len(filenames) + len(files_to_ignore)

            bar = progressbar.ProgressBar(max_value=filenames_length)
            bar.update(len(files_to_ignore))

            fl = [[f, filenames_length] for f in filenames]

            # two running modes
            if opt.number_processes:
                # map() returns only when all files are done; leaving the
                # ``with`` block then shuts the worker processes down.
                with Pool(opt.number_processes) as p:
                    p.map(single_run, fl)
            else:
                for filename, x in fl:
                    single_run((filename, x))
        finally:
            csv_file.close()
|
555
|
|
|
|
|
556
|
|
|
#main |
|
557
|
|
|
# NOTE: this block must stay at module level (not inside a main() function):
# single_run() and RunAllDirectory.run() read `opt` and `methods`, which are
# assigned here, as module globals.
if __name__ == '__main__':
    from icecream import ic
    import platform
    import sys
    ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
    ic.configureOutput(prefix='> ')

    t = timex.Timex()
    t.start()

    arguments, opt = option_parser()

    # files: PDB paths from the command line plus, with -l, one path per
    # line of the list file (blank lines are skipped)
    input_files = arguments[:]
    if opt.list_of_files:
        with open(opt.list_of_files) as list_file:
            for l in list_file:
                l = l.strip()
                if l:
                    input_files.append(l)

    if not opt.methods:
        opt.methods = ','.join(Config.METHOD_LIST)

    # name of the output csv
    if opt.no_filename_version:
        # -f without -o used to leave the name as None and crash on open();
        # fall back to '<methods>.csv'
        output_csv = opt.output or opt.methods + '.csv'
    else:
        hostname = platform.node()
        if opt.output:
            output_csv = opt.output.replace('.csv','') + '-' + __version__ + '-' + hostname + '.csv'
        else:
            output_csv = opt.methods + '-' + __version__ + '-' + hostname + '.csv'

    sg.pbanner_simply(os.path.basename(sys.argv[0]))

    # first configured option of RNAkb / RASP, shown in the summary only
    try:
        rnakb_option = Config.WRAPPER_OPTIONS['RNAkb'][0]
    except KeyError:
        rnakb_option = None
    try:
        rasp_option = Config.WRAPPER_OPTIONS['RASP'][0]
    except KeyError:
        rasp_option = None

    if opt.methods:
        methods = [x.strip() for x in opt.methods.split(',')]

    print('ver:', __version__ + '\n')
    print('start ', time.strftime("%Y-%m-%d %H:%M:%S"))

    opts = {
        'Input files': '#' + str(len(input_files)) + ' ' + str(input_files[:3]),
        'Multiprocessing': bool(opt.number_processes),
        'Output csv': output_csv,
        'Seq ss fn': opt.seq_ss_filename,
        'Ignore pdb fn': opt.ignore_pdb_filename,
        'Native pdb': opt.native_pdb_filename,
        'RNAkb' : rnakb_option,
        'RASP' : rasp_option,
        'Model path' : Config.ML_MODEL_PATH,
        'Methods' : ','.join(methods),
        'Verbose' : opt.verbose,
    }
    sg.poptions(opts)

    print('python:', platform.python_version())

    runner = RunAllDirectory()
    runner.run(input_files, output_csv, opt)

    log = t.end('process: %i' % opt.number_processes)
    print('\n', log)
    print('Output: %s \n' % output_csv)

    ## log: timing, options and output name go to <output>.log
    log_fn = output_csv.replace('.csv', '.log')
    with open(log_fn, 'w') as f:
        f.write(log + '\n')
        f.write(str(opts) + '\n')
        f.write('Output: %s\n' % output_csv)
    print('logging: %s' % log_fn)
    print('logging wrappers %s' % Config.LOG_DIRECTORY + os.sep)
|
643
|
|
|
|