| Total Complexity | 55 |
| Total Lines | 643 |
| Duplicated Lines | 40.12 % |
| Changes | 0 |
Duplicate code is one of the most pungent code smells. A commonly used rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like build.rna_tools.tools.mq.rna_mq_collect often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
| 1 | #!/usr/bin/env python |
||
| 2 | # -*- coding: utf-8 -*- |
||
| 3 | """mqaprna.py - a script for running all wrapers on each PDB file in a specified directory |
||
| 4 | saves results to a CSV file. |
||
| 5 | |||
| 6 | ss_agreement is ... |
||
| 7 | |||
| 8 | ClashScore,AnalyzeGeometry,SimRNA_0,RNAscore,eSCORE,RNAkb,RASP,RNA3DCNN,Dfire,FARNA,FARFAR2,FARNA_hires,FARFAR2_hires |
||
| 9 | |||
| 10 | The code is full of # hack and tricks. |
||
| 11 | |||
| 12 | .. warning:: Uses global variables |
||
| 13 | |||
| 14 | Install: |
||
| 15 | |||
| 16 | csvsort |
||
| 17 | |||
| 18 | Cmd:: |
||
| 19 | |||
| 20 | # find . -iname 'FARFAR2*.csv' -exec cat {} + > FARFAR2_hires.csv |
||
| 21 | $ rna_mq_collect.py -t FARFAR2_hires -m 4 -f -o FARFAR2_hires.csv -l all.txt x.pdb |
||
| 22 | # fake x.pdb when -l is used, -l gets a list of files |
||
| 23 | x.pdb |
||
| 24 | y.pdb |
||
| 25 | z.pdb |
||
| 26 | |||
| 27 | 88% (49329 of 55689) |############### | Elapsed Time: 0:45:23 ETA: 2 days, 18:42:16 |
||
| 28 | |||
| 29 | For this python progressbar works:: |
||
| 30 | |||
| 31 | Python 3.7.10 (default, Feb 26 2021, 18:47:35) |
||
| 32 | [GCC 7.3.0] :: Anaconda, Inc. on linux |
||
| 33 | |||
| 34 | """ |
||
# When True, multiprocessing's own logger is attached to stderr at SUBDEBUG
# level further down in this module.
MP_VERBOSE = False
# When True, single_run() calls each wrapper without the try/except guard, so
# a wrapper exception propagates instead of being recorded as 'error'.
DEBUG_MODE = False
||
| 37 | |||
| 38 | ################################################################################ |
||
| 39 | import sys |
||
| 40 | #sys.path.insert(0, "/Users/magnus/work/src/rna-tools/rna_tools/tools/mq/") # ugly! |
||
| 41 | import progressbar |
||
| 42 | # import mqaprna_score as mqs |
||
| 43 | import time |
||
| 44 | import os |
||
| 45 | import copy |
||
| 46 | from csvsort import csvsort |
||
| 47 | |||
| 48 | import rna_tools.tools.mq.lib.shellgraphics.shellgraphics as sg |
||
| 49 | sg.color_mode = False |
||
| 50 | from rna_tools.tools.mq.lib.timex import timex |
||
| 51 | #import rna_tools.tools.mq.mqaprna_config as Config |
||
| 52 | import rna_tools.rna_tools_config as Config |
||
| 53 | ################################################################################ |
||
| 54 | |||
| 55 | import rna_tools |
||
| 56 | __version__ = rna_tools.__version__ |
||
| 57 | |||
| 58 | import os |
||
| 59 | import sys |
||
# Directory containing this script and its parent directory; both are appended
# to sys.path so that modules located there can be imported by plain name.
DIRNAME = os.path.dirname(__file__)
PARENT_DIRNAME = os.path.abspath(os.path.join(DIRNAME, os.path.pardir))
sys.path.append(DIRNAME)
sys.path.append(PARENT_DIRNAME)
||
| 64 | import csv |
||
| 65 | import imp |
||
| 66 | |||
| 67 | from optparse import OptionParser, OptionGroup |
||
| 68 | from ctypes import c_int |
||
| 69 | |||
| 70 | #import lib.rmsd_calc.rmsd_calc as rmsd_calc |
||
| 71 | from multiprocessing import Pool, Lock, Value |
||
| 72 | |||
# MqapScore is treated as optional: a failed import is ignored here, so the
# name MqapScore is simply undefined afterwards. It is only referenced in
# single_run() when opt.mqapscore is set.
try:
    from wrappers.mqap_score.mqap_score import MqapScore
except ImportError:
    pass

# super-verbose logging
# (only active when MP_VERBOSE is True; sends multiprocessing's internal log
# messages to stderr at the SUBDEBUG level)
if MP_VERBOSE:
    import multiprocessing
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)
||
| 83 | |||
# Load one wrapper module per entry of Config.METHOD_LIST and register it in
# MODULES under the wrapper name. The module is expected at
# <Config.WRAPPERS_PATH>/<name>/<name>.py.
MODULES = {}
for m in Config.METHOD_LIST:
    if '_' in m:
        # 'Name_suffix' entries share the wrapper 'Name'; single_run() later
        # passes the suffix to the wrapper's run() as an argument.
        m, n = m.split('_')
    wrapper_path = os.path.join(Config.WRAPPERS_PATH, m, '%s.py' % m)
    module = imp.load_source(m, wrapper_path)
    MODULES[m] = module
||
| 92 | |||
# global variable
# c: declared global in single_run() and RunAllDirectory.run(); run() sets it to 1.
c = 0
# Methods to run. NOTE(review): this is the very same list object as
# Config.METHOD_LIST, so in-place edits such as methods.remove(...) also alter
# the config list unless `methods` has been rebound first -- confirm intended.
methods = Config.METHOD_LIST
# When True, single_run() calls wrapper.cleanup() after every wrapper run.
cleanup = True

# Shared integer counter incremented once per processed file in single_run()
# and used there as the row id and progress-bar position.
counter = Value(c_int)
# Only referenced by a `global` statement and a commented-out `with` in
# single_run(); the increment itself happens under the separate global `lock`.
counter_lock = Lock()
||
| 100 | |||
| 101 | # ['farna_rna_base_axis', 'farna_rna_backbone_backbone', 'farna_rna_base_stack_axis', 'farna_rna_base_stagger', 'farna_rna_base_stack', 'farna_rna_base_pair', 'farna_rna_repulsive', 'farna_rna_vdw', 'farna_rna_base_backbone', 'farna_score_lowres', 'farna_rna_data_backbone', 'farna_linear_chainbreak', 'farna_rna_rg', 'farna_atom_pair_constraint'], |
||
| 102 | |||
# Suffix embedded in the QRNA column names below.
steps = '0' #
# Maps a method name (as it appears in `methods`) to the list of CSV column
# names that method contributes; RunAllDirectory.run() concatenates these
# lists, in method order, to build the CSV header row.
attributes = {
    'QRNA' : [ 'qrna_' + steps + '_electro', 'qrna_' + steps ],
    #'RASP' : [ 'rasp_all_pdb_energy', 'rasp_all_no_contacts', 'rasp_all_norm_energy', 'rasp_all_mean_energy', 'rasp_all_sd_energy', 'rasp_all_zscore']
    'RASP' : ['rasp_c3_pdb_energy', 'rasp_c3_no_contacts', 'rasp_c3_norm_energy', 'rasp_c3_mean_energy', 'rasp_c3_sd_energy', 'rasp_c3_zscore', 'rasp_bb_pdb_energy', 'rasp_bb_no_contacts', 'rasp_bb_norm_energy', 'rasp_bb_mean_energy', 'rasp_bb_sd_energy', 'rasp_bb_zscore', 'rasp_bbr_pdb_energy', 'rasp_bbr_no_contacts', 'rasp_bbr_norm_energy', 'rasp_bbr_mean_energy', 'rasp_bbr_sd_energy', 'rasp_bbr_zscore', 'rasp_all_pdb_energy', 'rasp_all_no_contacts', 'rasp_all_norm_energy', 'rasp_all_mean_energy', 'rasp_all_sd_energy', 'rasp_all_zscore'],

    'SimRNA_0' : ['simrna_steps', 'simrna_total_energy', 'simrna_base_base', 'simrna_short_stacking', 'simrna_base_backbone', 'simrna_local_geometry', 'simrna_bonds_dist_cp', 'simrna_bonds_dist_pc', 'simrna_flat_angles_cpc', 'simrna_flat_angles_pcp', 'simrna_tors_eta_theta', 'simrna_sphere_penalty', 'simrna_chain_energy'],
    'RNAkb' : ['rnakb_bond', 'rnakb_angle', 'rnakb_proper_dih', 'rnakb_improper_dih', 'rnakb_lj14', 'rnakb_coulomb14', 'rnakb_lj_sr', 'rnakb_coulomb_sr',
               'rnakb_potential', 'rnakb_kinetic_en', 'rnakb_total_energy'],
    'RNAkb_all' : ['rnakb_bond_all', 'rnakb_angle_all', 'rnakb_proper_dih_all', 'rnakb_improper_dih_all', 'rnakb_lj14_all', 'rnakb_coulomb14_all', 'rnakb_lj_sr_all', 'rnakb_coulomb_sr_all',
                   'rnakb_potential_all', 'rnakb_kinetic_en_all', 'rnakb_total_energy_all'],

    'RNAscore' : ['x3rnascore'],
    'AnalyzeGeometry' : ['analyze_geometry'],
    'SSAgreement' : ['ss_disagreement'],
    'ClashScore' : ['clash_score'],
    'Ernwin_1' : [ 'ernwin_1' ],
    'Ernwin_1k' : [ 'ernwin_1k' ],
    'eSCORE' : ['escore'],
    'RNA3DCNN' : ['rna3dcnn'],
    'Dfire' : ['dfire'],

    'FARNA': ['farna_score_lowres',
              'farna_rna_data_backbone',
              'farna_rna_vdw',
              'farna_rna_base_backbone',
              'farna_rna_backbone_backbone',
              'farna_rna_repulsive',
              'farna_rna_base_pair',
              'farna_rna_base_axis',
              'farna_rna_base_stagger',
              'farna_rna_base_stack',
              'farna_rna_base_stack_axis',
              'farna_rna_rg',
              'farna_atom_pair_constraint',
              'farna_linear_chainbreak'],

    'FARNA_hires' : ['farna_score_hires',
                     'farna_fa_atr',
                     'farna_fa_rep',
                     'farna_fa_intra_rep',
                     'farna_lk_nonpolar',
                     'farna_fa_elec_rna_phos_phos',
                     'farna_ch_bond',
                     'farna_rna_torsion',
                     'farna_rna_sugar_close',
                     'farna_hbond_sr_bb_sc',
                     'farna_hbond_lr_bb_sc',
                     'farna_hbond_sc',
                     'farna_geom_sol',
                     'farna_atom_pair_constraint_hires',
                     'farna_linear_chainbreak_hires'],

    # NOTE(review): same column names as 'FARNA' minus 'farna_rna_data_backbone'
    # (still using the 'farna_' prefix) -- confirm this matches the wrapper output.
    'FARFAR2' : ['farna_score_lowres',
                 'farna_rna_vdw',
                 'farna_rna_base_backbone',
                 'farna_rna_backbone_backbone',
                 'farna_rna_repulsive',
                 'farna_rna_base_pair',
                 'farna_rna_base_axis',
                 'farna_rna_base_stagger',
                 'farna_rna_base_stack',
                 'farna_rna_base_stack_axis',
                 'farna_rna_rg',
                 'farna_atom_pair_constraint',
                 'farna_linear_chainbreak'],

    # Column names given as one comma-separated string and split into a list.
    'FARFAR2_hires': 'ff2_score_hires,ff2_fa_atr,ff2_fa_rep,ff2_fa_intra_rep,ff2_lk_nonpolar,ff2_fa_elec_rna_phos_phos,ff2_rna_torsion,ff2_suiteness_bonus,ff2_rna_sugar_close,ff2_fa_stack,ff2_stack_elec,ff2_geom_sol_fast,ff2_bond_sr_bb_sc,ff2_hbond_lr_bb_sc,ff2_hbond_sc,ff2_ref,ff2_free_suite,ff2_free_2HOprime,ff2_intermol,ff2_other_pose,ff2_loop_close,ff2_linear_chainbreak_hires'.split(','),

    #'SimRNA_0' : ['', 'simrna', '', '', '', '', '', '', '', '', '', '', ''],
    'rmsd_all': ['rmsd_all'],
}
||
| 175 | |||
def _resolve_wrapper(method, filename):
    """Translate a method name into (wrapper name, result key, run() arguments).

    The wrapper name selects both the module in MODULES and the class inside
    it; the result key is the original method name and is what results are
    stored under.
    """
    name = method
    arguments = None

    if method == 'FARNA':
        arguments = [filename, False]
    elif method == 'FARNA_hires':
        name = 'FARNA'
        arguments = [filename, True]
    elif method == 'FARFAR2':
        arguments = [filename, False]
    elif method == 'FARFAR2_hires':
        name = 'FARFAR2'
        arguments = [filename, True]
    elif method == 'RNAkb_all':
        name = 'RNAkb'
        arguments = [filename, 'aa']
    elif '_' in method:
        # Generic 'Wrapper_suffix' form, e.g. 'Ernwin_1k' -> ('Ernwin', '1000').
        name, n = method.split('_')
        n = n.replace('n', '')  # n_XXX
        n = n.replace('k', '000')
        n = n.replace('m', '000000')
        arguments = [filename, n]

    if not arguments:
        arguments = [filename] + Config.WRAPPER_OPTIONS[name]

    if name == 'escore':
        name = 'eSCORE'
    return name, method, arguments


def single_run(filename):
    """Run every configured method on one file and append one row to the CSV.

    :param filename: a ``(path, total_number_of_files)`` pair; only the path
        is used here.

    Each method's wrapper is instantiated from MODULES, run, and (when the
    global ``cleanup`` is true) cleaned up. Unless DEBUG_MODE is set, a wrapper
    that raises is recorded as the string ``'error'`` instead of aborting.
    One CSV row ``[counter, basename, *values]`` is then written under the
    global ``lock``; list results are flattened into consecutive cells.

    [!] Use global cleanup = False to block cleaning up

    .. warning:: The function uses global variables: methods, verbose,
       MODULES, cleanup, opt, ref_seq, lock, counter, bar, csv_writer and
       csv_file (most of them are set by RunAllDirectory.run()).
    """
    filename, filename_length = filename

    all_results = {}

    for m in methods:
        if verbose: print(m + '...')  # show method 'eSCORE...'

        m, mfull, arguments = _resolve_wrapper(m, filename)
        wrapper = getattr(MODULES[m], m)()

        # NOTE(review): the previous code acquired `lock` around the run when
        # m == 'NAST_pyro', but m had already been split on '_' by then, so
        # that guard could never fire. It is left out; re-add it keyed on the
        # full method name if NAST_pyro runs really need to be serialized.

        if DEBUG_MODE:
            # No guard on purpose: let the wrapper's exception surface.
            result = wrapper.run(*arguments)
            if verbose: print(m, result)  # ClashScore 12.256669
            all_results[mfull] = result
            if cleanup: wrapper.cleanup()
        else:
            try:
                all_results[mfull] = wrapper.run(*arguments)
            except Exception:
                # Exception (not a bare except) so that Ctrl-C / SystemExit
                # stop the job instead of being logged as an 'error' row.
                all_results[mfull] = 'error'
            finally:
                if cleanup: wrapper.cleanup()

    # Columns for this row: a private copy, so the global `methods` list is
    # never modified from here.
    row_methods = list(methods)

    # get rmsd
    if opt.native_pdb_filename:
        # NOTE(review): the import of rmsd_calc is commented out at the top of
        # the file, so this call looks like it raises NameError -- confirm.
        rmsd = rmsd_calc.get_rmsd(opt.native_pdb_filename,
                                  filename)
        all_results['rmsd'] = rmsd
        row_methods.append('rmsd')

    # length (stored in the result dict, which is printed in verbose mode and
    # handed to MqapScore, but not written as a CSV column)
    all_results['length'] = len(ref_seq)

    if opt.mqapscore:
        # meta-score
        ms = MqapScore(all_results)
        all_results['SCORE'] = ms.get_score()
        row_methods.append('SCORE')

    # `with` guarantees the lock is released even if writing the row fails;
    # otherwise every other worker would block forever on acquire().
    with lock:
        counter.value += 1

        ########### line with results ######################
        bar.update(counter.value)
        ## print results, use --verbose now
        if verbose:
            print(filename.split(os.sep)[-1].ljust(20) + ' ' + str(all_results))

        cells = [counter.value, filename.split(os.sep)[-1]]  # add id
        for name in row_methods:
            value = all_results[name]
            if isinstance(value, list):
                cells.extend(value)
            else:
                cells.append(value)
        csv_writer.writerow(cells)
        csv_file.flush()
||
| 347 | |||
| 348 | |||
def option_parser():
    """Get options or show usage msg.

    Parses ``sys.argv`` with optparse, using the module docstring as the
    description and ``__version__`` as the version string.

    :returns: ``(arguments, opt)`` where ``arguments`` is the list of
        positional arguments ending in ``.pdb`` (everything else is dropped)
        and ``opt`` is the optparse options object.

    If no ``.pdb`` argument remains, the help text and the current global
    ``methods`` are printed and the process exits with status 1.
    """
    version = __version__
    # '\\ ' produces the literal backslash + space shown in the usage text;
    # the unescaped form '\ ' is an invalid escape sequence that newer Python
    # versions warn about.
    usage = '\t%prog [-m <number_processes>] [-n <native_pdb_filename>] [-s <seq_ss_filename>] [-g <ignore_pdb_filename>] \\ \n\t -o <output csv> <dir/*> # [!] no .csv! the file will get version of mqaprna \n\t' + __version__
    parser = OptionParser(description=__doc__,
                          version=version,
                          usage=usage)

    parser.add_option("-q", "--mQapscore",
                      action="store_true", default=False, dest="mqapscore", help="calculate mqapscore")

    parser.add_option("-v", "--verbose",
                      action="store_true", default=False, dest="verbose", help="verbose")

    parser.add_option("--force",
                      action="store_true", default=False)

    parser.add_option("-f", "--no-filename-version",
                      action="store_true", default=False, dest="no_filename_version", help="don't add version of tool to csv filename")

    parser.add_option("-n", "--native_pdb_filename",
                      action="store", type="string", dest="native_pdb_filename", help="native structure in PDB format to calculate RMSD")

    parser.add_option("-m", "--multiprocessing",
                      action="store", type="int", dest="number_processes", default=8,
                      help="set a number of processes, default=8, 0 is no multiprocessing")

    group2 = OptionGroup(parser, "Ignore pdbs, don't have empty lines here! Example",
                         "1xjrA_output3-000142_AA.pdb\n"
                         "1xjrA_output3-000208_AA.pdb\n"
                         "1xjrA_output3-000166_AA.pdb")

    group2.add_option("-g", "--ignore-pdbs",
                      action="store", type="string", dest="ignore_pdb_filename")

    group = OptionGroup(parser, "Seq-SS. Example",
                        ">1xjrA\n"
                        "GAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU\n"
                        ".(((((((...((((.((((.....))..))..))).).)))))))")

    group.add_option("-t", "--methods",
                     action="store", type="string", dest="methods", help=', '.join(['RASP', 'SimRNA', 'AnalyzeGeometry','FARNA', 'QRNA', 'NAST_pyro',
                                                                                    'radius_of_gyration', 'SSAgreement', 'ClashScore', 'RNAkb',
                                                                                    'RNAkb_all', 'FARNA_hires', 'FARNA', 'FARFAR2',
                                                                                    'FARFAR2_hires', 'Dfire', 'RNA3DCNN', 'eSCORE']))

    group.add_option("-s", "--seq-ss",
                     action="store", type="string", dest="seq_ss_filename", help="")

    group.add_option("-o", "--output",
                     action="store", type="string", dest="output", help="output csv file")

    group.add_option("-l", "--list-of-files",
                     action="store", type="string", dest="list_of_files", help="list of files")

    parser.add_option_group(group)
    parser.add_option_group(group2)

    (opt, arguments) = parser.parse_args()

    # Only positional arguments that look like PDB files are kept.
    arguments = [f for f in arguments if f.endswith('.pdb')]

    if not arguments:
        parser.print_help()
        print('\n Curr methods: ', ','.join(methods), end=' ')
        sys.exit(1)

    return arguments, opt
||
| 421 | |||
| 422 | |||
class RunAllDirectory():
    """Class for running wrappers for all files in a directory
    """
    def __init__(self):
        pass

    def run(self, filenames, csv_path, opt):
        """Open csv (with appropriate headers), run methods, print & save csv

        :param filenames: paths of the structure files to process; the list
            itself is not modified.
        :param csv_path: output CSV path, opened in append mode; a header row
            is written on every call.
        :param opt: options object from option_parser(); the attributes read
            here are seq_ss_filename, verbose, native_pdb_filename, mqapscore,
            force, methods and number_processes. ``opt.ignore_pdb_filename``
            is overwritten with the list of previous CSV files found.

        Unless ``opt.force`` is set, every ``*<opt.methods>*.csv`` file in the
        current directory is read and the file names listed in it (second
        column, rows containing 'error' excluded) are skipped, which lets an
        interrupted job be resumed.

        There are two modes of execution:
        * multiprocessing (``opt.number_processes`` > 0)
        * single

        .. warning:: Works on global variables: ref_seq, ref_ss, verbose,
           methods, lock, c, csv_file, csv_writer, bar. They must be set before
           the worker pool is created so that single_run() can see them.
        """
        global ref_seq, ref_ss, verbose, methods, lock, c
        global csv_file, csv_writer, bar  # hack

        if opt.seq_ss_filename:
            with open(opt.seq_ss_filename) as seq_ss_file:
                pdb_id, ref_seq, ref_ss = [x.strip() for x in seq_ss_file.read().strip().split('\n')]
            sg.poptions({'AnalyzeGeometry': True, 'SSAgreement' : True})
            sg.poption('pdb_id', pdb_id)
            sg.poption('ref_seq', ref_seq)
            sg.poption('ref_ss', ref_ss)
        else:
            pdb_id, ref_seq, ref_ss = ['', '', '']
            sg.poptions({'SSAgreement' : True})
            # hack: SSAgreement is dropped when no seq/ss file was given
            try:  # if it's not on the list
                methods.remove('SSAgreement')
            except ValueError:
                pass

        verbose = opt.verbose

        # csv open & add header; newline='' is what the csv module asks for,
        # so that no extra '\r' is produced on platforms that translate '\n'.
        csv_file = open(csv_path, 'a', newline='')
        try:
            csv_writer = csv.writer(csv_file, delimiter=',')
            headers = ['id', 'fn']
            for m in methods:
                headers += attributes[m]
            if opt.native_pdb_filename:
                headers += ['RMSDALL']
            if opt.mqapscore:
                headers += ['SCORE']
            csv_writer.writerow(headers)
            csv_file.flush()

            # Drop editor backups (~), .out files and '._' resource-fork
            # files. A new list is built, so a name matching two rules cannot
            # trigger a second remove() and the caller's list stays untouched.
            candidates = [f for f in filenames
                          if not f.endswith(('~', '.out')) and '._' not in f]

            files_to_ignore = []
            import glob

            if not opt.force:
                opt.ignore_pdb_filename = glob.glob('*' + opt.methods + '*.csv')
                for previous_csv in opt.ignore_pdb_filename:  # do it for the list, that's nice!
                    with open(previous_csv) as handle:
                        rows = handle.read().strip().split('\n')
                    for row in rows:
                        if 'error' in row:
                            # don't add files with errors, so the program is
                            # re-run for them; this also covers rows produced
                            # by a killed job rather than by a real failure
                            continue
                        if row.find('\t') > -1:
                            row = row.split('\t')[1]  # id, fn
                        if row.find(',') > -1:
                            row = row.split(',')[1]  # id, fn
                        files_to_ignore.append(os.path.basename(row))

            ## files to ignore
            print(' to ignore', len(files_to_ignore), files_to_ignore[:4])

            # Set lookup keeps the filtering linear in the number of files.
            already_done = set(files_to_ignore)
            filenames = [f for f in candidates
                         if not f.startswith('_')  # skip
                         and os.path.basename(f) not in already_done]
            print(' files to analyze: %s' % len(filenames), filenames[:300])

            sg.phr()

            lock = Lock()

            # Row ids and the progress bar continue after the skipped files.
            counter.value = len(files_to_ignore)

            c = 1
            filenames_length = len(filenames) + len(files_to_ignore)

            bar = progressbar.ProgressBar(max_value=filenames_length)
            bar.update(len(files_to_ignore))

            fl = [[f, filenames_length] for f in filenames]

            # two running modes
            if opt.number_processes:
                # The context manager shuts the pool down once map() returns.
                with Pool(opt.number_processes) as p:
                    p.map(single_run, fl)
            else:
                for filename, x in fl:
                    single_run((filename, x))
        finally:
            csv_file.close()
||
| 555 | |||
#main
if __name__ == '__main__':
    import sys
    import platform

    # icecream is only a debugging aid; its absence must not stop the script.
    try:
        from icecream import ic
    except ImportError:
        ic = None
    else:
        ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
        ic.configureOutput(prefix='> ')

    t = timex.Timex()
    t.start()

    arguments, opt = option_parser()

    # files: positional .pdb arguments plus, with -l, one path per line of the
    # list file (blank lines are skipped so they do not become empty filenames)
    input_files = arguments[:]
    if opt.list_of_files:
        with open(opt.list_of_files) as list_file:
            for l in list_file:
                if l.strip():
                    input_files.append(l.strip())
    #ic(input_files)

    if not opt.methods:
        opt.methods = ','.join(Config.METHOD_LIST)

    if opt.no_filename_version:
        # Without -o there would be no file name at all; fall back to the
        # method list so that the CSV can still be opened.
        output_csv = opt.output if opt.output else opt.methods + '.csv'
    else:
        hostname = platform.node()
        if opt.output:
            output_csv = opt.output.replace('.csv','') + '-' + __version__ + '-' + hostname + '.csv'
        else:
            output_csv = opt.methods + '-' + __version__ + '-' + hostname + '.csv'

    sg.pbanner_simply(os.path.basename(sys.argv[0]))

    try:
        rnakb_option = Config.WRAPPER_OPTIONS['RNAkb'][0]
    except KeyError:
        rnakb_option = None
    try:
        rasp_option = Config.WRAPPER_OPTIONS['RASP'][0]
    except KeyError:
        rasp_option = None

    # Rebinds the module-level `methods` used by single_run() and run().
    if opt.methods:
        methods = [x.strip() for x in opt.methods.split(',')]

    print('ver:', __version__ + '\n')
    print('start ', time.strftime("%Y-%m-%d %H:%M:%S"))

    opts = {
        'Input files': '#' + str(len(input_files)) + ' ' + str(input_files[:3]),
        'Multiprocessing': bool(opt.number_processes),
        'Output csv': output_csv,
        'Seq ss fn': opt.seq_ss_filename,
        'Ignore pdb fn': opt.ignore_pdb_filename,
        'Native pdb': opt.native_pdb_filename,
        'RNAkb' : rnakb_option,
        'RASP' : rasp_option,
        # 'rmsd' : rmsd_calc.RMSD_DEFAULT_METHOD,
        'Model path' : Config.ML_MODEL_PATH,
        'Methods' : ','.join(methods),
        'Verbose' : opt.verbose,
    }
    sg.poptions(opts)

    print('python:', platform.python_version())

    runner = RunAllDirectory()
    runner.run(input_files, output_csv, opt)
    # meta-scoring
    #output_csv = "test_data/1xjr_m500_m1.csv"
    #mqs.do_scoring(output_csv)

    log = t.end('process: %i' % opt.number_processes)
    print('\n', log)
    print('Output: %s \n' % output_csv)
    ## log
    # splitext (instead of replace('.csv', '.log')) guarantees the log name
    # differs from the CSV name even when the output has no '.csv' suffix;
    # otherwise opening the log with 'w' would truncate the results.
    log_fn = os.path.splitext(output_csv)[0] + '.log'
    if log_fn == output_csv:
        log_fn = output_csv + '.log'
    with open(log_fn, 'w') as f:
        f.write(log + '\n')
        f.write(str(opts) + '\n')
        f.write('Output: %s\n' % output_csv)
    print('logging: %s' % log_fn)
    print('logging wrappers %s' % Config.LOG_DIRECTORY + os.sep)
||
| 643 |