build.rna_tools.tools.mq.rna_mq_collect   C
last analyzed

Complexity

Total Complexity 55

Size/Duplication

Total Lines 643
Duplicated Lines 40.12 %

Importance

Changes 0
Metric Value
eloc 393
dl 258
loc 643
rs 6
c 0
b 0
f 0
wmc 55

2 Functions

Rating   Name   Duplication   Size   Complexity  
F single_run() 171 171 28
B option_parser() 72 72 2

2 Methods

Rating   Name   Duplication   Size   Complexity  
F RunAllDirectory.run() 15 126 24
A RunAllDirectory.__init__() 0 2 1

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like build.rna_tools.tools.mq.rna_mq_collect often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""mqaprna.py - a script for running all wrapers on each PDB file in a specified directory
4
saves results to a CSV file.
5
6
ss_agreement is ...
7
8
ClashScore,AnalyzeGeometry,SimRNA_0,RNAscore,eSCORE,RNAkb,RASP,RNA3DCNN,Dfire,FARNA,FARFAR2,FARNA_hires,FARFAR2_hires
9
10
The code is full of hacks and tricks (look for the ``# hack`` comments).
11
12
.. warning:: Uses global variables
13
14
Install: 
15
16
    csvsort
17
18
Cmd::
19
20
     # find . -iname 'FARFAR2*.csv' -exec cat {} + > FARFAR2_hires.csv
21
     $ rna_mq_collect.py -t FARFAR2_hires -m 4 -f -o FARFAR2_hires.csv -l all.txt x.pdb
22
     # fake x.pdb when -l is used, -l gets a list of files
23
     x.pdb
24
     y.pdb
25
     z.pdb
26
27
88% (49329 of 55689) |############### | Elapsed Time: 0:45:23 ETA:  2 days, 18:42:16
28
29
For this python progressbar works::
30
31
    Python 3.7.10 (default, Feb 26 2021, 18:47:35)
32
    [GCC 7.3.0] :: Anaconda, Inc. on linux
33
34
"""
35
MP_VERBOSE = False
36
DEBUG_MODE = False
37
38
################################################################################
39
import sys
40
#sys.path.insert(0, "/Users/magnus/work/src/rna-tools/rna_tools/tools/mq/")  # ugly!
41
import progressbar
42
# import mqaprna_score as mqs
43
import time
44
import os
45
import copy
46
from csvsort import csvsort
47
48
import rna_tools.tools.mq.lib.shellgraphics.shellgraphics as sg
49
sg.color_mode = False
50
from rna_tools.tools.mq.lib.timex import timex
51
#import rna_tools.tools.mq.mqaprna_config as Config
52
import rna_tools.rna_tools_config as Config
53
################################################################################
54
55
import rna_tools
56
__version__ = rna_tools.__version__
57
58
import os
59
import sys
60
DIRNAME = os.path.dirname(__file__)
61
PARENT_DIRNAME = os.path.abspath(os.path.join(DIRNAME, os.path.pardir))
62
sys.path.append(DIRNAME)
63
sys.path.append(PARENT_DIRNAME)
64
import csv
65
import imp
66
67
from optparse import OptionParser, OptionGroup
68
from ctypes import c_int
69
70
#import lib.rmsd_calc.rmsd_calc as rmsd_calc
71
from multiprocessing import Pool, Lock, Value
72
73
try:
74
    from wrappers.mqap_score.mqap_score import MqapScore
75
except ImportError:
76
    pass
77
78
# Super-verbose multiprocessing logging; only enabled when MP_VERBOSE is True.
if MP_VERBOSE:
    import multiprocessing
    logger = multiprocessing.log_to_stderr()
    logger.setLevel(multiprocessing.SUBDEBUG)

# Load one wrapper module per configured method. A name such as 'SimRNA_0'
# is split on '_' and only the part before it selects the wrapper, which is
# read from <WRAPPERS_PATH>/<name>/<name>.py and stored under that short name.
# NOTE(review): the ``imp`` module was removed in Python 3.12; migrating to
# importlib would also require changing the file-level import.
MODULES = {}
for m in Config.METHOD_LIST:
    if m.find('_') > -1:
        m, n = m.split('_')
    wrapper_path = os.path.join(Config.WRAPPERS_PATH, m, m + '.py')
    module = imp.load_source(m, wrapper_path)
    MODULES[m] = module

# Module-level state shared by single_run() and RunAllDirectory.run().
c = 0
# Copy the configured list: run() removes entries from ``methods`` in place,
# which must not silently edit Config.METHOD_LIST as well.
methods = list(Config.METHOD_LIST)
cleanup = True  # set to False to keep the wrappers' temporary files

counter = Value(c_int)  # number of processed files, shared between processes
counter_lock = Lock()
100
101
# ['farna_rna_base_axis', 'farna_rna_backbone_backbone', 'farna_rna_base_stack_axis', 'farna_rna_base_stagger', 'farna_rna_base_stack', 'farna_rna_base_pair', 'farna_rna_repulsive', 'farna_rna_vdw', 'farna_rna_base_backbone', 'farna_score_lowres', 'farna_rna_data_backbone', 'farna_linear_chainbreak', 'farna_rna_rg', 'farna_atom_pair_constraint'],
102
103
steps = '0'  # suffix baked into the QRNA column names

# Building blocks for the column lists that follow a regular naming scheme.
_RASP_SCOPES = ('c3', 'bb', 'bbr', 'all')
_RASP_FIELDS = ('pdb_energy', 'no_contacts', 'norm_energy',
                'mean_energy', 'sd_energy', 'zscore')
_RNAKB_COLUMNS = [
    'rnakb_bond',
    'rnakb_angle',
    'rnakb_proper_dih',
    'rnakb_improper_dih',
    'rnakb_lj14',
    'rnakb_coulomb14',
    'rnakb_lj_sr',
    'rnakb_coulomb_sr',
    'rnakb_potential',
    'rnakb_kinetic_en',
    'rnakb_total_energy',
]

# Method name -> list of CSV column names; RunAllDirectory.run() concatenates
# these lists (in the order of ``methods``) to build the CSV header.
attributes = {
    'QRNA': ['qrna_' + steps + '_electro', 'qrna_' + steps],
    'RASP': ['rasp_%s_%s' % (scope, field)
             for scope in _RASP_SCOPES
             for field in _RASP_FIELDS],
    'SimRNA_0': [
        'simrna_steps',
        'simrna_total_energy',
        'simrna_base_base',
        'simrna_short_stacking',
        'simrna_base_backbone',
        'simrna_local_geometry',
        'simrna_bonds_dist_cp',
        'simrna_bonds_dist_pc',
        'simrna_flat_angles_cpc',
        'simrna_flat_angles_pcp',
        'simrna_tors_eta_theta',
        'simrna_sphere_penalty',
        'simrna_chain_energy',
    ],
    'RNAkb': list(_RNAKB_COLUMNS),
    'RNAkb_all': [column + '_all' for column in _RNAKB_COLUMNS],
    'RNAscore': ['x3rnascore'],
    'AnalyzeGeometry': ['analyze_geometry'],
    'SSAgreement': ['ss_disagreement'],
    'ClashScore': ['clash_score'],
    'Ernwin_1': ['ernwin_1'],
    'Ernwin_1k': ['ernwin_1k'],
    'eSCORE': ['escore'],
    'RNA3DCNN': ['rna3dcnn'],
    'Dfire': ['dfire'],
    'FARNA': [
        'farna_score_lowres',
        'farna_rna_data_backbone',
        'farna_rna_vdw',
        'farna_rna_base_backbone',
        'farna_rna_backbone_backbone',
        'farna_rna_repulsive',
        'farna_rna_base_pair',
        'farna_rna_base_axis',
        'farna_rna_base_stagger',
        'farna_rna_base_stack',
        'farna_rna_base_stack_axis',
        'farna_rna_rg',
        'farna_atom_pair_constraint',
        'farna_linear_chainbreak',
    ],
    'FARNA_hires': [
        'farna_score_hires',
        'farna_fa_atr',
        'farna_fa_rep',
        'farna_fa_intra_rep',
        'farna_lk_nonpolar',
        'farna_fa_elec_rna_phos_phos',
        'farna_ch_bond',
        'farna_rna_torsion',
        'farna_rna_sugar_close',
        'farna_hbond_sr_bb_sc',
        'farna_hbond_lr_bb_sc',
        'farna_hbond_sc',
        'farna_geom_sol',
        'farna_atom_pair_constraint_hires',
        'farna_linear_chainbreak_hires',
    ],
    'FARFAR2': [
        'farna_score_lowres',
        'farna_rna_vdw',
        'farna_rna_base_backbone',
        'farna_rna_backbone_backbone',
        'farna_rna_repulsive',
        'farna_rna_base_pair',
        'farna_rna_base_axis',
        'farna_rna_base_stagger',
        'farna_rna_base_stack',
        'farna_rna_base_stack_axis',
        'farna_rna_rg',
        'farna_atom_pair_constraint',
        'farna_linear_chainbreak',
    ],
    'FARFAR2_hires': [
        'ff2_score_hires',
        'ff2_fa_atr',
        'ff2_fa_rep',
        'ff2_fa_intra_rep',
        'ff2_lk_nonpolar',
        'ff2_fa_elec_rna_phos_phos',
        'ff2_rna_torsion',
        'ff2_suiteness_bonus',
        'ff2_rna_sugar_close',
        'ff2_fa_stack',
        'ff2_stack_elec',
        'ff2_geom_sol_fast',
        'ff2_bond_sr_bb_sc',
        'ff2_hbond_lr_bb_sc',
        'ff2_hbond_sc',
        'ff2_ref',
        'ff2_free_suite',
        'ff2_free_2HOprime',
        'ff2_intermol',
        'ff2_other_pose',
        'ff2_loop_close',
        'ff2_linear_chainbreak_hires',
    ],
    'rmsd_all': ['rmsd_all'],
}
175
176 View Code Duplication
# Configured method name -> (wrapper to instantiate, fixed extra argument
# passed to wrapper.run() after the filename).
_METHOD_VARIANTS = {
    'FARNA': ('FARNA', False),
    'FARNA_hires': ('FARNA', True),
    'FARFAR2': ('FARFAR2', False),
    'FARFAR2_hires': ('FARFAR2', True),
    'RNAkb_all': ('RNAkb', 'aa'),
}


def _wrapper_call(mfull, filename):
    """Return ``(wrapper_name, run_arguments)`` for the configured method name."""
    if mfull in _METHOD_VARIANTS:
        m, extra = _METHOD_VARIANTS[mfull]
        arguments = [filename, extra]
    elif '_' in mfull:
        # '<wrapper>_<n>': the suffix is a number where 'k' and 'm' expand
        # to thousands and millions, e.g. 'Ernwin_1k' -> '1000'.
        m, n = mfull.split('_')
        n = n.replace('n', '').replace('k', '000').replace('m', '000000')
        arguments = [filename, n]
    else:
        m = mfull
        arguments = [filename] + Config.WRAPPER_OPTIONS[m]

    if m == 'escore':
        m = 'eSCORE'
    return m, arguments


def single_run(filename):
    """Run every configured method on one file and append the row to the CSV.

    :param filename: a ``(path, total_number_of_files)`` pair; only the path
        is used here, the total is accepted for compatibility with callers.

    The wrapper results are collected in a dict keyed by the configured
    method name. A wrapper that raises is recorded as the string ``'error'``
    (unless ``DEBUG_MODE`` is set, in which case the exception propagates).
    The shared counter, the progress bar and the CSV file are then updated
    while holding the global ``lock``.

    [!] Use global cleanup = False to block cleaning up

    .. warning:: The function relies on module globals prepared elsewhere:
       ``methods``, ``verbose``, ``lock``, ``opt``, ``ref_seq``, ``bar``,
       ``csv_writer`` and ``csv_file``. It no longer modifies ``methods``.
    """
    filename, filename_length = filename

    # Extra columns (rmsd, SCORE) are appended to a private copy so the
    # global list is never touched and needs no restoring afterwards.
    run_methods = list(methods)
    all_results = {}

    for mfull in methods:
        if verbose:
            print(mfull + '...')  # show method 'eSCORE...'

        m, arguments = _wrapper_call(mfull, filename)
        wrapper = getattr(MODULES[m], m)()

        # The original compared the already split name ('NAST') against
        # 'NAST_pyro', so this serialization could never trigger.
        serialize = mfull == 'NAST_pyro'
        if serialize:
            lock.acquire()
        try:
            if DEBUG_MODE:
                # No error trapping in debug mode: let the traceback surface.
                result = wrapper.run(*arguments)
                if verbose:
                    print(m, result)  # ClashScore 12.256669
                all_results[mfull] = result
                if cleanup:
                    wrapper.cleanup()
            else:
                try:
                    try:
                        all_results[mfull] = wrapper.run(*arguments)
                    finally:
                        if cleanup:
                            wrapper.cleanup()
                except Exception:
                    # Best effort: one failing method must not stop the run.
                    # KeyboardInterrupt/SystemExit are deliberately not caught,
                    # so a killed job is not stored as a method error.
                    all_results[mfull] = 'error'
        finally:
            if serialize:
                lock.release()

    # get rmsd
    if opt.native_pdb_filename:
        # NOTE(review): ``rmsd_calc`` is not imported in the visible part of
        # this module (its import is commented out); confirm before using -n.
        all_results['rmsd'] = rmsd_calc.get_rmsd(opt.native_pdb_filename,
                                                 filename)
        run_methods.append('rmsd')

    # length
    all_results['length'] = len(ref_seq)

    if opt.mqapscore:
        # meta-score
        all_results['SCORE'] = MqapScore(all_results).get_score()
        run_methods.append('SCORE')

    # Everything below touches state shared between worker processes; the
    # context manager guarantees the lock is released even if writing fails.
    with lock:
        counter.value += 1
        bar.update(counter.value)

        basename = filename.split(os.sep)[-1]
        if verbose:
            print(basename.ljust(20) + ' ' + str(all_results))

        cells = [counter.value, basename]  # add id
        for m in run_methods:
            value = all_results[m]
            if isinstance(value, list):
                cells.extend(value)  # multi-column methods
            else:
                cells.append(value)
        csv_writer.writerow(cells)
        csv_file.flush()
347
348
349 View Code Duplication
def option_parser():
    """Parse the command line; show the usage message when no PDB file is given.

    Positional arguments that do not end with ``.pdb`` are dropped. If none
    remain, the help text and the current global ``methods`` are printed and
    the program exits with status 1.

    :returns: ``(arguments, opt)`` - the list of ``.pdb`` paths and the
        optparse values object.
    """
    # The backslash is doubled so the literal has no invalid escape sequence;
    # the resulting text is the same as before.
    usage = ('\t%prog [-m <number_processes>] [-n <native_pdb_filename>] '
             '[-s <seq_ss_filename>] [-g <ignore_pdb_filename>] \\ \n'
             '\t -o <output csv> <dir/*> # [!] no .csv! the file will get version of mqaprna \n\t'
             + __version__)
    parser = OptionParser(description=__doc__,
                          version=__version__,
                          usage=usage)

    parser.add_option("-q", "--mQapscore",
                      action="store_true", default=False, dest="mqapscore", help="calculate mqapscore")

    parser.add_option("-v", "--verbose",
                      action="store_true", default=False, dest="verbose", help="verbose")

    parser.add_option("--force",
                      action="store_true", default=False)

    parser.add_option("-f", "--no-filename-version",
                      action="store_true", default=False, dest="no_filename_version", help="don't add version of tool to csv filename")

    parser.add_option("-n", "--native_pdb_filename",
                      action="store", type="string", dest="native_pdb_filename", help="native structure in PDB format to calculate RMSD")

    parser.add_option("-m", "--multiprocessing",
                      action="store", type="int", dest="number_processes", default=8,
                      help="set a number of processes, default=8, 0 is no multiprocessing")

    group2 = OptionGroup(parser, "Ignore pdbs, don't have empty lines here! Example",
                        """1xjrA_output3-000142_AA.pdb
                         1xjrA_output3-000208_AA.pdb
                         1xjrA_output3-000166_AA.pdb""")

    group2.add_option("-g", "--ignore-pdbs",
                      action="store", type="string", dest="ignore_pdb_filename")

    group = OptionGroup(parser, "Seq-SS. Example",
                        """>1xjrA
                        GAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU
                        .(((((((...((((.((((.....))..))..))).).)))))))""")

    group.add_option("-t", "--methods",
                     action="store", type="string", dest="methods",
                     help=', '.join(['RASP', 'SimRNA', 'AnalyzeGeometry', 'FARNA', 'QRNA', 'NAST_pyro',
                                     'radius_of_gyration', 'SSAgreement', 'ClashScore', 'RNAkb',
                                     'RNAkb_all', 'FARNA_hires', 'FARNA', 'FARFAR2',
                                     'FARFAR2_hires', 'Dfire', 'RNA3DCNN', 'eSCORE']))

    group.add_option("-s", "--seq-ss",
                     action="store", type="string", dest="seq_ss_filename", help="")

    group.add_option("-o", "--output",
                     action="store", type="string", dest="output", help="output csv file")

    group.add_option("-l", "--list-of-files",
                     action="store", type="string", dest="list_of_files", help="list of files")

    parser.add_option_group(group)
    parser.add_option_group(group2)

    (opt, arguments) = parser.parse_args()

    # Only .pdb paths count as input; with -l a placeholder .pdb argument is
    # still required to get past this check (see the module docstring).
    arguments = [f for f in arguments if f.endswith('.pdb')]

    if not arguments:
        parser.print_help()
        print('\n   Curr methods: ', ','.join(methods), end=' ')
        sys.exit(1)

    return arguments, opt
421
422
423
class RunAllDirectory():
    """Class for running wrappers for all files in a directory
    """
    def __init__(self):
        pass

    def run(self, filenames, csv_path, opt):
        """Open csv (with appropriate headers), run methods, print & save csv

        There are two modes of execution:
         * multiprocessing (``opt.number_processes`` > 0)
         * single (``opt.number_processes`` == 0)

        :param filenames: paths of the structure files to score; names ending
            with ``~`` or ``.out``, containing ``._`` or starting with ``_``
            are skipped, as are files already listed in earlier result CSVs
            (unless ``opt.force`` is set).
        :param csv_path: CSV file to append the header and the rows to.
        :param opt: option values as returned by ``option_parser()``.

        .. warning:: Works on global variables: ref_seq, ref_ss, verbose,
           methods, lock, c, csv_file, csv_writer, bar. ``single_run()``
           reads them, so in multiprocessing mode the workers must inherit
           the module globals.
           NOTE(review): that presumably requires the fork start method -
           confirm on platforms that spawn workers.
        """
        global ref_seq, ref_ss, verbose, methods, lock, c
        global csv_file, csv_writer, bar  # hack: shared with single_run()
        import glob

        if opt.seq_ss_filename:
            with open(opt.seq_ss_filename) as seq_ss_file:
                pdb_id, ref_seq, ref_ss = [x.strip() for x in seq_ss_file.read().strip().split('\n')]
            sg.poptions({'AnalyzeGeometry': True, 'SSAgreement' : True})
            sg.poption('pdb_id', pdb_id)
            sg.poption('ref_seq', ref_seq)
            sg.poption('ref_ss', ref_ss)
        else:
            pdb_id, ref_seq, ref_ss = '', '', ''
            sg.poptions({'SSAgreement' : True})
            # hack: without a reference structure SSAgreement cannot be run
            if 'SSAgreement' in methods:
                methods.remove('SSAgreement')

        verbose = opt.verbose

        # csv open & add header (newline='' as recommended for csv writers)
        csv_file = open(csv_path, 'a', newline='')
        try:
            csv_writer = csv.writer(csv_file, delimiter=',')
            headers = ['id', 'fn']
            for m in methods:
                headers.extend(attributes[m])
            if opt.native_pdb_filename:
                headers.append('RMSDALL')
            if opt.mqapscore:
                headers.append('SCORE')
            csv_writer.writerow(headers)
            csv_file.flush()

            # Drop editor backups, .out files and macOS '._' files. Built as a
            # new list: removing from the list while filtering could call
            # remove() twice for one name and raise ValueError.
            candidates = [f for f in filenames
                          if not f.endswith(('~', '.out')) and '._' not in f]

            # Files already present in earlier result CSVs are not re-run.
            files_to_ignore = []
            if not opt.force:
                opt.ignore_pdb_filename = glob.glob('*' + opt.methods + '*.csv')
                for previous_csv in opt.ignore_pdb_filename:  # do it for the list, that's nice!
                    with open(previous_csv) as previous:
                        rows = previous.read().strip().split('\n')
                    for row in rows:
                        # Rows with errors are not ignored, so those files are
                        # re-run. A real error will fail again quickly; this
                        # mainly rescues jobs that were killed and therefore
                        # recorded as errors.
                        if 'error' in row:
                            continue
                        if row.find('\t') > -1:
                            row = row.split('\t')[1]  # id, fn
                        if row.find(',') > -1:
                            row = row.split(',')[1]  # id, fn
                        files_to_ignore.append(os.path.basename(row))

            print(' to ignore', len(files_to_ignore), files_to_ignore[:4])

            # Set for O(1) membership tests; the list is kept because its
            # length (duplicates included) seeds the counter and progress bar.
            ignored = set(files_to_ignore)
            filenames = [f for f in candidates
                         if not f.startswith('_')
                         and os.path.basename(f) not in ignored]
            print(' files to analyze: %s' % len(filenames), filenames[:300])

            sg.phr()

            lock = Lock()
            counter.value = len(files_to_ignore)
            c = 1

            filenames_length = len(filenames) + len(files_to_ignore)
            bar = progressbar.ProgressBar(max_value=filenames_length)
            bar.update(len(files_to_ignore))

            jobs = [(f, filenames_length) for f in filenames]

            # two running modes
            if opt.number_processes:
                with Pool(opt.number_processes) as pool:
                    pool.map(single_run, jobs)
            else:
                for job in jobs:
                    single_run(job)
        finally:
            csv_file.close()
555
556
#main
557
if __name__ == '__main__':
    # Script entry point: parse options, work out the output CSV name, print a
    # summary of the settings, run all methods and write a small log file
    # next to the CSV.
    from icecream import ic
    import platform
    import sys
    ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
    ic.configureOutput(prefix='> ')

    t = timex.Timex()
    t.start()

    arguments, opt = option_parser()

    # files: positional .pdb arguments plus, optionally, one path per line
    # from the -l file (blank lines are skipped)
    input_files = arguments[:]
    if opt.list_of_files:
        with open(opt.list_of_files) as list_file:
            input_files.extend(line.strip() for line in list_file if line.strip())

    if not opt.methods:
        opt.methods = ','.join(Config.METHOD_LIST)

    if opt.no_filename_version:
        output_csv = opt.output
        if not output_csv:
            # Previously this surfaced later as a TypeError from open(None).
            sys.exit('error: -f/--no-filename-version requires -o/--output')
    else:
        hostname = platform.node()
        if opt.output:
            output_csv = opt.output.replace('.csv', '') + '-' + __version__ + '-' + hostname + '.csv'
        else:
            output_csv = opt.methods + '-' + __version__ + '-' + hostname + '.csv'

    sg.pbanner_simply(os.path.basename(sys.argv[0]))

    try:
        rnakb_option = Config.WRAPPER_OPTIONS['RNAkb'][0]
    except KeyError:
        rnakb_option = None
    try:
        rasp_option = Config.WRAPPER_OPTIONS['RASP'][0]
    except KeyError:
        rasp_option = None

    if opt.methods:
        methods = [x.strip() for x in opt.methods.split(',')]

    print('ver:', __version__ + '\n')
    print('start ', time.strftime("%Y-%m-%d %H:%M:%S"))

    opts = {
        'Input files': '#' + str(len(input_files)) + ' ' + str(input_files[:3]),
        'Multiprocessing': bool(opt.number_processes),
        'Output csv': output_csv,
        'Seq ss fn': opt.seq_ss_filename,
        'Ignore pdb fn': opt.ignore_pdb_filename,
        'Native pdb': opt.native_pdb_filename,
        'RNAkb' : rnakb_option,
        'RASP' : rasp_option,
        'Model path' : Config.ML_MODEL_PATH,
        'Methods' : ','.join(methods),
        'Verbose' : opt.verbose,
    }
    sg.poptions(opts)

    print('python:', platform.python_version())

    runner = RunAllDirectory()
    runner.run(input_files, output_csv, opt)

    log = t.end('process: %i' % opt.number_processes)
    print('\n', log)
    print('Output: %s \n' % output_csv)

    ## log
    log_fn = output_csv.replace('.csv', '.log')
    with open(log_fn, 'w') as f:
        f.write(log + '\n')
        f.write(str(opts) + '\n')
        f.write('Output: %s\n' % output_csv)
    print('logging: %s' % log_fn)
    print('logging wrappers %s' % Config.LOG_DIRECTORY + os.sep)
643