build.rna_tools.tools.mq.rna_mq_collect_tqdm   C
last analyzed

Complexity

Total Complexity 57

Size/Duplication

Total Lines 630
Duplicated Lines 40 %

Importance

Changes 0
Metric Value
eloc 382
dl 252
loc 630
rs 5.04
c 0
b 0
f 0
wmc 57

2 Methods

Rating   Name   Duplication   Size   Complexity  
F RunAllDirectory.run() 15 140 26
A RunAllDirectory.__init__() 0 2 1

2 Functions

Rating   Name   Duplication   Size   Complexity  
F single_run() 168 168 28
B option_parser() 69 69 2

How to fix   Duplicated Code    Complexity   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

Complexity

 Tip:   Before tackling complexity, make sure that you eliminate any duplication first. This often can reduce the size of classes significantly.

Complex classes like build.rna_tools.tools.mq.rna_mq_collect_tqdm often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""mqaprna.py - a script for running all wrappers on each PDB file in a specified directory; it
4
saves results to a CSV file.
5
6
ss_agreement is ...
7
8
The code is full of hacks and tricks (look for ``# hack`` comments).
9
10
.. warning:: Uses global variables
11
12
Install: 
13
14
    csvsort
15
16
Cmd::
17
18
     # find . -iname 'FARFAR2*.csv' -exec cat {} + > FARFAR2_hires.csv
19
     $ rna_mq_collect.py -t FARFAR2_hires -m 4 -f -o FARFAR2_hires.csv -l all.txt x.pdb
20
     # fake x.pdb when -l is used, -l gets a list of files
21
     x.pdb
22
     y.pdb
23
     z.pdb
24
25
88% (49329 of 55689) |############### | Elapsed Time: 0:45:23 ETA:  2 days, 18:42:16
26
27
"""
28
MP_VERBOSE = False
29
DEBUG_MODE = False
30
31
################################################################################
32
import sys
33
#sys.path.insert(0, "/Users/magnus/work/src/rna-tools/rna_tools/tools/mq/")  # ugly!
34
import progressbar
35
# import mqaprna_score as mqs
36
import time
37
import os
38
import copy
39
from csvsort import csvsort
40
41
import rna_tools.tools.mq.lib.shellgraphics.shellgraphics as sg
42
sg.color_mode = False
43
from rna_tools.tools.mq.lib.timex import timex
44
#import rna_tools.tools.mq.mqaprna_config as Config
45
import rna_tools.rna_tools_config as Config
46
################################################################################
47
48
import rna_tools
49
__version__ = rna_tools.__version__
50
51
import os
52
import sys
53
DIRNAME = os.path.dirname(__file__)
54
PARENT_DIRNAME = os.path.abspath(os.path.join(DIRNAME, os.path.pardir))
55
sys.path.append(DIRNAME)
56
sys.path.append(PARENT_DIRNAME)
57
import csv
58
import imp
59
60
from optparse import OptionParser, OptionGroup
61
from ctypes import c_int
62
63
from icecream import ic
64
import sys
65
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
66
ic.configureOutput(prefix='> ')
67
68
69
#import lib.rmsd_calc.rmsd_calc as rmsd_calc
70
from multiprocessing import Pool, Lock, Value
71
72
try:
73
    from wrappers.mqap_score.mqap_score import MqapScore
74
except ImportError:
75
    pass
76
77
# super-verbose logging
78
MP_VERBOSE = 0
79
if MP_VERBOSE:
80
    import multiprocessing
81
    logger = multiprocessing.log_to_stderr()
82
    logger.setLevel(multiprocessing.SUBDEBUG)
83
84
# create wrappers for all the methods
85
MODULES = {}
86
for m in Config.METHOD_LIST:
87
    if m.find('_') > -1:
88
        m,n = m.split('_')
89
    wrapper_path = os.path.join(Config.WRAPPERS_PATH, m, m + '.py')
90
    module = imp.load_source(m, wrapper_path)
91
    MODULES[m] = module
92
93
# global variable
94
c = 0
95
methods = Config.METHOD_LIST
96
cleanup = True
97
98
counter = Value(c_int)
99
counter_lock = Lock()
100
101
# ['farna_rna_base_axis', 'farna_rna_backbone_backbone', 'farna_rna_base_stack_axis', 'farna_rna_base_stagger', 'farna_rna_base_stack', 'farna_rna_base_pair', 'farna_rna_repulsive', 'farna_rna_vdw', 'farna_rna_base_backbone', 'farna_score_lowres', 'farna_rna_data_backbone', 'farna_linear_chainbreak', 'farna_rna_rg', 'farna_atom_pair_constraint'],
102
103
steps = '0' #
104
attributes = {
105
    'QRNA' : [ 'qrna_' + steps + '_electro', 'qrna_' + steps ],
106
    #'RASP' : [ 'rasp_all_pdb_energy', 'rasp_all_no_contacts', 'rasp_all_norm_energy', 'rasp_all_mean_energy', 'rasp_all_sd_energy', 'rasp_all_zscore']
107
    'RASP' : ['rasp_c3_pdb_energy', 'rasp_c3_no_contacts', 'rasp_c3_norm_energy', 'rasp_c3_mean_energy', 'rasp_c3_sd_energy', 'rasp_c3_zscore', 'rasp_bb_pdb_energy', 'rasp_bb_no_contacts', 'rasp_bb_norm_energy', 'rasp_bb_mean_energy', 'rasp_bb_sd_energy', 'rasp_bb_zscore', 'rasp_bbr_pdb_energy', 'rasp_bbr_no_contacts', 'rasp_bbr_norm_energy', 'rasp_bbr_mean_energy', 'rasp_bbr_sd_energy', 'rasp_bbr_zscore', 'rasp_all_pdb_energy', 'rasp_all_no_contacts', 'rasp_all_norm_energy', 'rasp_all_mean_energy', 'rasp_all_sd_energy', 'rasp_all_zscore'],
108
    'FARNA_hires' : ['farna_score_hires', 'farna_fa_atr', 'farna_fa_rep', 'farna_fa_intra_rep',
109
                                       'farna_lk_nonpolar',
110
                                       'farna_fa_elec_rna_phos_phos',
111
                                       'farna_ch_bond',
112
                                       'farna_rna_torsion',
113
                                       'farna_rna_sugar_close',
114
                                       'farna_hbond_sr_bb_sc',
115
                                       'farna_hbond_lr_bb_sc',
116
                                       'farna_hbond_sc',
117
                                       'farna_geom_sol',
118
                                       'farna_atom_pair_constraint_hires',
119
                                       'farna_linear_chainbreak_hires'],
120
121
    'SimRNA_0' : ['simrna_steps', 'simrna_total_energy', 'simrna_base_base', 'simrna_short_stacking', 'simrna_base_backbone',  'simrna_local_geometry', 'simrna_bonds_dist_cp', 'simrna_bonds_dist_pc', 'simrna_flat_angles_cpc', 'simrna_flat_angles_pcp', 'simrna_tors_eta_theta', 'simrna_sphere_penalty', 'simrna_chain_energy'],
122
    'RNAkb' : ['rnakb_bond', 'rnakb_angle', 'rnakb_proper_dih', 'rnakb_improper_dih', 'rnakb_lj14', 'rnakb_coulomb14', 'rnakb_lj_sr', 'rnakb_coulomb_sr',
123
               'rnakb_potential', 'rnakb_kinetic_en', 'rnakb_total_energy'],
124
    'RNAkb_all' : ['rnakb_bond_all', 'rnakb_angle_all', 'rnakb_proper_dih_all', 'rnakb_improper_dih_all', 'rnakb_lj14_all', 'rnakb_coulomb14_all', 'rnakb_lj_sr_all', 'rnakb_coulomb_sr_all',
125
               'rnakb_potential_all', 'rnakb_kinetic_en_all', 'rnakb_total_energy_all'],
126
127
    'RNAscore' : ['x3rnascore'],
128
    'AnalyzeGeometry' : ['analyze_geometry'],
129
    'SSAgreement' : ['ss_disagreement'],
130
    'ClashScore' : ['clash_score'],
131
    'Ernwin_1' : [ 'ernwin_1' ],
132
    'Ernwin_1k' : [ 'ernwin_1k' ],
133
    'eSCORE' : ['escore'],
134
    'RNA3DCNN' : ['rna3dcnn'],
135
    'Dfire' : ['dfire'],
136
137
    'FARNA':                           ['farna_score_lowres',
138
                                       'farna_rna_data_backbone',
139
                                       'farna_rna_vdw',
140
                                       'farna_rna_base_backbone',
141
                                       'farna_rna_backbone_backbone',
142
                                       'farna_rna_repulsive',
143
                                       'farna_rna_base_pair',
144
                                       'farna_rna_base_axis',
145
                                       'farna_rna_base_stagger',
146
                                       'farna_rna_base_stack',
147
                                       'farna_rna_base_stack_axis',
148
                                       'farna_rna_rg',
149
                                       'farna_atom_pair_constraint',
150
                                       'farna_linear_chainbreak'],
151
     
152
    'FARFAR2_hires': 'ff2_score_hires,ff2_fa_atr,ff2_fa_rep,ff2_fa_intra_rep,ff2_lk_nonpolar,ff2_fa_elec_rna_phos_phos,ff2_rna_torsion,ff2_suiteness_bonus,ff2_rna_sugar_close,ff2_fa_stack,ff2_stack_elec,ff2_geom_sol_fast,ff2_bond_sr_bb_sc,ff2_hbond_lr_bb_sc,ff2_hbond_sc,ff2_ref,ff2_free_suite,ff2_free_2HOprime,ff2_intermol,ff2_other_pose,ff2_loop_close,ff2_linear_chainbreak_hires'.split(','),
153
    
154
    #'SimRNA_0' : ['', 'simrna', '', '', '',  '', '', '', '', '', '', '', ''],
155
    'rmsd_all': ['rmsd_all'],
156
}
157
158
159 View Code Duplication
# Methods that are served by a wrapper with a different name and/or need a
# fixed extra argument: method name -> (wrapper name, extra run() argument).
_METHOD_VARIANTS = {
    'FARNA': ('FARNA', False),
    'FARNA_hires': ('FARNA', True),
    'FARFAR2': ('FARFAR2', False),
    'FARFAR2_hires': ('FARFAR2', True),
    'RNAkb_all': ('RNAkb', 'aa'),
}


def _resolve_wrapper(method, filename):
    """Translate a method name into ``(wrapper_name, run_arguments)``.

    * names listed in ``_METHOD_VARIANTS`` use the fixed mapping,
    * ``<wrapper>_<n>`` names (e.g. ``SimRNA_0``, ``Ernwin_1k``) are split on
      the underscore; in ``<n>`` the letter ``n`` is dropped, ``k`` becomes
      ``000`` and ``m`` becomes ``000000``,
    * any other name gets its arguments from ``Config.WRAPPER_OPTIONS``.
    """
    if method in _METHOD_VARIANTS:
        wrapper_name, extra = _METHOD_VARIANTS[method]
        return wrapper_name, [filename, extra]

    if '_' in method:
        wrapper_name, n = method.split('_')
        n = n.replace('n', '')  # n_XXX
        n = n.replace('k', '000')
        n = n.replace('m', '000000')
        arguments = [filename, n]
    else:
        wrapper_name = method
        # the options are looked up under the name as given (before the
        # 'escore' -> 'eSCORE' renaming below)
        arguments = [filename] + Config.WRAPPER_OPTIONS[method]

    if wrapper_name == 'escore':
        wrapper_name = 'eSCORE'
    return wrapper_name, arguments


def single_run(lst):
    """Run all requested methods on one file and return its CSV row.

    :param lst: a 6-item sequence
        ``[filename, c, verbose, methods, opt, ref_seq]``; ``c`` is put into
        the first cell of the row, ``opt`` is the parsed options object
        (``native_pdb_filename`` and ``mqapscore`` are read here).
    :returns: ``[c, <basename of filename>, <results...>]``; a result that is
        a list is spread over several cells, anything else takes one cell.
        A method whose wrapper raised an exception yields the cell
        ``'error'`` (unless ``DEBUG_MODE`` is on, then the exception is
        re-raised).

    [!] Use global cleanup = False to block cleaning up

    .. warning:: The function uses global variables: MODULES, Config,
                 DEBUG_MODE, cleanup and counter.
    """
    filename, c, verbose, methods, opt, ref_seq = lst
    all_results = {}

    for mfull in methods:
        if verbose:
            print(mfull + '...')  # show method 'eSCORE...'

        m, arguments = _resolve_wrapper(mfull, filename)
        wrapper = getattr(MODULES[m], m)()

        # NOTE(review): the original code tried to take a lock for
        # 'NAST_pyro', but at that point the name had already been split to
        # 'NAST', so the branch could never run; it was dropped.
        try:
            result = wrapper.run(*arguments)
        except Exception:
            # Exception (not a bare except) so that Ctrl-C / SystemExit stop
            # the run instead of being recorded as an 'error' result
            if DEBUG_MODE:
                raise
            result = 'error'
        finally:
            if cleanup:
                wrapper.cleanup()

        if DEBUG_MODE and verbose:
            print(m, result)  # ClashScore 12.256669
        # e.g. {'ClashScore': 12.256669, 'AnalyzeGeometry': 32.5581}
        all_results[mfull] = result

    # columns of the row; a copy, so the caller's list is never modified
    columns = list(methods)

    # get rmsd
    if opt.native_pdb_filename:
        # NOTE(review): rmsd_calc is not imported in this file (its import is
        # commented out at the top), so this raises NameError - confirm
        # where the module lives before using -n.
        all_results['rmsd'] = rmsd_calc.get_rmsd(opt.native_pdb_filename,
                                                 filename)
        columns.append('rmsd')

    # length
    all_results['length'] = len(ref_seq)

    if opt.mqapscore:
        # meta-score, calculated from everything collected so far
        all_results['SCORE'] = MqapScore(all_results).get_score()
        columns.append('SCORE')

    counter.value += 1

    basename = filename.split(os.sep)[-1]
    if verbose:
        # 3_solution_1.pdb     {'AnalyzeGeometry': 0.0, 'eSCORE': 1.70264, ...}
        print(basename.ljust(20) + ' ' + str(all_results))

    cells = [c, basename]  # add id
    for name in columns:
        value = all_results[name]
        if isinstance(value, list):
            cells.extend(value)
        else:
            cells.append(value)
    return cells
327
328
329 View Code Duplication
def option_parser():
    """Parse the command line; show usage and exit if there is nothing to do.

    Only positional arguments ending with ``.pdb`` are kept. When neither
    such an argument nor ``-l <list of files>`` is given, the help and the
    current methods are printed and the program exits with status 1.

    :returns: ``(arguments, opt)`` - the list of ``.pdb`` paths and the
        parsed options object
    """
    version = __version__
    # '\\ ' keeps the literal backslash of the original usage text without
    # relying on an invalid escape sequence
    usage = '\t%prog [-m <number_processes>] [-n <native_pdb_filename>] [-s <seq_ss_filename>] [-g <ignore_pdb_filename>] \\ \n\t -o <output csv> <dir/*> # [!] no .csv! the file will get version of mqaprna \n\t' + __version__
    parser = OptionParser(description=__doc__,
                          version=version,
                          usage=usage)

    parser.add_option("-q", "--mQapscore",
                      action="store_true", default=False, dest="mqapscore",
                      help="calculate mqapscore")

    parser.add_option("-v", "--verbose",
                      action="store_true", default=False, dest="verbose",
                      help="verbose")

    parser.add_option("-f", "--no-filename-version",
                      action="store_true", default=False, dest="no_filename_version",
                      help="don't add version of tool to csv filename")

    parser.add_option("-n", "--native_pdb_filename",
                      action="store", type="string", dest="native_pdb_filename",
                      help="native structure in PDB format to calculate RMSD")

    # %default is expanded by optparse, so the help cannot drift from the
    # real default again
    parser.add_option("-m", "--multiprocessing",
                      action="store", type="int", dest="number_processes", default=1,
                      help="set a number of processes, default=%default, 0 is no multiprocessing")

    group2 = OptionGroup(parser, "Ignore pdbs, don't have empty lines here! Example",
                         """1xjrA_output3-000142_AA.pdb
                         1xjrA_output3-000208_AA.pdb
                         1xjrA_output3-000166_AA.pdb""")

    group2.add_option("-g", "--ignore-pdbs",
                      action="store", type="string", dest="ignore_pdb_filename")

    group = OptionGroup(parser, "Seq-SS. Example",
                        """>1xjrA
                        GAGUUCACCGAGGCCACGCGGAGUACGAUCGAGGGUACAGUGAAUU
                        .(((((((...((((.((((.....))..))..))).).)))))))""")

    group.add_option("-t", "--methods",
                     action="store", type="string", dest="methods",
                     help=', '.join(['RASP',  'SimRNA', 'AnalyzeGeometry','FARNA', 'QRNA', 'NAST_pyro',
                                     'radius_of_gyration', 'SSAgreement', 'ClashScore', 'RNAkb',
                                     'RNAkb_all', 'FARNA_hires', 'FARNA', 'FARFAR2',
                                     'FARFAR2_hires', 'Dfire', 'RNA3DCNN', 'eSCORE']))

    group.add_option("-s", "--seq-ss",
                     action="store", type="string", dest="seq_ss_filename", help="")

    group.add_option("-o", "--output",
                     action="store", type="string", dest="output", help="output csv file")

    group.add_option("-l", "--list-of-files",
                     action="store", type="string", dest="list_of_files", help="list of files")

    parser.add_option_group(group)
    parser.add_option_group(group2)

    (opt, arguments) = parser.parse_args()

    arguments = [f for f in arguments if f.endswith('.pdb')]

    # a list of files (-l) is enough on its own, no fake x.pdb is required
    if not arguments and not opt.list_of_files:
        parser.print_help()
        print('\n   Curr methods: ', ','.join(methods), end=' ')
        sys.exit(1)

    return arguments, opt
398
399
400
class RunAllDirectory():
    """Class for running wrappers for all files in a directory
    """
    def __init__(self):
        pass

    @staticmethod
    def _read_files_to_ignore(sources):
        """Return basenames of the files listed in the ``sources`` files.

        Lines containing ``error`` are skipped, so these files are run again
        (a killed job leaves 'error' rows that are not real errors). For
        tab- or comma-separated lines (``id, fn, ...``) the second column is
        taken; other lines are used as they are.
        """
        files_to_ignore = []
        for source in sources:
            with open(source) as handle:
                lines = handle.read().strip().split('\n')
            for line in lines:
                if 'error' in line:
                    continue
                if '\t' in line:
                    line = line.split('\t')[1]  # id, fn
                if ',' in line:
                    line = line.split(',')[1]  # id, fn
                files_to_ignore.append(os.path.basename(line))
        return files_to_ignore

    @staticmethod
    def _select_files(filenames, files_to_ignore):
        """Return the paths from ``filenames`` that still have to be run.

        Dropped are: backup files (``~``), ``.out`` files, paths containing
        ``._`` or ``/_``, and files whose basename is in ``files_to_ignore``.
        """
        ignored = set(files_to_ignore)
        return [f for f in filenames
                if not f.endswith(('~', '.out'))
                and '._' not in f
                and '/_' not in f
                and os.path.basename(f) not in ignored]

    def run(self, filenames, csv_path, opt):
        """Open csv (with appropriate headers), run methods, print & save csv

        There are two modes of execution:
         * multiprocessing (``opt.number_processes`` > 1)
         * single

        :param filenames: paths of the files to process (not modified)
        :param csv_path: output CSV file, opened in append mode
        :param opt: parsed options object; ``opt.ignore_pdb_filename`` is
            replaced with the list of files the ignore list was read from

        Side effects: writes ``_mq_to_run_.txt`` in the current directory.

        .. warning:: Works on global variables: ref_seq, ref_ss, verbose,
                     methods, lock, c, csv_file, csv_writer, bar
        """
        global ref_seq, ref_ss, verbose, methods, lock, c
        global csv_file, csv_writer, bar  # hack
        import glob

        if opt.seq_ss_filename:
            with open(opt.seq_ss_filename) as handle:
                pdb_id, ref_seq, ref_ss = [x.strip() for x in handle.read().strip().split('\n')]
            sg.poptions({'AnalyzeGeometry': True, 'SSAgreement' : True})
            sg.poption('pdb_id', pdb_id)
            sg.poption('ref_seq', ref_seq)
            sg.poption('ref_ss', ref_ss)
        else:
            pdb_id, ref_seq, ref_ss = ['', '', '']
            sg.poptions({'SSAgreement' : True})
            # hack: SSAgreement is dropped when no seq/ss file is given
            if 'SSAgreement' in methods:
                methods.remove('SSAgreement')

        verbose = opt.verbose

        # csv open & add header; newline='' as required by the csv module
        csv_file = open(csv_path, 'a', newline='')
        try:
            csv_writer = csv.writer(csv_file, delimiter=',')
            headers = ['id', 'fn']
            for m in methods:
                headers += attributes[m]
            if opt.native_pdb_filename:
                headers += ['RMSDALL']
            if opt.mqapscore:
                headers += ['SCORE']
            csv_writer.writerow(headers)
            csv_file.flush()

            # files already present in csv files of these methods are
            # skipped, plus the ones listed in the -g file, if provided
            user_ignore = opt.ignore_pdb_filename
            ignore_sources = glob.glob('*' + (opt.methods or ','.join(methods)) + '*.csv')
            if isinstance(user_ignore, str) and user_ignore not in ignore_sources:
                ignore_sources.append(user_ignore)
            opt.ignore_pdb_filename = ignore_sources
            files_to_ignore = self._read_files_to_ignore(ignore_sources)

            ## files to ignore
            print(' to ignore', len(files_to_ignore), files_to_ignore[:4])

            filenames = self._select_files(filenames, files_to_ignore)

            with open('_mq_to_run_.txt', 'w') as f:
                f.write('\n'.join(filenames))
            print(' save filenames to run to _mq_to_run_.txt')

            print(' files to analyze: %s' % len(filenames), filenames[:5])

            sg.phr()

            lock = Lock()
            counter.value = len(files_to_ignore)
            c = 1

            filenames_length = len(filenames) + len(files_to_ignore)

            bar = progressbar.ProgressBar(max_value=filenames_length)
            bar.update(len(files_to_ignore))

            # one job per file, e.g. ['test/1xjrA_M1.pdb', 14, True, ['RASP'], opt, ref_seq]
            # NOTE(review): the second item ends up in the 'id' column and
            # is the total number of files for every row - confirm that a
            # per-file counter was not intended.
            lst = [[f, filenames_length, verbose, methods, opt, ref_seq]
                   for f in filenames]

            number_processes = int(opt.number_processes)
            if number_processes > 1:
                from tqdm.contrib.concurrent import process_map
                outputs = process_map(single_run, lst, max_workers=number_processes)
                for cells in outputs:
                    csv_writer.writerow(cells)
            else:
                for l in lst:
                    csv_writer.writerow(single_run(l))
                    # keep finished rows on disk even if the run is killed
                    csv_file.flush()
        finally:
            csv_file.close()
546
547
#main
548
if __name__ == '__main__':
    # Script entry point: parse options, collect the input files, run all
    # methods via RunAllDirectory and write a .log file next to the csv.
    from icecream import ic
    import sys
    ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
    ic.configureOutput(prefix='> ')

    t = timex.Timex()
    t.start()

    arguments, opt = option_parser()

    # files: positional arguments plus the (optional) list of files;
    # blank lines of the list are skipped
    input_files = arguments[:]
    if opt.list_of_files:
        with open(opt.list_of_files) as list_file:
            input_files.extend(line.strip() for line in list_file if line.strip())

    if not opt.methods:
        opt.methods = ','.join(Config.METHOD_LIST)

    if opt.no_filename_version:
        # without -o fall back to a name based on the methods instead of
        # ending up with no output path at all
        output_csv = opt.output or opt.methods + '.csv'
    else:
        import platform
        hostname = platform.node()
        if opt.output:
            output_csv = opt.output.replace('.csv','') + '-' + __version__ + '-' + hostname + '.csv'
        else:
            output_csv = opt.methods + '-' + __version__  + '-' + hostname + '.csv'

    sg.pbanner_simply(os.path.basename(sys.argv[0]))

    # first configured option of the wrapper, shown in the summary only
    try:
        rnakb_option = Config.WRAPPER_OPTIONS['RNAkb'][0]
    except (KeyError, IndexError):
        rnakb_option = None
    try:
        rasp_option = Config.WRAPPER_OPTIONS['RASP'][0]
    except (KeyError, IndexError):
        rasp_option = None

    if opt.methods:
        methods = [x.strip() for x in opt.methods.split(',')]

    print('ver:',  __version__ + '\n')
    print('start ', time.strftime("%Y-%m-%d %H:%M:%S"))

    opts = {
        'Input files': '#' + str(len(input_files)) + ' ' + str(input_files[:3]),
        'Multiprocessing': opt.number_processes > 1,
        'Output csv': output_csv,
        'Seq ss fn': opt.seq_ss_filename,
        'Ignore pdb fn': opt.ignore_pdb_filename,
        'Native pdb': opt.native_pdb_filename,
        'RNAkb' : rnakb_option,
        'RASP' : rasp_option,
        'Model path' : Config.ML_MODEL_PATH,
        'Methods' : ','.join(methods),
        'Verbose' : opt.verbose,
    }
    sg.poptions(opts)

    runner = RunAllDirectory()
    runner.run(input_files, output_csv, opt)

    log = t.end('process: %i' % opt.number_processes)
    print('\n', log)
    print('Output: %s \n' % output_csv)
    ## log
    log_fn = output_csv.replace('.csv', '.log')
    with open(log_fn, 'w') as f:
        f.write(log + '\n')
        f.write(str(opts) + '\n')
        f.write('Output: %s\n' % output_csv)
    print('logging: %s' % log_fn)
    print('logging wrappers %s' % Config.LOG_DIRECTORY + os.sep)
630
631
    #with open(output_csv) as f:
632
    #    print(f.read())
633