build.rna_tools.tools.mq.RNAkb.RNAkb   B
last analyzed

Complexity

Total Complexity 45

Size/Duplication

Total Lines 347
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 221
dl 0
loc 347
rs 8.8
c 0
b 0
f 0
wmc 45

4 Methods

Rating   Name   Duplication   Size   Complexity  
B RNAkb._get_result() 0 21 6
A RNAkb.log_stdout_stderr() 0 7 1
A RNAkb.__init__() 0 12 3
F RNAkb.run() 0 252 35

How to fix   Complexity   

Complexity

Complex classes like build.rna_tools.tools.mq.RNAkb.RNAkb often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""This module contains functions for computing RNAkb potential
4
5
It seems that this is impossible to run RNAkb in full atom mode. So this works only in 5 pt (5 points/atom per residue) mode.
6
7
https://gromacs.bioexcel.eu/t/fatal-error-an-input-file-contains-a-line-longer-than-4095-characters/1397/2
8
"""
9
import os
10
import sys
11
import re
12
13
from math import isnan
14
from shutil import copyfile
15
16
from rna_tools.tools.mq.lib.wrappers.SubprocessUtils import run_command
17
from rna_tools.tools.mq.lib.wrappers.base_wrappers import ProgramWrapper
18
from rna_tools.tools.md.rnakb_utils import make_rna_rnakb_ready, fix_gromacs_gro, prepare_groups, format_score_mdp
19
from rna_tools.rna_tools_config import GRMLIB_PATH, DB_AA_PATH, DB_5PT_PATH, WRAPPERS_PATH, GROMACS_LD_PATH, GROMACS_PATH_BIN, TMP_PATH
20
21
import tempfile
22
23
class RNAkb(ProgramWrapper):
24
    """Wrapper class for running RNAkb automatically.
25
    """
26
    executable = ['/usr/local/gromacs/bin/' + exe for exe in ['pdb2gmx', 'make_ndx', 'editconf', 'grompp', 'mdrun']]
27
28
    def __init__(self, job_id='',  sandbox=False):
29
        SANDBOX_PATH = tempfile.TemporaryDirectory().name # '.' or './sandbox' for sandbox here
30
        super(RNAkb, self).__init__('', '', job_id=job_id)
31
32
        # sandbox q-&-d
33
        if sandbox:
34
            try:
35
                os.mkdir(SANDBOX_PATH)
36
            except:
37
                pass
38
            # os.system('rm ' + SANDBOX_PATH + '/*')
39
            self.sandbox_dir= SANDBOX_PATH
40
41
    def log_stdout_stderr(self):
42
        stdout_file = open(self.sandbox_dir + os.sep + 'out.txt')
43
        stderr_file = open(self.sandbox_dir + os.sep + 'err.txt')
44
        stdout = stdout_file.read()
45
        stderr = stderr_file.read()
46
        self.log('STDOUT:\n' + stdout, 'debug')
47
        self.log('STDERR:\n' + stderr, 'debug')
48
49
    def _get_result(self, md_file):
50
        """Format results as a dictionary."""
51
        log_file = open(md_file)
52
        lines = log_file.readlines()
53
        log_file.close()
54
        res = []
55
        append = False
56
        headline = None
57
        for l in lines:
58
            if append:
59
                res.append(l)
60
            if 'LJ-SR' in l:
61
                headline = l
62
                append = True
63
            if 'other-other' in l:
64
                break
65
        res_index = headline.split().index('LJ-SR') - 1
66
        res_dict = {}
67
        for l in res:
68
            res_dict[l.split()[0]] = l.split()[res_index]
69
        return res_dict
70
71
72
    def run(self, name, potential_type, verbose=False):
73
        """Compute RNAkb potential for a single file
74
        
75
        Args:
76
77
           path (str): name of a PDB file
78
           potential_type (str): '5pt' for 5 point or 'aa' for all atom aa is off
79
80
        Returns:
81
82
           a list of energies (strings) ['2.57116e+05', '1.62131e+03', '7.82459e+02', '3.00789e-01', '0.00000e+00', '0.00000e+00', '-2.51238e-03', '0.00000e+00', '2.59520e+05', '2.54174e-09', '2.59520e+05']
83
84
        .. warning:: 'aa' does not work because of "a line longer than 4095 characters"
85
86
        The function parses::
87
88
            Statistics over 1 steps using 1 frames
89
            Energies (kJ/mol)
90
            Bond          Angle    Proper Dih.  Improper Dih.          LJ-14
91
            2.44111e+05    1.76069e+03    8.12947e+02    1.82656e-01    0.00000e+00
92
            Coulomb-14        LJ (SR)   Coulomb (SR)      Potential    Kinetic En.
93
            0.00000e+00   -1.94624e-03    0.00000e+00    2.46685e+05    2.43227e-09
94
            Total Energy    Temperature Pressure (bar)
95
            2.46685e+05    6.67884e-10   -5.94232e+04
96
            Total Virial
97
98
            # ['Statistics', 'over', '1', 'steps', 'using', '1', 'frames', 'Energies', '(kJ/mol)', 
99
            'Bond', 'Angle', 'Proper', 'Dih.', 'Improper', 'Dih.', 'LJ-14', '2.44111e+05', '1.76069e+03', 
100
            '8.12947e+02', '1.82656e-01', '0.00000e+00', 'Coulomb-14', 'LJ', '(SR)', 'Coulomb', '(SR)',
101
            'Potential', 'Kinetic', 'En.', '0.00000e+00', '-1.94624e-03', '0.00000e+00', '2.46685e+05', 
102
            '2.43227e-09', 'Total', 'Energy', 'Temperature', 'Pressure', '(bar)', '2.46685e+05', '6.67884e-10', 
103
            '-5.94232e+04', 'Total', 'Virial']
104
105
            result[6] is really the RNAkb
106
107
        """
108
        prep = False
109
110
        self.sandbox_dir = self.sandbox_dir + os.sep
111
112
        self.log('Run %s' % name)
113
        if verbose: print('input: ', name, '\n', '-' * 80)
114
        copyfile(name, self.sandbox_dir + 'query.pdb')
115
116
        f = open(name)
117
        pdb_string = f.read()
118
        f.close()
119
        old_pdb = pdb_string
120
        pdb_string = make_rna_rnakb_ready(pdb_string)
121
        if old_pdb != pdb_string:
122
            self.pdb_fixes.append('make_rna_rnakb_ready')
123
124
        fn = self.sandbox_dir + 'query.pdb'
125
        if verbose: print('Gromacs ready file created: %s' % fn)
126
        f = open(fn, 'w')
127
        f.write(pdb_string)
128
        f.close()
129
130
        try:
131
            old_path = os.getcwd()
132
        except OSError:
133
            pass
134
135
        os.chdir(self.sandbox_dir)
136
        my_env = os.environ.copy()
137
138
        if potential_type == 'aa':
139
            my_env['GRMLIB'] = GRMLIB_PATH + ':' + DB_AA_PATH
140
            my_env['GMXLIB'] = GRMLIB_PATH + ':' + DB_AA_PATH
141
        elif potential_type == '5pt':
142
            my_env['GRMLIB'] = GRMLIB_PATH + ':' + DB_5PT_PATH
143
            my_env['GMXLIB'] = GRMLIB_PATH + ':' + DB_5PT_PATH
144
        else:
145
            raise Exception('RNAkb -- select aa or 5pt')
146
147
        if verbose:
148
            print('ff_aa:', GRMLIB_PATH + ':' + DB_AA_PATH)
149
            print('ff_5pt:', GRMLIB_PATH + ':' + DB_5PT_PATH)
150
            
151
        my_env['GMX_MAXBACKUP'] = '-1'
152
        my_env['LD_LIBRARY_PATH'] = GROMACS_LD_PATH
153
154
        # create topology files
155
        self.log('create topology files')
156
        # pdb2gmx
157
        out = run_command(self.executable[0], # '-v',
158
                       ['-quiet', '-ff', 'amber03', '-water', 'none' , '-f', self.sandbox_dir + 'query.pdb', '-o', self.sandbox_dir + 'query.gro',
159
                        '-p', self.sandbox_dir + 'query.top',
160
                        ],
161
                       env=my_env,
162
                       cmd_input='1\n6\n',
163
                       stdout_file=self.sandbox_dir + os.sep + 'out.txt',
164
                       stderr_file=self.sandbox_dir + os.sep + 'err.txt',
165
                       verbose=verbose,
166
                       )
167
        if out:
168
            print(out)
169
            self.log('Run failed')
170
            self.log_stdout_stderr()
171
            os.chdir(old_path)
172
            return -1
173
174
        # make index files
175
        fix_gromacs_gro(self.sandbox_dir + os.sep + 'query.gro')
176
        self.log('make index files')
177
        command = 'GRMLIB=' + my_env['GRMLIB'] + ' '
178
        command += 'GMXLIB=' + my_env['GMXLIB'] + ' '
179
        command += 'LD_LIBRARY_PATH=' + my_env['LD_LIBRARY_PATH'] + ' '
180
181
182
        # generate sandbox/groups.txt @groups
183
        gr = self.sandbox_dir + 'groups.txt'
184
        gtxt, energygrps, seq_uniq = prepare_groups(self.sandbox_dir + 'query.pdb', gr, potential=potential_type)
185
        if verbose: print('Groups file created: %s' % gr)
186
187
        # generate sandbox/score.mdp @score
188
        fout = self.sandbox_dir + 'score.mdp'
189
190
        if potential_type == 'aa': # ugly
191
            #format_score_mdp_aa(fout, energygrps, seq_uniq)
192
            format_score_mdp(fout, energygrps, seq_uniq)
193
        else:
194
            format_score_mdp(fout, energygrps, seq_uniq)
195
196
        command += self.executable[1] + ' -f query.gro > /dev/null 2> /dev/null < ' + gr
197
        #command += self.executable[1] + ' -f query.gro <' + gr
198
        bash_script = "#!/bin/bash\n" + command + "\n"
199
        with open(self.sandbox_dir + os.sep + 'make_index.sh', 'w') as f:            
200
            f.write(bash_script)
201
        cmd = 'bash ' + self.sandbox_dir + 'make_index.sh'# >/dev/null'
202
        if verbose: print('index cmd:', cmd)
203
        os.system(cmd)
204
205
        ## energy computation
206
        self.log('prepare energy computation')
207
        run_error = run_command(self.executable[3], # grompp
208
                       ['-noh', '-c', self.sandbox_dir + 'query.gro', '-p', self.sandbox_dir + 'query.top',
209
                        '-time', '1',
210
                        '-f', self.sandbox_dir + 'score.mdp',
211
                        '-n', self.sandbox_dir + 'index.ndx',
212
                        '-o', self.sandbox_dir + 'run.tpr', '-maxwarn', '15'],
213
                       env=my_env,
214
                       stdout_file=self.sandbox_dir + 'out.txt',
215
                       stderr_file=self.sandbox_dir + 'err.txt',
216
                       verbose=verbose,
217
                       )
218
        if not run_error:
219
            if verbose: print('prepare energy computation: OK', run_error)
220
        else:
221
            if verbose: print('prepare compute energy: FAIL, try to increase the box')
222
            # something went wrong, probably following lines will fix it
223
            f = open(self.sandbox_dir + os.sep + 'query.gro')
224
            box_size = f.readlines()[-1].split()
225
            f.close()
226
227
            box_size = [float(i) for i in box_size]
228
            max_size = 10
229
230
            for mult in range(5, max_size):
231
                ## increase box size
232
                if verbose: print('> Increase box size %d times' % mult)
233
                self.log('Increase box size %d times' % mult)
234
235
                run_error = run_command(self.executable[2], # editconf
236
                            ['-f', 'query.gro', '-o', 'query_big.gro',
237
                            '-box', ' '.join([str(i*mult) for i in box_size])],
238
                            env=my_env,
239
                            stdout_file=self.sandbox_dir + os.sep + 'out.txt',
240
                            stderr_file=self.sandbox_dir + os.sep + 'err.txt'
241
                            )
242
243
                if run_error:
244
                    self.log('Run failed', 'error')
245
                    self.log_stdout_stderr()
246
                    os.chdir(old_path)
247
                else:
248
                    if verbose: print('Increase box size: OK')
249
250
                # prepare energy computation again
251
                run_error =  run_command(self.executable[3], # grompp
252
                           ['-c', 'query_big.gro', '-p', 'query.top',
253
                            '-f', self.sandbox_dir + 'score.mdp',
254
                            '-n', self.sandbox_dir + 'index.ndx',
255
                            '-o', 'run.tpr', '-maxwarn', '15'],
256
                            env=my_env,
257
                            stdout_file=self.sandbox_dir + os.sep + 'out.txt',
258
                            stderr_file=self.sandbox_dir + os.sep + 'err.txt',
259
                            verbose=verbose)
260
261
                if run_error:
262
                    if verbose: print('Grompp: FAIL')
263
                    with open(self.sandbox_dir + os.sep + 'err.txt') as f:
264
                        txt = f.read()
265
                        if txt.find('referenced in the') > -1:
266
                            if verbose: print(txt) #print 'Group X referenced -- error'
267
                            return -2 # leave the loop
268
                        elif txt.find('The cut-off length is longer') > -1:
269
                            if verbose: print('The cuf-off length -- error')
270
                        else:
271
                            if verbose: print('unknown -- error, see ' + self.sandbox_dir + 'err.txt')
272
                            #sys.exit(1) # don't kill the function
273
                            pass
274
275
                    self.log_stdout_stderr()
276
                else:
277
                    if verbose: print('Grompp: OK')
278
                    break # leave the loop!
279
280
                if not os.path.exists(self.sandbox_dir + os.sep + 'run.tpr'):
281
                    self.log('Run failed %s' % name, 'error')
282
                    self.log_stdout_stderr()
283
                    os.chdir(old_path)
284
285
        self.log('compute energy')
286
        run_error = run_command(self.executable[4],
287
                       ['-table', 'table.xvg', '-tablep', 'tablep.xvg',
288
                        '-nt', '1',
289
                        '-s', self.sandbox_dir + 'run.tpr', '-e', self.sandbox_dir + 'run.edr'],
290
                       env=my_env,
291
                       stdout_file=self.sandbox_dir + os.sep + 'out.txt',
292
                       stderr_file=self.sandbox_dir + os.sep + 'err.txt',
293
                       verbose=verbose,
294
                       )
295
        if not run_error:
296
            if verbose: print('compute energy: OK', run_error)
297
        else:
298
            if verbose: print('compute energy: FAIL', run_error)
299
            self.log('Run failed', 'error')
300
            self.log_stdout_stderr()
301
            os.chdir(old_path)
302
            #return 255
303
            return [str(x) for x in [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]]
304
        try:
305
            os.chdir(old_pwd)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable old_pwd does not seem to be defined.
Loading history...
306
        except (OSError, NameError):
307
            pass
308
309
        # extract result and change format
310
        result = self._get_result(self.sandbox_dir + os.sep + 'md.log')
311
        result_sum = sum([float(result[k]) for k in result])
312
313
        md_txt = open(self.sandbox_dir + os.sep + 'md.log').read()
314
        energies_txt = re.search('Statistics over.*Total Virial', md_txt, flags=re.MULTILINE|re.DOTALL).group(0)
315
        energies = energies_txt.split()[16:21]+energies_txt.split()[29:34]+[energies_txt.split()[39]]
316
        if isnan(result_sum):
317
            self.log('Result is NaN', 'error')
318
            os.chdir(old_path)
319
            return [str(x) for x in [-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1]]
320
        else:
321
            self.log('Run successfull %s' % name)
322
            os.chdir(old_path)
323
            return energies
324
            #return result_sum != 0 and result_sum or -1
325
326
if __name__ == '__main__':
327
    wrapper = RNAkb(sandbox=True)
328
329
    fn = '1dqf_noCA.pdb' # problem, C is shifted to the right
330
    fn = '1dqf_noCA+ResnShift.pdb' # OK
331
    #fn = '1msy_output4_01-000001_AA.pdb' # OK
332
    #fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clean_noC.pdb' # OK
333
    #fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clean_error_fx.pdb'
334
    #fn = 'cat_chunk003_2r8s_5kcycles_decoys_nonativefrags.cluster1.0_clx.pdb'
335
    #fn = 'C.pdb' 
336
337
    # 5pt
338
    if False:
339
        result = wrapper.run('test_data'
340
                             + os.sep + fn , '5pt', verbose=False) 
341
        print(fn, result)
342
        #wrapper.cleanup()
343
344
    # all
345
    result = wrapper.run('test_data' + os.sep + fn , '5pt', verbose=False) # or aa
346
    print(fn, result[6])
347
    #wrapper.cleanup()
348