Passed
Push — master ( df8bd2...5940fa )
by Marcin
06:11
created

build.rna_tools.tools.rna_calc_rmsd.rna_calc_rmsd_pymol   A

Complexity

Total Complexity 16

Size/Duplication

Total Lines 336
Duplicated Lines 34.52 %

Importance

Changes 0
Metric Value
eloc 182
dl 116
loc 336
rs 10
c 0
b 0
f 0
wmc 16

5 Functions

Rating   Name   Duplication   Size   Complexity  
B calc_rmsd() 45 45 5
A calc_rmsd_pymol() 0 83 3
A get_rna_models_from_dir() 0 17 2
A sort_nicely() 0 9 4
B get_parser() 71 71 2

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems, and corresponding solutions are:

1
#!/Applications/PyMOL3.app/Contents/bin/python3
2
#-*- coding: utf-8 -*-
3
4
"""rna_calc_rmsd_pymol.py - calculate RMSDs of structures with PyMOL
5
6
"""
7
from __future__ import print_function
8
9
import warnings
10
from Bio import BiopythonDeprecationWarning
11
warnings.filterwarnings("ignore", category=BiopythonDeprecationWarning)
12
13
import sys
14
import argparse
15
import sys
16
import math
17
import glob
18
import re
19
import os
20
21
try:
22
    import __main__
23
    __main__.pymol_argv = ['pymol', '-qc']
24
    import pymol  # import cmd, finish_launching
25
26
    from pymol import cmd
27
    #cmd.feedback("disable")
28
    cmd.set("logging", 0)  # optional; turns off logging
29
    cmd.set("suspend_updates", 1)  # optional; disables GUI redraws (if in GUI)
30
    pymol.finish_launching()
31
32
except ImportError:
33
    print('calc_rmsd_pymol: you need to have installed PyMOL')
34
    sys.exit(0) # no error
35
36
37
38
def get_rna_models_from_dir(files):
39
    """
40
    :param models: a list of filenames
41
42
    Example of the list::
43
44
       ['test_data/rp17/2_restr1_Michal1.pdb_clean.pdb', 'test_data/rp17/2a_nonrestr2_Michal1.pdb_clean.pdb',
45
       'test_data/rp17/3_nonrestr1_Michal1.pdb_clean.pdb', 'test_data/rp17/5_restr1_Michal3.pdb_clean.pdb']"""
46
47
    models = []
48
    #if not os.path.exists(directory):
49
    #    raise Exception('Dir does not exist! ', directory)
50
    #files = glob.glob(directory + "/*.pdb")
51
    files_sorted = sort_nicely(files)
52
    for f in files_sorted:
53
        models.append(f)
54
    return models
55
56
def sort_nicely( l ):
57
   """ Sort the given list in the way that humans expect.
58
59
   http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
60
   """
61
   convert = lambda text: int(text) if text.isdigit() else text
62
   alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
63
   l.sort( key=alphanum_key )
64
   return l
65
66
def calc_rmsd_pymol(pdb1, pdb2, method, verbose=False):
67
    """Calculate rmsd using PyMOL. Two methods are available: align and fit
68
69
    See:
70
71
    -  Align: <http://www.pymolwiki.org/index.php/Align>
72
    -  Fit:   <http://www.pymolwiki.org/index.php/Fit>
73
74
    Align can return a list with 7 items:
75
76
    - RMSD after refinement
77
    - Number of aligned atoms after refinement
78
    - Number of refinement cycles
79
    - RMSD before refinement
80
    - Number of aligned atoms before refinement
81
    - Raw alignment score
82
    - Number of residues aligned
83
84
    in this version of function, the function returns `RMSD before refinement`.
85
86
    Install on OSX: ``brew install brewsci/bio/pymol`` or get 
87
88
    If you have a problem::
89
90
      Match-Error: unable to open matrix file '/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/data/pymol/matrices/BLOSUM62'.
91
92
    then find BLOSUM62, e.g.::
93
94
        mdfind -name BLOSUM62 | grep pymol
95
        /Users/magnus/miniconda2/envs/py37/lib/python3.7/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62
96
        /usr/local/Cellar/pymol/2.4.0_3/libexec/lib/python3.9/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62
97
        /Users/magnus/miniconda2/pkgs/pymol-2.4.2-py37h06d7bae_0/share/pymol/data/pymol/matrices/BLOSUM62
98
        /Users/magnus/work/opt/pymol-open-source/data/pymol/matrices/BLOSUM62
99
100
    and then define ``PYMOL_DATA`` in your .bashrc/.zshrc, e.g.::
101
102
       export PYMOL_DATA="/Users/magnus/work/opt/pymol-open-source/data/pymol"
103
104
     """
105
106
    pymol.cmd.reinitialize()
107
    pymol.cmd.delete('all')
108
    pymol.cmd.load(pdb1, 's1')
109
    pymol.cmd.load(pdb2, 's2')
110
111
    if method == 'align':
112
        # experiments with align <https://pymolwiki.org/index.php/Align>
113
        # quiet = 0/1: suppress output {default: 0 in command mode, 1 in API}
114
        # (4.130036354064941, 60, 3, 4.813207626342773, 64, 30.0, 3)
115
        values = pymol.cmd.align('s1', 's2',quiet=1, object='aln')
116
117
        from Bio import pairwise2
118
119
        def get_sequence(obj):
120
            """Extract 1-letter FASTA sequence from PyMOL object"""
121
            fasta = cmd.get_fastastr(obj)
122
            lines = fasta.splitlines()
123
            return ''.join(lines[1:])  # Skip the >header line
124
125
        def calc_sequence_identity(seq1, seq2):
126
            """Calculate global sequence identity"""
127
            alignments = pairwise2.align.globalxx(seq1, seq2)
128
            best = alignments[0]
129
            matches = sum(a == b for a, b in zip(best.seqA, best.seqB))
130
            return matches / max(len(seq1), len(seq2))
131
132
        seq1 = get_sequence("s1")
133
        seq2 = get_sequence("s2")
134
        print(seq1, seq2)
135
        
136
        identity = calc_sequence_identity(seq1, seq2)
137
        print(f"Sequence identity: {identity:.2%}")
138
139
        print(values)
140
        return values[0], values[3], identity # (,#0) #, pymol.cmd.align('s1','s2')[4])
141
        #raw_aln = pymol.cmd.get_raw_alignment('aln')
142
        #print raw_aln
143
        #for idx1, idx2 in raw_aln:
144
        #    print '%s`%d -> %s`%d' % tuple(idx1 + idx2)
145
        #pymol.cmd.save('aln.aln', 'aln')
146
147
    if method == 'fit':
148
        return (pymol.cmd.fit('s1', 's2'), 'fit')
149
150 View Code Duplication
def calc_rmsd(a, b, target_selection, target_ignore_selection, model_selection, model_ignore_selection, way, verbose):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
151
    """
152
    Calculate RMSD between two XYZ files
153
154
    by: Jimmy Charnley Kromann <[email protected]> and Lars Andersen Bratholm <[email protected]>
155
    project: https://github.com/charnley/rmsd
156
    license: https://github.com/charnley/rmsd/blob/master/LICENSE
157
158
    a is model
159
    b is target
160
161
    :params: a = filename of structure a
162
    :params: b = filename of structure b
163
164
    :return: rmsd, number of atoms
165
    """
166
    if verbose: print('in:', a)
167
168
    atomsP, P, atoms = get_coordinates(a, model_selection, model_ignore_selection, 'pdb', True, way)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable get_coordinates does not seem to be defined.
Loading history...
169
    atomsQ, Q, atoms = get_coordinates(b, target_selection,target_ignore_selection,  'pdb', True, way)
170
171
    if verbose:
172
        print(atomsP, P)
173
        print(atomsQ, Q)
174
175
    if atomsQ != atomsP:
176
        print('Error: number of atoms is not equal target (' + b + '):' + str(atomsQ) + ' vs model (' + a + '):' + str(atomsP))
177
        return (-1,0) # skip this RNA
178
    # Calculate 'dumb' RMSD
179
    normal_rmsd = rmsd(P, Q, atoms)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable rmsd does not seem to be defined.
Loading history...
180
    # Create the centroid of P and Q which is the geometric center of a
181
    # N-dimensional region and translate P and Q onto that center.
182
    # http://en.wikipedia.org/wiki/Centroid
183
    Pc = centroid(P)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable centroid does not seem to be defined.
Loading history...
184
    Qc = centroid(Q)
185
    P -= Pc
186
    Q -= Qc
187
188
    if False:
189
        V = rotate(P, Q)
190
        V += Qc
191
        write_coordinates(atomsP, V)
192
        quit()
193
194
    return round(kabsch_rmsd(P, Q, atoms),2), atomsP
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable kabsch_rmsd does not seem to be defined.
Loading history...
195
196 View Code Duplication
def get_parser():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
197
    import argparse
198
    class SmartFormatter(argparse.HelpFormatter):
199
        def _split_lines(self, text, width):
200
            if text.startswith('R|'):
201
                return text[2:].splitlines()  
202
            # this is the RawTextHelpFormatter._split_lines
203
            return argparse.HelpFormatter._split_lines(self, text, width)
204
205
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=SmartFormatter)#formatter_class=argparse.RawDescriptionHelpFormatter)
206
207
    parser.add_argument('-t',"--target-fn",
208
                         default='', required = True,
209
                         help="pdb file")
210
211
    parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'', default='')
212
213
    parser.add_argument("--target-selection",
214
                            default='',
215
                         help="selection, e.g. A:10-16+20, where #16 residue is included")
216
217
    parser.add_argument("--target-ignore-selection",
218
                            default='',
219
                         help="A/10/O2\'")
220
221
    parser.add_argument("--model-selection",
222
                            default='',
223
                         help="selection, e.g. A:10-16+20, where #16 residue is included")
224
225
    parser.add_argument("--model-ignore-selection",
226
                            default='',
227
                         help="A/10/O2\'")
228
229
    parser.add_argument('-m', "--method",
230
                         default='all-atom-built-in',
231
                         help="align, fit")
232
233
    parser.add_argument('-o', "--rmsds-fn",
234
                         default='rmsds.csv',
235
                         help="ouput, matrix")
236
237
    parser.add_argument("-v", "--verbose", action="store_true",
238
                        help="verbose")
239
240
    parser.add_argument('-pr', '--print-results',
241
                         action="store_true")
242
243
    parser.add_argument('-sr', '--sort-results',
244
                         action="store_true")
245
246
    parser.add_argument('-pp', '--print-progress',
247
                         default=False,
248
                         action="store_true")
249
250
    parser.add_argument('--way', help="""R|c1p = C1'
251
backbone = P OP1 OP2 O5' C5' C4' C3' O3'
252
po = P OP1 OP2
253
no-backbone = all - po
254
bases, backbone+sugar, sugar
255
pooo = P OP1 OP2 O5'
256
alpha = P OP1 OP2 O5' C5'
257
""", default='all')
258
259
    parser.add_argument("--name-rmsd-column", help="default: fn,rmsd, with this cols will be fn,<name-rmsd-column>")
260
261
    parser.add_argument("--target-column-name", action="store_true",
262
                        help="")
263
264
    parser.add_argument('files', help='files', nargs='+')
265
266
    return parser
267
# main
268
if __name__ == '__main__':
269
    parser = get_parser()
270
    args = parser.parse_args()
271
272
    input_files = args.files  # opts.input_dir
273
    tmp = []
274
    if args.ignore_files:
275
        for f in input_files:
276
            if args.ignore_files in f:
277
                continue
278
            tmp.append(f)
279
        input_files = tmp
280
281
    rmsds_fn = args.rmsds_fn
282
    target_fn = args.target_fn
283
    method = args.method
284
        
285
    if args.verbose:
286
        print('method:', method)
287
288
    models = get_rna_models_from_dir(input_files)
289
290
    if args.verbose:
291
        print('target:', target_fn)
292
        print('of models:', len(models))
293
294
    f = open(rmsds_fn, 'w')
295
    #t = 'target:' + os.path.basename(target_fn) + ' , rmsd_all\n'
296
    if args.name_rmsd_column:
297
        t = 'fn,' + args.name_rmsd_column + '\n'
298
    elif args.target_column_name:
299
        t = 'fn,' + os.path.basename(args.target_fn) + '\n'
300
    else:
301
        t = 'fn,rmsd_all,seq_identity\n'
302
303
    if not args.verbose:
304
        t = ''
305
    c = 1
306
    for r1 in models:
307
        if method == 'align' or method == 'fit':
308
            rmsd_curr, atoms, seq_identity = calc_rmsd_pymol(r1, target_fn, method, args.verbose)
309
        r1_basename = os.path.basename(r1)
310
        if args.print_progress: print(r1_basename, rmsd_curr, atoms)
0 ignored issues
show
introduced by
The variable rmsd_curr does not seem to be defined for all execution paths.
Loading history...
introduced by
The variable atoms does not seem to be defined for all execution paths.
Loading history...
311
        t += r1_basename + ',' + str(round(rmsd_curr,3)) + ',' + str(seq_identity)
0 ignored issues
show
introduced by
The variable seq_identity does not seem to be defined for all execution paths.
Loading history...
312
        c += 1
313
        t += '\n'
314
315
    f.write(t)
316
    f.close()
317
318
    if args.verbose:
319
        print('number of atoms used:', atoms)
320
321
    try:
322
        import pandas as pdx
323
        pd.set_option('display.max_rows', 1000)
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable pd does not seem to be defined.
Loading history...
324
325
    except:
326
        print(t.strip()) # matrix
327
        sys.exit(0)
328
        
329
    df = pd.read_csv(rmsds_fn)
330
    df = df.round(2)
331
    if args.sort_results:
332
        df = df.sort_values('rmsd_all', ascending=True)
333
    if args.print_results:
334
        print(df)
335
    df.to_csv(rmsds_fn, sep=',', index=False)  # easy to set \t here!
336
337
    # print('# csv was created! ', rmsds_fn)
338