triplexibility2.RNAmodel.get_rmsd_to()   F
last analyzed

Complexity

Conditions 29

Size

Total Lines 169
Code Lines 104

Duplication

Lines 69
Ratio 40.83 %

Importance

Changes 0
Metric Value
cc 29
eloc 104
nop 6
dl 69
loc 169
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

Complexity

Complex classes like triplexibility2.RNAmodel.get_rmsd_to() often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
4
"""
5
v2 is full atom!
6
7
Examples::
8
9
  triplexibility.py -t 2nd_triplex_FB_ABC.pdb  triples-all-v2-rpr/*.pdb --save --result abc.csv
10
11
"""
12
from __future__ import print_function
13
14
import Bio.PDB.PDBParser
15
import Bio.PDB.Superimposer
16
import copy
17
import warnings
18
warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
19
warnings.filterwarnings('ignore', '.*with given element *',)
20
warnings.filterwarnings('ignore', '.*is discontinuous*',)
21
warnings.filterwarnings('ignore', '.*Ignoring unrecognized record*',)
22
23
import sys
24
from icecream import ic
25
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
26
ic.configureOutput(prefix='> ')
27
28
from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
29
from rna_tools.rna_tools_config import RT
30
31
import argparse
32
import sys
33
import glob
34
import re
35
import os
36
37
class RNAmodel:
38
    def __init__(self, fpath):
39
        # parser 1-5 -> 1 2 3 4 5
40
        self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
41
        self.__get_atoms()
42
        self.fpath = fpath
43
        self.fn = os.path.basename(fpath)
44
        # self.atoms = []
45
        # if save:
46
        #    self.save() # @save
47
48
    def __get_atoms(self):
49
        self.atoms = [[], [], []] # this will be only 3 resides
50
        # ugly but should work
51
        tseq = ''
52
        for index, res in enumerate(self.struc.get_residues()):
53
            self.atoms[index] = []
54
            for at in res:
55
                self.atoms[index].append(at)
56
            tseq += res.get_resname()
57
            ## # calc rmsd #
58
            ## if not tseq:
59
            ##     tseq = ''
60
            ##     rt = None
61
            ##     for a in self.atoms_for_rmsd:
62
            ##         r = a.get_parent()
63
            ##         if r != rt:
64
            ##             rt = r
65
            ##             tseq += r.get_resname().strip()
66
        self.tseq = tseq
67
        return self.atoms
68
69
    def __str__(self):
70
        return self.fn #+ ' # beads' + str(len(self.residues))
71
72
    def __repr__(self):
73
        return self.fn #+ ' # beads' + str(len(self.residues))
74
75
    def get_report(self):
76
        """Str a short report about rna model""" 
77
        t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
78
        for r,a in zip(self.residues, self.atoms ):
79
            t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
80
        return t
81
82
    def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
83
        """Calc rmsd P-atom based rmsd to other rna model
84
85
        sugar now 10 atoms ;-) """
86
        sup = Bio.PDB.Superimposer()
87
88
        other_atoms_for_rmsd = other_rnamodel.atoms
89
        
90
        if triple_mode:
91
            def chunks(lst, n):
92
                """Yield successive n-sized chunks from lst.
93
                https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
94
                """
95
                for i in range(0, len(lst), n):
96
                    yield lst[i:i + n]
97
98
            rmsd_min = 10000 # ugly
99
            import itertools
100
            # ok, for different residues now it's a problem  
101
            per = list(itertools.permutations([0, 1, 2])) # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
102
            #lst = list(chunks(other_atoms_for_rmsd,
103
            #                  int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
104
            #           # so so len is 3 atoms so / by 3 to get how many atoms per residue
105
106
            self.atoms_for_rmsd = []
107
            for i in self.atoms:
108
                self.atoms_for_rmsd.extend(i)
109
        
110
            sup_min = None
111
            seq_min = 'not yet obtained, rmsd rejected!'
112
            p_min = None
113
114
            rms = -1
115
116
            for p in per: # for each combo make a list of atoms
117
                
118
                patoms = []
119
                for i in p: # p=(1, 2, 3)
120
                    #patoms.extend(lst[i])
121
                    # list is not needed, just other_atoms !!!! it's a list of [1, 2, 3] residues
122
                    patoms.extend(other_atoms_for_rmsd[i])
123
124
                #print(self.atoms_for_rmsd)
125
                ## print('patoms')
126
                ## for a in patoms:
127
                ##     print(a, a.get_parent().get_id())
128
                ## print('self.atoms_for_rmsd')
129
                ## for a in self.atoms_for_rmsd:
130
                ##     print(a, a.get_parent().get_id())
131
                #sup.set_atoms(patoms, self.atoms_for_rmsd)
132
133
                rt = None
134
                seq = ''
135
                for a in patoms:
136
                    r = a.get_parent()
137
                    if r != rt:
138
                        rt = r
139
                        seq += r.get_resname().strip()
140
             
141
                ic(self.tseq.lower(), seq.lower(), other_rnamodel.fpath)
142
                # dont' even calc rmsd if the curr seq and tseq are not the same
143
                if self.tseq.lower() == seq.lower():  # only if seq is the same
144
145
                    sup.set_atoms(self.atoms_for_rmsd, patoms)
146
                    rms = round(sup.rms, 2)
147
                    if rms < rmsd_min:
148
                            rmsd_min = rms
149
                            sup_min = copy.copy(sup)
150
                            suffix = seq
151
                            p_min = p
152
                            seq_min = seq
153
                            if args.debug: 'set new rmsd_min', rmsd_min
0 ignored issues
show
introduced by
The variable args does not seem to be defined in case __name__ == '__main__' on line 299 is False. Are you sure this can never be the case?
Loading history...
154
155
                    if args.debug:
156
                        print(p, '', [i + 1 for i in p], end=' ')
157
                        print(seq, 'seq_min: ' + seq_min, rms)
158
159 View Code Duplication
            if p_min: # found target sequence
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
160
                # what is this? ;-)
161
                index = [0 ,0 ,0]
162
                index[0] = p_min.index(0)
163
                index[1] = p_min.index(1)
164
                index[2] = p_min.index(2)
165
166
                # ugly re-set 123 to crazy id ! + 100, so this will
167
                # fill up 1 2 3 for the second for
168
                rs = other_rnamodel.struc[0].get_residues()
169
                for i, r in enumerate(rs):
170
                    r.id = (' ', index[i] + 253, ' ') # ugly, some random offset
171
172
                for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
173
                    r.id = (' ', index[i] + 1, ' ')
174
                    if args.debug: print('r', r)
175
176
                io = Bio.PDB.PDBIO()
177
                sup_min.apply(other_rnamodel.struc.get_atoms())
178
                # if args.debug: print(p_min, [i + 1 for i in p_min])
179
                io.set_structure(other_rnamodel.struc)
180
181
                args.save_here = True
182
                if args.save_here and save:
183
                    folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
184
                    # print(f)
185
                    try:
186
                        os.mkdir(folder)
187
                    except:
188
                        pass
189
                    fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
190
                    #_s' + suffix + '.pdb'))
191
                else:
192
                    fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')
193
194
                if args.debug: print(fout)
195
196
                if save:
197
                    io.save(fout)
198
                    # ugly set chain to A
199
                    set_chain_for_struc(fout, 'A', save_file_inplace=True)
200
                    # and now run this to sort into 1 2 3
201
202
                    r = RNAStructure(fout)
203
                    remarks = r.get_remarks_text()
204
                    r1 = r.get_res_text('A', 1)
205
                    r2 = r.get_res_text('A', 2)
206
                    r3 = r.get_res_text('A', 3)
207
                    with open(fout, 'w') as f:
208
                        f.write(remarks)
209
                        f.write(r1)
210
                        f.write(r2)
211
                        f.write(r3)
212
                    r.reload()
213
                    r.get_rnapuzzle_ready()
214
                    if rmsd_min < 1:  # !!!!!!!!!!!! ugly
215
                         r.write()
216
                return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
217
        else:
218
            # check if number of the same atoms #
219
            # if not the same then return -1    #
220
            # print(len(self.atoms_for_rmsd), len(other_atoms_for_rmsd))
221
            if len(self.atoms_for_rmsd) != len(other_atoms_for_rmsd):
222
                return -1
223
            sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
224
            rms = round(sup.rms, 2)
225
226
            if save:
227
                ## io = Bio.PDB.PDBIO()
228
                ## sup.apply(self.struc.get_atoms())
229
                ## io.set_structure(self.struc)
230
                ## io.save("aligned.pdb")
231
232
                io = Bio.PDB.PDBIO()
233
                sup.apply(other_rnamodel.struc.get_atoms())
234
                io.set_structure(other_rnamodel.struc)
235
236
            args.save_here = True
237 View Code Duplication
            if args.save_here and save:
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
238
                f = os.path.basename(self.fpath.replace('.pdb', '_aligned'))
239
                # print(f)
240
                try:
241
                    os.mkdir(f)
242
                except:
243
                    pass
244
                # fout = f + os.sep + os.path.basename(other_rnamodel.fpath.replace('.pdb', '_aligned_s' + suffix + '.pdb'))
245
                fout = f + os.sep + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '_aligned'))
246
            else:
247
                fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')
248
            if save:
249
               io.save(fout)
0 ignored issues
show
introduced by
The variable io does not seem to be defined in case save on line 226 is False. Are you sure this can never be the case?
Loading history...
250
        return rms
251
252
    
253 View Code Duplication
def get_rna_models_from_dir(directory):
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
254
    models = []
255
    if not os.path.exists(directory):
256
        raise Exception('Dir does not exist! ', directory)
257
    files = glob.glob(directory + "/*.pdb")
258
    files_sorted = sort_nicely(files)
259
    for f in files_sorted:
260
        #print f
261
        models.append(RNAmodel(f))
262
    return models
263
264
def sort_nicely( l ):
265
   """ Sort the given list in the way that humans expect.
266
267
   http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
268
   """
269
   convert = lambda text: int(text) if text.isdigit() else text
270
   alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
271
   l.sort( key=alphanum_key )
272
   return l
273
274
275 View Code Duplication
def get_parser():
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
276
    parser = argparse.ArgumentParser(
277
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
278
279
    parser.add_argument('-t',"--target",
280
                         help="target file")
281
    parser.add_argument('--result', #default='out.rmsd',
282
                         help="result file")
283
    parser.add_argument('--ignore-files', default='aligned', help="use to ignore files, .e.g. with 'aligned'")
284
    parser.add_argument('--suffix', default='aligned', help="used with --saved, by default: aligned")
285
    parser.add_argument('--way', help="e.g., backbone+sugar, c1, c1+Nx")
286
    parser.add_argument('--triple-mode', help="same crazy strategy to align triples", default=True, action="store_true")
287
    parser.add_argument('--column-name', help="name column for rmsd, by default 'rmsd', but can also be a name of the target file", default="rmsd")
288
    parser.add_argument("-s", "--save", action="store_true", help="set suffix with --suffix, by default: aligned")
289
    parser.add_argument("--folder-prefix", default = '', help="folder name, t2-3-UAU_NAME_aligned")
290
291
    parser.add_argument("--debug", action="store_true")
292
    parser.add_argument("--show", action="store_true", help="show the results in the terminal")
293
    parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending), --result must be set up')
294
    parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
295
    parser.add_argument('--files', help='files', nargs='+', default=RT + '/rna_tools/tools/triplexibility/db/triples-all-v2-rpr/*.pdb')
296
    return parser
297
298
# main
299 View Code Duplication
if __name__ == '__main__':
0 ignored issues
show
Duplication introduced by
This code seems to be duplicated in your project.
Loading history...
300
    parser = get_parser()
301
    args = parser.parse_args()
302
    if not args.debug:
303
        ic.disable()
304
305
    target = RNAmodel(args.target)
306
307
    # a trick to get files be default if there is only a path (so a string ;-))
308
    if not isinstance(args.files, str):
309
        models = args.files
310
    else:
311
        import glob
312
        models = glob.glob(args.files) # opts.input_dir
313
314
    ## tmp = []
315
    ## if args.ignore_files:
316
    ##     for f in args.files:
317
    ##         if args.debug: print(f)
318
    ##         if args.ignore_files in f:
319
    ##             continue
320
    ##         tmp.append(f)
321
    ##     models = tmp
322
 
323
    print('# of models:', len(models))
324
325
    c = 1
326
    t = 'fn,' + args.column_name + '\n' # ,aligned_seq, aligned_fn\n'
327
    for m in models:
328
        mrna = RNAmodel(m)
329
        #print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
330
        # print(m)
331
        rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
332
        #except:
333
        if 0:
334
            print(m)
335
            sys.exit(1)
336
        #print rmsd
337
        t += mrna.fn + ',' + str(rmsd) + '\n'
338
        #break    
339
340
    if args.show:
341
        print(t.strip()) # all the data
342
343
    if args.result:
344
        with open(args.result, 'w') as f:
345
            f.write(t)
346
347
        if args.sort:
348
            import pandas as pd
349
            df = pd.read_csv(args.result)
350
            df = df[df.rmsd != -1]
351
            df = df.sort_values('rmsd')
352
            df.to_csv(args.result, index=False)
353
            print('saved: %s' % args.result)
354
            # print(df.to_string(index=False))
355