triplexibility - Code Metrics - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

triplexibility F
last analyzed 2025-03-19 22:26 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	412
Duplicated Lines	37.62 %

Importance

Changes

Metric	Value
eloc	279
dl	155
loc	412
rs	2.48
c	0
b	0
f	0
wmc	74

6 Methods

Rating	Name	Duplication	Size	Complexity
F	RNAmodel.get_rmsd_to()	69	240	58
A	RNAmodel.__repr__()	0	2	1
A	RNAmodel.get_report()	0	6	2
A	RNAmodel.__get_atoms()	0	8	3
A	RNAmodel.__str__()	0	2	1
A	RNAmodel.__init__()	0	6	1

3 Functions

Rating	Name	Duplication	Size	Complexity
A	sort_nicely()	0	9	4
A	get_rna_models_from_dir()	10	10	3
A	get_parser()	21	21	1

How to fix Duplicated Code Complexity

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Examples::

  triplexibility.py -t 2nd_triplex_FB_ABC.pdb  triples-all-v2-rpr/*.pdb --way backbone+sugar --save --suffix ABC --result abc.csv

  triplexibility.py -t 2nd_triplex_FB_1AUA3_rpr.pdb test_data/triples2/Triple_cWW_tSH_GCA_exemplar_rpr_ren.pdb  --way backbone+sugar --save --column-name 'rmsd' --triple-mode > out.txt

"""
from __future__ import print_function

import Bio.PDB.PDBParser
import Bio.PDB.Superimposer
import copy
import warnings
warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
warnings.filterwarnings('ignore', '.*with given element *',)
warnings.filterwarnings('ignore', '.*is discontinuous*',)
warnings.filterwarnings('ignore', '.*Ignoring unrecognized record*',)

import sys
from icecream import ic
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')

from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
from rna_tools.rna_tools_config import RT

import argparse
import sys
import glob
import re
import os

class RNAmodel:
    def __init__(self, fpath):
        # parser 1-5 -> 1 2 3 4 5
        self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
        self.__get_atoms()
        self.fpath = fpath
        self.fn = os.path.basename(fpath)
        # self.atoms = []
        # if save:
        #    self.save() # @save

    def __get_atoms(self):
        self.atoms=[]
        # [<Atom P>, <Atom C5'>, <Atom O5'>, <Atom C4'>, <Atom O4'>, <Atom C3'>, <Atom O3'>, <Atom C2'>, <Atom O2'>, <Atom C1'>, <Atom N1>, <Atom C2>, <Atom N3>, <Atom C4>, <Atom C5>, <Atom C6>, <Atom N6>, <Atom N7>, <Atom C8>, <Atom N9>, <Atom OP1>, <Atom OP2>]
        for res in self.struc.get_residues():
            for at in res:
                #if args.debug: print(res.resname, res.get_id, at)
                self.atoms.append(at)
        return self.atoms

    def __str__(self):
        return self.fn #+ ' # beads' + str(len(self.residues))

    def __repr__(self):
        return self.fn #+ ' # beads' + str(len(self.residues))

    def get_report(self):
        """Str a short report about rna model""" 
        t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
        for r,a in zip(self.residues, self.atoms ):
            t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
        return t

    def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
        """Calc rmsd P-atom based rmsd to other rna model

        sugar now 10 atoms ;-) """
        sup = Bio.PDB.Superimposer()
        if way in ['c1', 'backbone+sugar', 'sugar']:
            if way == 'c1':
                atomslist = ["C1'"]# ,"C2'","O4'"] #, "C2'"]

            elif way == 'sugar':
                atomslist = "C5',O5',C4',O4',C3',O3',C2',O2',C1'".split(',')

            elif way == 'backbone+sugar':
                atomslist = "P,OP1,OP2,C5',O5',C4',O4',C3',O3',C2',O2',C1'".split(',')

            self.atoms_for_rmsd = []
            for a in self.atoms:
                nt = a.get_parent().get_resname().strip()
                if nt in ['G', 'A']:
                    atomslistx = atomslist + ['N9']

                if nt in ['C', 'U']:
                    atomslistx = atomslist + ['N1']
                if a.name in atomslistx:

                    self.atoms_for_rmsd.append(a)
            if args.debug: print('atoms_for_rmsd', len(self.atoms_for_rmsd))


            other_atoms_for_rmsd = []
            for a in other_rnamodel.atoms:
                nt = a.get_parent().get_resname().strip()
                if nt in ['G', 'A']:
                    atomslistx = atomslist + ['N9']
                if nt in ['C', 'U']:
                    atomslistx = atomslist + ['N1']
                if a.name in atomslistx:
                    other_atoms_for_rmsd.append(a)

            if args.debug: print('other_atoms_for_rmsd', len(other_atoms_for_rmsd))
                                 
        elif way == 'c1+Nx':
            self.atoms_for_rmsd = []
            for a in self.atoms:
                nt = a.get_parent().get_resname().strip() # G
                if nt in ['G', 'A']:
                    atomslist = ["C1'", 'N9'] # , 'N1']
                if nt in ['C', 'U']:
                    atomslist = ["C1'", 'N1'] # , 'N1']
                if a.name in atomslist:
                    self.atoms_for_rmsd.append(a)

            if args.debug: print('atoms_for_rmsd', len(self.atoms_for_rmsd))

            other_atoms_for_rmsd = []
            for a in other_rnamodel.atoms:
                nt = a.get_parent().get_resname().strip() # G
                if nt in ['G', 'A']:
                    atomslist = ["C1'", 'N9'] # , 'N1']
                if nt in ['C', 'U']:
                    atomslist = ["C1'", 'N1'] # , 'N1']
                if a.name in atomslist:
                    other_atoms_for_rmsd.append(a)
            if args.debug: print('other_atoms_for_rmsd', len(other_atoms_for_rmsd))

        else:
            self.atoms_for_rmsd = self.atoms
            other_atoms_for_rmsd = other_rnamodel.atoms

        # calc rmsd #
        if not tseq:
            tseq = ''
            rt = None
            for a in self.atoms_for_rmsd:
                r = a.get_parent()
                if r != rt:
                    rt = r
                    tseq += r.get_resname().strip()

        if triple_mode:
            def chunks(lst, n):
                """Yield successive n-sized chunks from lst.
                https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
                """
                for i in range(0, len(lst), n):
                    yield lst[i:i + n]

            rmsd_min = 10000 # ugly
            import itertools
            # ok, for different residues now it's a problem  
            per = list(itertools.permutations([0, 1, 2])) # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
            lst = list(chunks(other_atoms_for_rmsd,
                              int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
                       # so so len is 3 atoms so / by 3 to get how many atoms per residue
            print(lst)

            sup_min = None
            seq_min = 'not yet obtained, rmsd rejected!'
            p_min = None

            rms = -1

            for p in per:
                patoms = []
                for i in p: # p=(1, 2, 3)
                    patoms.extend(lst[i])

                #print(self.atoms_for_rmsd)
                ## print('patoms')
                ## for a in patoms:
                ##     print(a, a.get_parent().get_id())
                ## print('self.atoms_for_rmsd')
                ## for a in self.atoms_for_rmsd:
                ##     print(a, a.get_parent().get_id())
                #sup.set_atoms(patoms, self.atoms_for_rmsd)

                rt = None
                seq = ''
                for a in patoms:
                    r = a.get_parent()
                    if r != rt:
                        rt = r
                        seq += r.get_resname().strip()
             
                ic(tseq.lower(), seq.lower(), other_rnamodel.fpath)
                # dont' even calc rmsd if the curr seq and tseq are not the same
                if tseq.lower() == seq.lower():  # only if seq is the same
                    print(self.atoms_for_rmsd)
                    print(patoms)
                    print(len(self.atoms_for_rmsd), len(patoms))
                    for a, b in zip(self.atoms_for_rmsd, patoms):
                        print(a, a.get_parent().id, a.get_parent().get_resname(),
                           b.get_parent().id, b.get_parent().get_resname())
                    print(len(self.atoms_for_rmsd), len(patoms))
                    try:
                        sup.set_atoms(self.atoms_for_rmsd, patoms)
                    except:
                        pass
                    rms = round(sup.rms, 2)
                    if rms < rmsd_min:
                            rmsd_min = rms
                            sup_min = copy.copy(sup)
                            suffix = seq
                            p_min = p
                            seq_min = seq
                            if args.debug: 'set new rmsd_min', rmsd_min

                    if args.debug:
                        print(p, '', [i + 1 for i in p], end=' ')
                        print(seq, 'seq_min: ' + seq_min, rms)

            if p_min: # found target sequence

                # what is this? ;-)
                index = [0 ,0 ,0]
                index[0] = p_min.index(0)
                index[1] = p_min.index(1)
                index[2] = p_min.index(2)

                # ugly re-set 123 to crazy id ! + 100, so this will
                # fill up 1 2 3 for the second for
                rs = other_rnamodel.struc[0].get_residues()
                for i, r in enumerate(rs):
                    r.id = (' ', index[i] + 253, ' ') # ugly, some random offset

                for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
                    r.id = (' ', index[i] + 1, ' ')
                    if args.debug: print('r', r)

                io = Bio.PDB.PDBIO()
                sup_min.apply(other_rnamodel.struc.get_atoms())
                # if args.debug: print(p_min, [i + 1 for i in p_min])
                io.set_structure(other_rnamodel.struc)

                args.save_here = True
                if args.save_here and save:
                    folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
                    # print(f)
                    try:
                        os.mkdir(folder)
                    except:
                        pass
                    fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
                    #_s' + suffix + '.pdb'))
                else:
                    fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')

                if args.debug: print(fout)

                if save:
                    io.save(fout)
                    # ugly set chain to A
                    set_chain_for_struc(fout, 'A', save_file_inplace=True)
                    # and now run this to sort into 1 2 3

                    r = RNAStructure(fout)
                    remarks = r.get_remarks_text()
                    r1 = r.get_res_text('A', 1)
                    r2 = r.get_res_text('A', 2)
                    r3 = r.get_res_text('A', 3)
                    with open(fout, 'w') as f:
                        f.write(remarks)
                        f.write(r1)
                        f.write(r2)
                        f.write(r3)
                    r.reload()
                    r.get_rnapuzzle_ready()
                    if rmsd_min < 1:  # !!!!!!!!!!!! ugly
                         r.write()
                return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
        else:
            # check if number of the same atoms #
            # if not the same then return -1    #
            # print(len(self.atoms_for_rmsd), len(other_atoms_for_rmsd))
            if len(self.atoms_for_rmsd) != len(other_atoms_for_rmsd):
                return -1
            sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
            rms = round(sup.rms, 2)

            if save:
                ## io = Bio.PDB.PDBIO()
                ## sup.apply(self.struc.get_atoms())
                ## io.set_structure(self.struc)
                ## io.save("aligned.pdb")

                io = Bio.PDB.PDBIO()
                sup.apply(other_rnamodel.struc.get_atoms())
                io.set_structure(other_rnamodel.struc)

            args.save_here = True
            if args.save_here and save:

                f = os.path.basename(self.fpath.replace('.pdb', '_aligned'))
                # print(f)
                try:
                    os.mkdir(f)
                except:
                    pass
                # fout = f + os.sep + os.path.basename(other_rnamodel.fpath.replace('.pdb', '_aligned_s' + suffix + '.pdb'))
                fout = f + os.sep + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '_aligned'))
            else:
                fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')
            if save:
               io.save(fout)

        return rms

    
def get_rna_models_from_dir(directory):

    models = []
    if not os.path.exists(directory):
        raise Exception('Dir does not exist! ', directory)
    files = glob.glob(directory + "/*.pdb")
    files_sorted = sort_nicely(files)
    for f in files_sorted:
        #print f
        models.append(RNAmodel(f))
    return models

def sort_nicely( l ):
   """ Sort the given list in the way that humans expect.

   http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
   """
   convert = lambda text: int(text) if text.isdigit() else text
   alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
   l.sort( key=alphanum_key )
   return l


def get_parser():

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-t',"--target",
                         help="target file")
    parser.add_argument('--result', #default='out.rmsd',
                         help="result file")
    parser.add_argument('--ignore-files', default='aligned', help="use to ignore files, .e.g. with 'aligned'")
    parser.add_argument('--suffix', default='aligned', help="used with --saved, by default: aligned")
    parser.add_argument('--way', help="e.g., backbone+sugar, c1, c1+Nx")
    parser.add_argument('--triple-mode', help="same crazy strategy to align triples", default=True, action="store_true")
    parser.add_argument('--column-name', help="name column for rmsd, by default 'rmsd', but can also be a name of the target file", default="rmsd")
    parser.add_argument("-s", "--save", action="store_true", help="set suffix with --suffix, by default: aligned")
    parser.add_argument("--folder-prefix", default = '', help="folder name, t2-3-UAU_NAME_aligned")

    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending), --result must be set up')
    parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
    parser.add_argument('--files', help='files', nargs='+', default=RT + '/rna_tools/tools/triplexibility/db/triples-all-v2-rpr/*.pdb')
    return parser

# main
if __name__ == '__main__':

    parser = get_parser()
    args = parser.parse_args()
    if not args.debug:
        ic.disable()

    target = RNAmodel(args.target)

    # a trick to get files be default if there is only a path (so a string ;-))
    if not isinstance(args.files, str):
        models = args.files
    else:
        import glob
        models = glob.glob(args.files) # opts.input_dir

    ## tmp = []
    ## if args.ignore_files:
    ##     for f in args.files:
    ##         if args.debug: print(f)
    ##         if args.ignore_files in f:
    ##             continue
    ##         tmp.append(f)
    ##     models = tmp
 
    print('# of models:', len(models))

    c = 1
    t = 'fn,' + args.column_name + '\n' # ,aligned_seq, aligned_fn\n'
    for m in models:
        mrna = RNAmodel(m)
        #print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
        # print(m)
        rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
        #except:
        if 0:
            print(m)
            sys.exit(1)
        #print rmsd
        t += mrna.fn + ',' + str(rmsd) + '\n'
        #break    

    print(t.strip())

    if args.result:
        with open(args.result, 'w') as f:
            f.write(t)

        if args.sort:
            import pandas as pd
            df = pd.read_csv(args.result)
            df = df[df.rmsd != -1]
            df = df.sort_values('rmsd')
            df.to_csv(args.result, index=False)
            print('saved: %s' % args.result)
            print(df.to_string(index=False))


1		#!/usr/bin/env python
2		# -- coding: utf-8 --
3
4		"""
5		Examples::
6
7		triplexibility.py -t 2nd_triplex_FB_ABC.pdb triples-all-v2-rpr/*.pdb --way backbone+sugar --save --suffix ABC --result abc.csv
8
9		triplexibility.py -t 2nd_triplex_FB_1AUA3_rpr.pdb test_data/triples2/Triple_cWW_tSH_GCA_exemplar_rpr_ren.pdb --way backbone+sugar --save --column-name 'rmsd' --triple-mode > out.txt
10
11		"""
12		from __future__ import print_function
13
14		import Bio.PDB.PDBParser
15		import Bio.PDB.Superimposer
16		import copy
17		import warnings
18		warnings.filterwarnings('ignore', '.Invalid or missing.',)
19		warnings.filterwarnings('ignore', '.with given element ',)
20		warnings.filterwarnings('ignore', '.is discontinuous',)
21		warnings.filterwarnings('ignore', '.Ignoring unrecognized record',)
22
23		import sys
24		from icecream import ic
25		ic.configureOutput(outputFunction=lambda a: print(a, file=sys.stderr))
26		ic.configureOutput(prefix='> ')
27
28		from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
29		from rna_tools.rna_tools_config import RT
30
31		import argparse
32		import sys
33		import glob
34		import re
35		import os
36
37		class RNAmodel:
38		def __init__(self, fpath):
39		# parser 1-5 -> 1 2 3 4 5
40		self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
41		self.__get_atoms()
42		self.fpath = fpath
43		self.fn = os.path.basename(fpath)
44		# self.atoms = []
45		# if save:
46		# self.save() # @save
47
48		def __get_atoms(self):
49		self.atoms=[]
50		# [<Atom P>, <Atom C5'>, <Atom O5'>, <Atom C4'>, <Atom O4'>, <Atom C3'>, <Atom O3'>, <Atom C2'>, <Atom O2'>, <Atom C1'>, <Atom N1>, <Atom C2>, <Atom N3>, <Atom C4>, <Atom C5>, <Atom C6>, <Atom N6>, <Atom N7>, <Atom C8>, <Atom N9>, <Atom OP1>, <Atom OP2>]
51		for res in self.struc.get_residues():
52		for at in res:
53		#if args.debug: print(res.resname, res.get_id, at)
54		self.atoms.append(at)
55		return self.atoms
56
57		def __str__(self):
58		return self.fn #+ ' # beads' + str(len(self.residues))
59
60		def __repr__(self):
61		return self.fn #+ ' # beads' + str(len(self.residues))
62
63		def get_report(self):
64		"""Str a short report about rna model"""
65		t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
66		for r,a in zip(self.residues, self.atoms ):
67		t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
68		return t
69
70		def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
71		"""Calc rmsd P-atom based rmsd to other rna model
72
73		sugar now 10 atoms ;-) """
74		sup = Bio.PDB.Superimposer()
75		if way in ['c1', 'backbone+sugar', 'sugar']:
76		if way == 'c1':
77		atomslist = ["C1'"]# ,"C2'","O4'"] #, "C2'"]
78
79		elif way == 'sugar':
80		atomslist = "C5',O5',C4',O4',C3',O3',C2',O2',C1'".split(',')
81
82		elif way == 'backbone+sugar':
83		atomslist = "P,OP1,OP2,C5',O5',C4',O4',C3',O3',C2',O2',C1'".split(',')
84
85		self.atoms_for_rmsd = []
86		for a in self.atoms:
87		nt = a.get_parent().get_resname().strip()
88		if nt in ['G', 'A']:
89		atomslistx = atomslist + ['N9']
		0 ignored issues – show introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report The variable `atomslist` does not seem to be defined for all execution paths. Loading history...
90		if nt in ['C', 'U']:
91		atomslistx = atomslist + ['N1']
92		if a.name in atomslistx:
		0 ignored issues – show introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report The variable `atomslistx` does not seem to be defined for all execution paths. Loading history...
93		self.atoms_for_rmsd.append(a)
94		if args.debug: print('atoms_for_rmsd', len(self.atoms_for_rmsd))
		0 ignored issues – show introduced 2020-11-13 17:52 UTC by Report Bug Copy Issue Report The variable `args` does not seem to be defined in case `__name__ == '__main__'` on line `357` is `False`. Are you sure this can never be the case? Loading history...
95
96		other_atoms_for_rmsd = []
97		for a in other_rnamodel.atoms:
98		nt = a.get_parent().get_resname().strip()
99		if nt in ['G', 'A']:
100		atomslistx = atomslist + ['N9']
101		if nt in ['C', 'U']:
102		atomslistx = atomslist + ['N1']
103		if a.name in atomslistx:
104		other_atoms_for_rmsd.append(a)
105
106		if args.debug: print('other_atoms_for_rmsd', len(other_atoms_for_rmsd))
107
108		elif way == 'c1+Nx':
109		self.atoms_for_rmsd = []
110		for a in self.atoms:
111		nt = a.get_parent().get_resname().strip() # G
112		if nt in ['G', 'A']:
113		atomslist = ["C1'", 'N9'] # , 'N1']
114		if nt in ['C', 'U']:
115		atomslist = ["C1'", 'N1'] # , 'N1']
116		if a.name in atomslist:
117		self.atoms_for_rmsd.append(a)
118
119		if args.debug: print('atoms_for_rmsd', len(self.atoms_for_rmsd))
120
121		other_atoms_for_rmsd = []
122		for a in other_rnamodel.atoms:
123		nt = a.get_parent().get_resname().strip() # G
124		if nt in ['G', 'A']:
125		atomslist = ["C1'", 'N9'] # , 'N1']
126		if nt in ['C', 'U']:
127		atomslist = ["C1'", 'N1'] # , 'N1']
128		if a.name in atomslist:
129		other_atoms_for_rmsd.append(a)
130		if args.debug: print('other_atoms_for_rmsd', len(other_atoms_for_rmsd))
131
132		else:
133		self.atoms_for_rmsd = self.atoms
134		other_atoms_for_rmsd = other_rnamodel.atoms
135
136		# calc rmsd #
137		if not tseq:
138		tseq = ''
139		rt = None
140		for a in self.atoms_for_rmsd:
141		r = a.get_parent()
142		if r != rt:
143		rt = r
144		tseq += r.get_resname().strip()
145
146		if triple_mode:
147		def chunks(lst, n):
148		"""Yield successive n-sized chunks from lst.
149		https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
150		"""
151		for i in range(0, len(lst), n):
152		yield lst[i:i + n]
153
154		rmsd_min = 10000 # ugly
155		import itertools
156		# ok, for different residues now it's a problem
157		per = list(itertools.permutations([0, 1, 2])) # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
158		lst = list(chunks(other_atoms_for_rmsd,
159		int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
160		# so so len is 3 atoms so / by 3 to get how many atoms per residue
161		print(lst)
162
163		sup_min = None
164		seq_min = 'not yet obtained, rmsd rejected!'
165		p_min = None
166
167		rms = -1
168
169		for p in per:
170		patoms = []
171		for i in p: # p=(1, 2, 3)
172		patoms.extend(lst[i])
173
174		#print(self.atoms_for_rmsd)
175		## print('patoms')
176		## for a in patoms:
177		## print(a, a.get_parent().get_id())
178		## print('self.atoms_for_rmsd')
179		## for a in self.atoms_for_rmsd:
180		## print(a, a.get_parent().get_id())
181		#sup.set_atoms(patoms, self.atoms_for_rmsd)
182
183		rt = None
184		seq = ''
185		for a in patoms:
186		r = a.get_parent()
187		if r != rt:
188		rt = r
189		seq += r.get_resname().strip()
190
191		ic(tseq.lower(), seq.lower(), other_rnamodel.fpath)
192		# dont' even calc rmsd if the curr seq and tseq are not the same
193		if tseq.lower() == seq.lower(): # only if seq is the same
194		print(self.atoms_for_rmsd)
195		print(patoms)
196		print(len(self.atoms_for_rmsd), len(patoms))
197		for a, b in zip(self.atoms_for_rmsd, patoms):
198		print(a, a.get_parent().id, a.get_parent().get_resname(),
199		b.get_parent().id, b.get_parent().get_resname())
200		print(len(self.atoms_for_rmsd), len(patoms))
201		try:
202		sup.set_atoms(self.atoms_for_rmsd, patoms)
203		except:
204		pass
205		rms = round(sup.rms, 2)
206		if rms < rmsd_min:
207		rmsd_min = rms
208		sup_min = copy.copy(sup)
209		suffix = seq
210		p_min = p
211		seq_min = seq
212		if args.debug: 'set new rmsd_min', rmsd_min
213
214		if args.debug:
215		print(p, '', [i + 1 for i in p], end=' ')
216		print(seq, 'seq_min: ' + seq_min, rms)
217
218	View Code Duplication	if p_min: # found target sequence
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
219		# what is this? ;-)
220		index = [0 ,0 ,0]
221		index[0] = p_min.index(0)
222		index[1] = p_min.index(1)
223		index[2] = p_min.index(2)
224
225		# ugly re-set 123 to crazy id ! + 100, so this will
226		# fill up 1 2 3 for the second for
227		rs = other_rnamodel.struc[0].get_residues()
228		for i, r in enumerate(rs):
229		r.id = (' ', index[i] + 253, ' ') # ugly, some random offset
230
231		for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
232		r.id = (' ', index[i] + 1, ' ')
233		if args.debug: print('r', r)
234
235		io = Bio.PDB.PDBIO()
236		sup_min.apply(other_rnamodel.struc.get_atoms())
237		# if args.debug: print(p_min, [i + 1 for i in p_min])
238		io.set_structure(other_rnamodel.struc)
239
240		args.save_here = True
241		if args.save_here and save:
242		folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
243		# print(f)
244		try:
245		os.mkdir(folder)
246		except:
247		pass
248		fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
249		#_s' + suffix + '.pdb'))
250		else:
251		fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')
252
253		if args.debug: print(fout)
254
255		if save:
256		io.save(fout)
257		# ugly set chain to A
258		set_chain_for_struc(fout, 'A', save_file_inplace=True)
259		# and now run this to sort into 1 2 3
260
261		r = RNAStructure(fout)
262		remarks = r.get_remarks_text()
263		r1 = r.get_res_text('A', 1)
264		r2 = r.get_res_text('A', 2)
265		r3 = r.get_res_text('A', 3)
266		with open(fout, 'w') as f:
267		f.write(remarks)
268		f.write(r1)
269		f.write(r2)
270		f.write(r3)
271		r.reload()
272		r.get_rnapuzzle_ready()
273		if rmsd_min < 1: # !!!!!!!!!!!! ugly
274		r.write()
275		return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
276		else:
277		# check if number of the same atoms #
278		# if not the same then return -1 #
279		# print(len(self.atoms_for_rmsd), len(other_atoms_for_rmsd))
280		if len(self.atoms_for_rmsd) != len(other_atoms_for_rmsd):
281		return -1
282		sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
283		rms = round(sup.rms, 2)
284
285		if save:
286		## io = Bio.PDB.PDBIO()
287		## sup.apply(self.struc.get_atoms())
288		## io.set_structure(self.struc)
289		## io.save("aligned.pdb")
290
291		io = Bio.PDB.PDBIO()
292		sup.apply(other_rnamodel.struc.get_atoms())
293		io.set_structure(other_rnamodel.struc)
294
295		args.save_here = True
296	View Code Duplication	if args.save_here and save:
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
297		f = os.path.basename(self.fpath.replace('.pdb', '_aligned'))
298		# print(f)
299		try:
300		os.mkdir(f)
301		except:
302		pass
303		# fout = f + os.sep + os.path.basename(other_rnamodel.fpath.replace('.pdb', '_aligned_s' + suffix + '.pdb'))
304		fout = f + os.sep + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '_aligned'))
305		else:
306		fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')
307		if save:
308		io.save(fout)
		0 ignored issues – show introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report The variable `io` does not seem to be defined in case `save` on line `285` is `False`. Are you sure this can never be the case? Loading history...
309		return rms
310
311
312	View Code Duplication	def get_rna_models_from_dir(directory):
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
313		models = []
314		if not os.path.exists(directory):
315		raise Exception('Dir does not exist! ', directory)
316		files = glob.glob(directory + "/*.pdb")
317		files_sorted = sort_nicely(files)
318		for f in files_sorted:
319		#print f
320		models.append(RNAmodel(f))
321		return models
322
323		def sort_nicely( l ):
324		""" Sort the given list in the way that humans expect.
325
326		http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
327		"""
328		convert = lambda text: int(text) if text.isdigit() else text
329		alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
330		l.sort( key=alphanum_key )
331		return l
332
333
334	View Code Duplication	def get_parser():
		0 ignored issues – show Duplication introduced 2020-06-29 19:56 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
335		parser = argparse.ArgumentParser(
336		description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
337
338		parser.add_argument('-t',"--target",
339		help="target file")
340		parser.add_argument('--result', #default='out.rmsd',
341		help="result file")
342		parser.add_argument('--ignore-files', default='aligned', help="use to ignore files, .e.g. with 'aligned'")
343		parser.add_argument('--suffix', default='aligned', help="used with --saved, by default: aligned")
344		parser.add_argument('--way', help="e.g., backbone+sugar, c1, c1+Nx")
345		parser.add_argument('--triple-mode', help="same crazy strategy to align triples", default=True, action="store_true")
346		parser.add_argument('--column-name', help="name column for rmsd, by default 'rmsd', but can also be a name of the target file", default="rmsd")
347		parser.add_argument("-s", "--save", action="store_true", help="set suffix with --suffix, by default: aligned")
348		parser.add_argument("--folder-prefix", default = '', help="folder name, t2-3-UAU_NAME_aligned")
349
350		parser.add_argument("--debug", action="store_true")
351		parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending), --result must be set up')
352		parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
353		parser.add_argument('--files', help='files', nargs='+', default=RT + '/rna_tools/tools/triplexibility/db/triples-all-v2-rpr/*.pdb')
354		return parser
355
356		# main
357	View Code Duplication	if __name__ == '__main__':
		0 ignored issues – show Duplication introduced 2020-06-29 19:56 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
358		parser = get_parser()
359		args = parser.parse_args()
360		if not args.debug:
361		ic.disable()
362
363		target = RNAmodel(args.target)
364
365		# a trick to get files be default if there is only a path (so a string ;-))
366		if not isinstance(args.files, str):
367		models = args.files
368		else:
369		import glob
370		models = glob.glob(args.files) # opts.input_dir
371
372		## tmp = []
373		## if args.ignore_files:
374		## for f in args.files:
375		## if args.debug: print(f)
376		## if args.ignore_files in f:
377		## continue
378		## tmp.append(f)
379		## models = tmp
380
381		print('# of models:', len(models))
382
383		c = 1
384		t = 'fn,' + args.column_name + '\n' # ,aligned_seq, aligned_fn\n'
385		for m in models:
386		mrna = RNAmodel(m)
387		#print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
388		# print(m)
389		rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
390		#except:
391		if 0:
392		print(m)
393		sys.exit(1)
394		#print rmsd
395		t += mrna.fn + ',' + str(rmsd) + '\n'
396		#break
397
398		print(t.strip())
399
400		if args.result:
401		with open(args.result, 'w') as f:
402		f.write(t)
403
404		if args.sort:
405		import pandas as pd
406		df = pd.read_csv(args.result)
407		df = df[df.rmsd != -1]
408		df = df.sort_values('rmsd')
409		df.to_csv(args.result, index=False)
410		print('saved: %s' % args.result)
411		print(df.to_string(index=False))
412

mmagnus / rna-tools

triplexibility F last analyzed 2025-03-19 22:26 UTC

Complexity

Size/Duplication

Importance

6 Methods

3 Functions

How to fix Duplicated Code Complexity

Duplicated Code

Complexity

Duplication Side-by-Side

Filter issues like

triplexibility F
last analyzed 2025-03-19 22:26 UTC