triplexibility2.get_rna_models_from_dir() - Code Metrics - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

triplexibility2.get_rna_models_from_dir() A
last analyzed 2025-12-11 14:59 UTC

↳ Parent: triplexibility2

Complexity

Conditions

Size

Total Lines	10
Code Lines	9

Duplication

Lines	10
Ratio	100 %

Importance

Changes

Metric	Value
cc	3
eloc	9
nop	1
dl	10
loc	10
rs	9.95
c	0
b	0
f	0

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
v2 is full atom!

Examples::

  triplexibility.py -t 2nd_triplex_FB_ABC.pdb  triples-all-v2-rpr/*.pdb --save --result abc.csv

"""
from __future__ import print_function

import Bio.PDB.PDBParser
import Bio.PDB.Superimposer
import copy
import warnings
warnings.filterwarnings('ignore', '.*Invalid or missing.*',)
warnings.filterwarnings('ignore', '.*with given element *',)
warnings.filterwarnings('ignore', '.*is discontinuous*',)
warnings.filterwarnings('ignore', '.*Ignoring unrecognized record*',)

import sys
from icecream import ic
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr))
ic.configureOutput(prefix='> ')

from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
from rna_tools.rna_tools_config import RT

import argparse
import sys
import glob
import re
import os

class RNAmodel:
    def __init__(self, fpath):
        # parser 1-5 -> 1 2 3 4 5
        self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
        self.__get_atoms()
        self.fpath = fpath
        self.fn = os.path.basename(fpath)
        # self.atoms = []
        # if save:
        #    self.save() # @save

    def __get_atoms(self):
        self.atoms = [[], [], []] # this will be only 3 resides
        # ugly but should work
        tseq = ''
        for index, res in enumerate(self.struc.get_residues()):
            self.atoms[index] = []
            for at in res:
                self.atoms[index].append(at)
            tseq += res.get_resname()
            ## # calc rmsd #
            ## if not tseq:
            ##     tseq = ''
            ##     rt = None
            ##     for a in self.atoms_for_rmsd:
            ##         r = a.get_parent()
            ##         if r != rt:
            ##             rt = r
            ##             tseq += r.get_resname().strip()
        self.tseq = tseq
        return self.atoms

    def __str__(self):
        return self.fn #+ ' # beads' + str(len(self.residues))

    def __repr__(self):
        return self.fn #+ ' # beads' + str(len(self.residues))

    def get_report(self):
        """Str a short report about rna model""" 
        t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
        for r,a in zip(self.residues, self.atoms ):
            t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
        return t

    def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
        """Calc rmsd P-atom based rmsd to other rna model

        sugar now 10 atoms ;-) """
        sup = Bio.PDB.Superimposer()

        other_atoms_for_rmsd = other_rnamodel.atoms
        
        if triple_mode:
            def chunks(lst, n):
                """Yield successive n-sized chunks from lst.
                https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
                """
                for i in range(0, len(lst), n):
                    yield lst[i:i + n]

            rmsd_min = 10000 # ugly
            import itertools
            # ok, for different residues now it's a problem  
            per = list(itertools.permutations([0, 1, 2])) # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
            #lst = list(chunks(other_atoms_for_rmsd,
            #                  int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
            #           # so so len is 3 atoms so / by 3 to get how many atoms per residue

            self.atoms_for_rmsd = []
            for i in self.atoms:
                self.atoms_for_rmsd.extend(i)
        
            sup_min = None
            seq_min = 'not yet obtained, rmsd rejected!'
            p_min = None

            rms = -1

            for p in per: # for each combo make a list of atoms
                
                patoms = []
                for i in p: # p=(1, 2, 3)
                    #patoms.extend(lst[i])
                    # list is not needed, just other_atoms !!!! it's a list of [1, 2, 3] residues
                    patoms.extend(other_atoms_for_rmsd[i])

                #print(self.atoms_for_rmsd)
                ## print('patoms')
                ## for a in patoms:
                ##     print(a, a.get_parent().get_id())
                ## print('self.atoms_for_rmsd')
                ## for a in self.atoms_for_rmsd:
                ##     print(a, a.get_parent().get_id())
                #sup.set_atoms(patoms, self.atoms_for_rmsd)

                rt = None
                seq = ''
                for a in patoms:
                    r = a.get_parent()
                    if r != rt:
                        rt = r
                        seq += r.get_resname().strip()
             
                ic(self.tseq.lower(), seq.lower(), other_rnamodel.fpath)
                # dont' even calc rmsd if the curr seq and tseq are not the same
                if self.tseq.lower() == seq.lower():  # only if seq is the same

                    sup.set_atoms(self.atoms_for_rmsd, patoms)
                    rms = round(sup.rms, 2)
                    if rms < rmsd_min:
                            rmsd_min = rms
                            sup_min = copy.copy(sup)
                            suffix = seq
                            p_min = p
                            seq_min = seq
                            if args.debug: 'set new rmsd_min', rmsd_min


                    if args.debug:
                        print(p, '', [i + 1 for i in p], end=' ')
                        print(seq, 'seq_min: ' + seq_min, rms)

            if p_min: # found target sequence

                # what is this? ;-)
                index = [0 ,0 ,0]
                index[0] = p_min.index(0)
                index[1] = p_min.index(1)
                index[2] = p_min.index(2)

                # ugly re-set 123 to crazy id ! + 100, so this will
                # fill up 1 2 3 for the second for
                rs = other_rnamodel.struc[0].get_residues()
                for i, r in enumerate(rs):
                    r.id = (' ', index[i] + 253, ' ') # ugly, some random offset

                for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
                    r.id = (' ', index[i] + 1, ' ')
                    if args.debug: print('r', r)

                io = Bio.PDB.PDBIO()
                sup_min.apply(other_rnamodel.struc.get_atoms())
                # if args.debug: print(p_min, [i + 1 for i in p_min])
                io.set_structure(other_rnamodel.struc)

                args.save_here = True
                if args.save_here and save:
                    folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
                    # print(f)
                    try:
                        os.mkdir(folder)
                    except:
                        pass
                    fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
                    #_s' + suffix + '.pdb'))
                else:
                    fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')

                if args.debug: print(fout)

                if save:
                    io.save(fout)
                    # ugly set chain to A
                    set_chain_for_struc(fout, 'A', save_file_inplace=True)
                    # and now run this to sort into 1 2 3

                    r = RNAStructure(fout)
                    remarks = r.get_remarks_text()
                    r1 = r.get_res_text('A', 1)
                    r2 = r.get_res_text('A', 2)
                    r3 = r.get_res_text('A', 3)
                    with open(fout, 'w') as f:
                        f.write(remarks)
                        f.write(r1)
                        f.write(r2)
                        f.write(r3)
                    r.reload()
                    r.get_rnapuzzle_ready()
                    if rmsd_min < 1:  # !!!!!!!!!!!! ugly
                         r.write()
                return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
        else:
            # check if number of the same atoms #
            # if not the same then return -1    #
            # print(len(self.atoms_for_rmsd), len(other_atoms_for_rmsd))
            if len(self.atoms_for_rmsd) != len(other_atoms_for_rmsd):
                return -1
            sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
            rms = round(sup.rms, 2)

            if save:
                ## io = Bio.PDB.PDBIO()
                ## sup.apply(self.struc.get_atoms())
                ## io.set_structure(self.struc)
                ## io.save("aligned.pdb")

                io = Bio.PDB.PDBIO()
                sup.apply(other_rnamodel.struc.get_atoms())
                io.set_structure(other_rnamodel.struc)

            args.save_here = True
            if args.save_here and save:

                f = os.path.basename(self.fpath.replace('.pdb', '_aligned'))
                # print(f)
                try:
                    os.mkdir(f)
                except:
                    pass
                # fout = f + os.sep + os.path.basename(other_rnamodel.fpath.replace('.pdb', '_aligned_s' + suffix + '.pdb'))
                fout = f + os.sep + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '_aligned'))
            else:
                fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')
            if save:
               io.save(fout)

        return rms

    
def get_rna_models_from_dir(directory):

    models = []
    if not os.path.exists(directory):
        raise Exception('Dir does not exist! ', directory)
    files = glob.glob(directory + "/*.pdb")
    files_sorted = sort_nicely(files)
    for f in files_sorted:
        #print f
        models.append(RNAmodel(f))
    return models

def sort_nicely( l ):
   """ Sort the given list in the way that humans expect.

   http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
   """
   convert = lambda text: int(text) if text.isdigit() else text
   alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
   l.sort( key=alphanum_key )
   return l


def get_parser():

    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-t',"--target",
                         help="target file")
    parser.add_argument('--result', #default='out.rmsd',
                         help="result file")
    parser.add_argument('--ignore-files', default='aligned', help="use to ignore files, .e.g. with 'aligned'")
    parser.add_argument('--suffix', default='aligned', help="used with --saved, by default: aligned")
    parser.add_argument('--way', help="e.g., backbone+sugar, c1, c1+Nx")
    parser.add_argument('--triple-mode', help="same crazy strategy to align triples", default=True, action="store_true")
    parser.add_argument('--column-name', help="name column for rmsd, by default 'rmsd', but can also be a name of the target file", default="rmsd")
    parser.add_argument("-s", "--save", action="store_true", help="set suffix with --suffix, by default: aligned")
    parser.add_argument("--folder-prefix", default = '', help="folder name, t2-3-UAU_NAME_aligned")

    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--show", action="store_true", help="show the results in the terminal")
    parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending), --result must be set up')
    parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
    parser.add_argument('--files', help='files', nargs='+', default=RT + '/rna_tools/tools/triplexibility/db/triples-all-v2-rpr/*.pdb')
    return parser

# main
if __name__ == '__main__':

    parser = get_parser()
    args = parser.parse_args()
    if not args.debug:
        ic.disable()

    target = RNAmodel(args.target)

    # a trick to get files be default if there is only a path (so a string ;-))
    if not isinstance(args.files, str):
        models = args.files
    else:
        import glob
        models = glob.glob(args.files) # opts.input_dir

    ## tmp = []
    ## if args.ignore_files:
    ##     for f in args.files:
    ##         if args.debug: print(f)
    ##         if args.ignore_files in f:
    ##             continue
    ##         tmp.append(f)
    ##     models = tmp
 
    print('# of models:', len(models))

    c = 1
    t = 'fn,' + args.column_name + '\n' # ,aligned_seq, aligned_fn\n'
    for m in models:
        mrna = RNAmodel(m)
        #print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
        # print(m)
        rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
        #except:
        if 0:
            print(m)
            sys.exit(1)
        #print rmsd
        t += mrna.fn + ',' + str(rmsd) + '\n'
        #break    

    if args.show:
        print(t.strip()) # all the data

    if args.result:
        with open(args.result, 'w') as f:
            f.write(t)

        if args.sort:
            import pandas as pd
            df = pd.read_csv(args.result)
            df = df[df.rmsd != -1]
            df = df.sort_values('rmsd')
            df.to_csv(args.result, index=False)
            print('saved: %s' % args.result)
            # print(df.to_string(index=False))


1		#!/usr/bin/env python
2		# -- coding: utf-8 --
3
4		"""
5		v2 is full atom!
6
7		Examples::
8
9		triplexibility.py -t 2nd_triplex_FB_ABC.pdb triples-all-v2-rpr/*.pdb --save --result abc.csv
10
11		"""
12		from __future__ import print_function
13
14		import Bio.PDB.PDBParser
15		import Bio.PDB.Superimposer
16		import copy
17		import warnings
18		warnings.filterwarnings('ignore', '.Invalid or missing.',)
19		warnings.filterwarnings('ignore', '.with given element ',)
20		warnings.filterwarnings('ignore', '.is discontinuous',)
21		warnings.filterwarnings('ignore', '.Ignoring unrecognized record',)
22
23		import sys
24		from icecream import ic
25		ic.configureOutput(outputFunction=lambda a: print(a, file=sys.stderr))
26		ic.configureOutput(prefix='> ')
27
28		from rna_tools.rna_tools_lib import set_chain_for_struc, RNAStructure
29		from rna_tools.rna_tools_config import RT
30
31		import argparse
32		import sys
33		import glob
34		import re
35		import os
36
37		class RNAmodel:
38		def __init__(self, fpath):
39		# parser 1-5 -> 1 2 3 4 5
40		self.struc = Bio.PDB.PDBParser().get_structure('', fpath)
41		self.__get_atoms()
42		self.fpath = fpath
43		self.fn = os.path.basename(fpath)
44		# self.atoms = []
45		# if save:
46		# self.save() # @save
47
48		def __get_atoms(self):
49		self.atoms = [[], [], []] # this will be only 3 resides
50		# ugly but should work
51		tseq = ''
52		for index, res in enumerate(self.struc.get_residues()):
53		self.atoms[index] = []
54		for at in res:
55		self.atoms[index].append(at)
56		tseq += res.get_resname()
57		## # calc rmsd #
58		## if not tseq:
59		## tseq = ''
60		## rt = None
61		## for a in self.atoms_for_rmsd:
62		## r = a.get_parent()
63		## if r != rt:
64		## rt = r
65		## tseq += r.get_resname().strip()
66		self.tseq = tseq
67		return self.atoms
68
69		def __str__(self):
70		return self.fn #+ ' # beads' + str(len(self.residues))
71
72		def __repr__(self):
73		return self.fn #+ ' # beads' + str(len(self.residues))
74
75		def get_report(self):
76		"""Str a short report about rna model"""
77		t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n'])
78		for r,a in zip(self.residues, self.atoms ):
79		t += ' '.join(['resi: ', str(r) ,' atom: ', str(a) , '\n' ])
80		return t
81
82		def get_rmsd_to(self, other_rnamodel, way="", triple_mode=False, save=True, tseq=''):
83		"""Calc rmsd P-atom based rmsd to other rna model
84
85		sugar now 10 atoms ;-) """
86		sup = Bio.PDB.Superimposer()
87
88		other_atoms_for_rmsd = other_rnamodel.atoms
89
90		if triple_mode:
91		def chunks(lst, n):
92		"""Yield successive n-sized chunks from lst.
93		https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
94		"""
95		for i in range(0, len(lst), n):
96		yield lst[i:i + n]
97
98		rmsd_min = 10000 # ugly
99		import itertools
100		# ok, for different residues now it's a problem
101		per = list(itertools.permutations([0, 1, 2])) # [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)]
102		#lst = list(chunks(other_atoms_for_rmsd,
103		# int(len(other_atoms_for_rmsd)/3))) # for 1 atom, this will be 1 x 3 [3 residues]
104		# # so so len is 3 atoms so / by 3 to get how many atoms per residue
105
106		self.atoms_for_rmsd = []
107		for i in self.atoms:
108		self.atoms_for_rmsd.extend(i)
109
110		sup_min = None
111		seq_min = 'not yet obtained, rmsd rejected!'
112		p_min = None
113
114		rms = -1
115
116		for p in per: # for each combo make a list of atoms
117
118		patoms = []
119		for i in p: # p=(1, 2, 3)
120		#patoms.extend(lst[i])
121		# list is not needed, just other_atoms !!!! it's a list of [1, 2, 3] residues
122		patoms.extend(other_atoms_for_rmsd[i])
123
124		#print(self.atoms_for_rmsd)
125		## print('patoms')
126		## for a in patoms:
127		## print(a, a.get_parent().get_id())
128		## print('self.atoms_for_rmsd')
129		## for a in self.atoms_for_rmsd:
130		## print(a, a.get_parent().get_id())
131		#sup.set_atoms(patoms, self.atoms_for_rmsd)
132
133		rt = None
134		seq = ''
135		for a in patoms:
136		r = a.get_parent()
137		if r != rt:
138		rt = r
139		seq += r.get_resname().strip()
140
141		ic(self.tseq.lower(), seq.lower(), other_rnamodel.fpath)
142		# dont' even calc rmsd if the curr seq and tseq are not the same
143		if self.tseq.lower() == seq.lower(): # only if seq is the same
144
145		sup.set_atoms(self.atoms_for_rmsd, patoms)
146		rms = round(sup.rms, 2)
147		if rms < rmsd_min:
148		rmsd_min = rms
149		sup_min = copy.copy(sup)
150		suffix = seq
151		p_min = p
152		seq_min = seq
153		if args.debug: 'set new rmsd_min', rmsd_min
		0 ignored issues – show introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report The variable `args` does not seem to be defined in case `__name__ == '__main__'` on line `299` is `False`. Are you sure this can never be the case? Loading history...
154
155		if args.debug:
156		print(p, '', [i + 1 for i in p], end=' ')
157		print(seq, 'seq_min: ' + seq_min, rms)
158
159	View Code Duplication	if p_min: # found target sequence
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
160		# what is this? ;-)
161		index = [0 ,0 ,0]
162		index[0] = p_min.index(0)
163		index[1] = p_min.index(1)
164		index[2] = p_min.index(2)
165
166		# ugly re-set 123 to crazy id ! + 100, so this will
167		# fill up 1 2 3 for the second for
168		rs = other_rnamodel.struc[0].get_residues()
169		for i, r in enumerate(rs):
170		r.id = (' ', index[i] + 253, ' ') # ugly, some random offset
171
172		for i, r in enumerate(other_rnamodel.struc[0].get_residues()):
173		r.id = (' ', index[i] + 1, ' ')
174		if args.debug: print('r', r)
175
176		io = Bio.PDB.PDBIO()
177		sup_min.apply(other_rnamodel.struc.get_atoms())
178		# if args.debug: print(p_min, [i + 1 for i in p_min])
179		io.set_structure(other_rnamodel.struc)
180
181		args.save_here = True
182		if args.save_here and save:
183		folder = os.path.basename(self.fpath.replace('.pdb', '_' + args.folder_prefix + '_aligned'))
184		# print(f)
185		try:
186		os.mkdir(folder)
187		except:
188		pass
189		fout = folder + os.sep + "{:1.2f}".format(rmsd_min) + '-' + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '-' + str(rms) + '.pdb'))
190		#_s' + suffix + '.pdb'))
191		else:
192		fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')#_s' + suffix + '.pdb')
193
194		if args.debug: print(fout)
195
196		if save:
197		io.save(fout)
198		# ugly set chain to A
199		set_chain_for_struc(fout, 'A', save_file_inplace=True)
200		# and now run this to sort into 1 2 3
201
202		r = RNAStructure(fout)
203		remarks = r.get_remarks_text()
204		r1 = r.get_res_text('A', 1)
205		r2 = r.get_res_text('A', 2)
206		r3 = r.get_res_text('A', 3)
207		with open(fout, 'w') as f:
208		f.write(remarks)
209		f.write(r1)
210		f.write(r2)
211		f.write(r3)
212		r.reload()
213		r.get_rnapuzzle_ready()
214		if rmsd_min < 1: # !!!!!!!!!!!! ugly
215		r.write()
216		return str(rmsd_min)# + ',s' + seq_min + ',' + os.path.basename(fout)
217		else:
218		# check if number of the same atoms #
219		# if not the same then return -1 #
220		# print(len(self.atoms_for_rmsd), len(other_atoms_for_rmsd))
221		if len(self.atoms_for_rmsd) != len(other_atoms_for_rmsd):
222		return -1
223		sup.set_atoms(self.atoms_for_rmsd, other_atoms_for_rmsd)
224		rms = round(sup.rms, 2)
225
226		if save:
227		## io = Bio.PDB.PDBIO()
228		## sup.apply(self.struc.get_atoms())
229		## io.set_structure(self.struc)
230		## io.save("aligned.pdb")
231
232		io = Bio.PDB.PDBIO()
233		sup.apply(other_rnamodel.struc.get_atoms())
234		io.set_structure(other_rnamodel.struc)
235
236		args.save_here = True
237	View Code Duplication	if args.save_here and save:
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
238		f = os.path.basename(self.fpath.replace('.pdb', '_aligned'))
239		# print(f)
240		try:
241		os.mkdir(f)
242		except:
243		pass
244		# fout = f + os.sep + os.path.basename(other_rnamodel.fpath.replace('.pdb', '_aligned_s' + suffix + '.pdb'))
245		fout = f + os.sep + os.path.basename(other_rnamodel.fpath)#.replace('.pdb', '_aligned'))
246		else:
247		fout = other_rnamodel.fpath.replace('.pdb', '_aligned.pdb')
248		if save:
249		io.save(fout)
		0 ignored issues – show introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report The variable `io` does not seem to be defined in case `save` on line `226` is `False`. Are you sure this can never be the case? Loading history...
250		return rms
251
252
253	View Code Duplication	def get_rna_models_from_dir(directory):
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
254		models = []
255		if not os.path.exists(directory):
256		raise Exception('Dir does not exist! ', directory)
257		files = glob.glob(directory + "/*.pdb")
258		files_sorted = sort_nicely(files)
259		for f in files_sorted:
260		#print f
261		models.append(RNAmodel(f))
262		return models
263
264		def sort_nicely( l ):
265		""" Sort the given list in the way that humans expect.
266
267		http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/
268		"""
269		convert = lambda text: int(text) if text.isdigit() else text
270		alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
271		l.sort( key=alphanum_key )
272		return l
273
274
275	View Code Duplication	def get_parser():
		0 ignored issues – show Duplication introduced 2020-06-29 19:56 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
276		parser = argparse.ArgumentParser(
277		description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
278
279		parser.add_argument('-t',"--target",
280		help="target file")
281		parser.add_argument('--result', #default='out.rmsd',
282		help="result file")
283		parser.add_argument('--ignore-files', default='aligned', help="use to ignore files, .e.g. with 'aligned'")
284		parser.add_argument('--suffix', default='aligned', help="used with --saved, by default: aligned")
285		parser.add_argument('--way', help="e.g., backbone+sugar, c1, c1+Nx")
286		parser.add_argument('--triple-mode', help="same crazy strategy to align triples", default=True, action="store_true")
287		parser.add_argument('--column-name', help="name column for rmsd, by default 'rmsd', but can also be a name of the target file", default="rmsd")
288		parser.add_argument("-s", "--save", action="store_true", help="set suffix with --suffix, by default: aligned")
289		parser.add_argument("--folder-prefix", default = '', help="folder name, t2-3-UAU_NAME_aligned")
290
291		parser.add_argument("--debug", action="store_true")
292		parser.add_argument("--show", action="store_true", help="show the results in the terminal")
293		parser.add_argument("--sort", action="store_true", help='sort results based on rmsd (ascending), --result must be set up')
294		parser.add_argument("--tseq", help='target sequence, e.g. acu, find only triples of this sequence [use the order for the seq taken from the input PDB file, literally order of residues in a pdb file]')
295		parser.add_argument('--files', help='files', nargs='+', default=RT + '/rna_tools/tools/triplexibility/db/triples-all-v2-rpr/*.pdb')
296		return parser
297
298		# main
299	View Code Duplication	if __name__ == '__main__':
		0 ignored issues – show Duplication introduced 2022-03-26 14:52 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
300		parser = get_parser()
301		args = parser.parse_args()
302		if not args.debug:
303		ic.disable()
304
305		target = RNAmodel(args.target)
306
307		# a trick to get files be default if there is only a path (so a string ;-))
308		if not isinstance(args.files, str):
309		models = args.files
310		else:
311		import glob
312		models = glob.glob(args.files) # opts.input_dir
313
314		## tmp = []
315		## if args.ignore_files:
316		## for f in args.files:
317		## if args.debug: print(f)
318		## if args.ignore_files in f:
319		## continue
320		## tmp.append(f)
321		## models = tmp
322
323		print('# of models:', len(models))
324
325		c = 1
326		t = 'fn,' + args.column_name + '\n' # ,aligned_seq, aligned_fn\n'
327		for m in models:
328		mrna = RNAmodel(m)
329		#print r1.fn, r2.fn, r1.get_rmsd_to(r2)#, 'tmp.pdb')
330		# print(m)
331		rmsd = target.get_rmsd_to(mrna, args.way, args.triple_mode, args.save, args.tseq) #, 'tmp.pdb')
332		#except:
333		if 0:
334		print(m)
335		sys.exit(1)
336		#print rmsd
337		t += mrna.fn + ',' + str(rmsd) + '\n'
338		#break
339
340		if args.show:
341		print(t.strip()) # all the data
342
343		if args.result:
344		with open(args.result, 'w') as f:
345		f.write(t)
346
347		if args.sort:
348		import pandas as pd
349		df = pd.read_csv(args.result)
350		df = df[df.rmsd != -1]
351		df = df.sort_values('rmsd')
352		df.to_csv(args.result, index=False)
353		print('saved: %s' % args.result)
354		# print(df.to_string(index=False))
355

mmagnus / rna-tools

triplexibility2.get_rna_models_from_dir() A last analyzed 2025-12-11 14:59 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

triplexibility2.get_rna_models_from_dir() A
last analyzed 2025-12-11 14:59 UTC