Code Duplication - mmagnus/rna-tools - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 106-106 lines in 2 locations

rna_tools/rna_cif2pdb.py 1 location


    return parser, version


if __name__ == '__main__':
    parser, version = get_parser()
    args = parser.parse_args()

    if list != type(args.file):
        args.file = [args.file]

    for cif_file in args.file:
        from Bio.PDB import MMCIFParser, PDBIO
        parser = MMCIFParser()
        structure = parser.get_structure("structure_id", cif_file)
        pdb_file = cif_file.replace('.cif', '_fCIF.pdb')

        try:
            # Save to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)

            print(f'saved: {pdb_file}')
            # open a file add remarks
            new_file = ''
            with open(pdb_file, 'r') as f:
                if not args.no_hr:
                    new_file += add_header(version) + '\n'
                new_file += f.read()

            with open(pdb_file, 'w') as f:
                f.write(new_file)

        except:
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            def has_high_rna_content(chain, threshold=0.8):
                # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
                rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
                total_residues = 0
                rna_residues = 0

                # Count the total number of residues and RNA-like residues
                for residue in chain:
                    total_residues += 1
                    if residue.get_resname().strip() in rna_nucleotides:
                        rna_residues += 1

                # Calculate the proportion of RNA residues
                if total_residues == 0:
                    return False  # Avoid division by zero if chain has no residues

                rna_percentage = rna_residues / total_residues

                # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
                return rna_percentage >= threshold

            from Bio.PDB.MMCIFParser import MMCIFParser
            from Bio.PDB import MMCIFParser, Structure, Model, Chain

            # Initialize the parser
            parser = MMCIFParser()

            # Parse the structure
            structure = parser.get_structure("structure", cif_file)

            # Create a list of single-letter chain identifiers
            import string
            letters = list(string.ascii_uppercase)

            for model in structure:
                for chain in model:
                    if has_high_rna_content(chain):
                        # New structure
                        new_structure = Structure.Structure("new_structure")
                        new_model = Model.Model(0)  # Create a new model
                        new_structure.add(new_model)  # Add the new model to the new structure

                        chain_id_new = letters.pop(0)
                        chain_id = chain.get_id()

                        atom_count = 0
                        for residue in chain:
                              for atom in residue:
                                   atom_count += 1

                        remarks = []
                        remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')

                        pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
                        print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

                        chain.id = chain_id_new
                        new_model.add(chain)

                        io = PDBIO()
                        io.set_structure(new_structure)

                        io.save(pdb_file)
                        # open a file add remarks
                        new_file = ''
                        with open(pdb_file, 'r') as f:
                            if not args.no_hr:
                                new_file += add_header(version) + '\n'
                            if remarks:
                                new_file += '\n'.join(remarks) + '\n'
                            new_file += f.read()

                        with open(pdb_file, 'w') as f:
                            f.write(new_file)


rna_tools/rna_pdb_tools.py 1 location


            
    from rna_tools.rna_tools_config import PYMOL_PATH
    sys.path.insert(0, PYMOL_PATH)
    if args.cif2pdb:
        # quick fix - make a list on the spot
        if list != type(args.file):
            args.file = [args.file]
        ##################################
        for cif_file in args.file:
            from Bio.PDB import MMCIFParser, PDBIO
            parser = MMCIFParser()
            structure = parser.get_structure("structure_id", cif_file)
            pdb_file = cif_file.replace('.cif', '_fCIF.pdb')


            try:
                # Save to PDB format
                io = PDBIO()
                io.set_structure(structure)
                io.save(pdb_file)

                print(f'saved: {pdb_file}')
                # open a file add remarks
                new_file = ''
                with open(pdb_file, 'r') as f:
                    if not args.no_hr:
                        new_file += add_header(version) + '\n'
                    new_file += f.read()

                with open(pdb_file, 'w') as f:
                    f.write(new_file)

            except:
                print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')

                def has_high_rna_content(chain, threshold=0.8):
                    # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
                    rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
                    total_residues = 0
                    rna_residues = 0

                    # Count the total number of residues and RNA-like residues
                    for residue in chain:
                        total_residues += 1
                        if residue.get_resname().strip() in rna_nucleotides:
                            rna_residues += 1

                    # Calculate the proportion of RNA residues
                    if total_residues == 0:
                        return False  # Avoid division by zero if chain has no residues

                    rna_percentage = rna_residues / total_residues

                    # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
                    return rna_percentage >= threshold

                from Bio.PDB.MMCIFParser import MMCIFParser
                from Bio.PDB import MMCIFParser, Structure, Model, Chain
                
                # Initialize the parser
                parser = MMCIFParser()

                # Parse the structure
                structure = parser.get_structure("structure", cif_file)

                # Create a list of single-letter chain identifiers
                import string
                letters = list(string.ascii_uppercase)

                for model in structure:
                    for chain in model:
                        if has_high_rna_content(chain):
                            # New structure
                            new_structure = Structure.Structure("new_structure")
                            new_model = Model.Model(0)  # Create a new model
                            new_structure.add(new_model)  # Add the new model to the new structure

                            chain_id_new = letters.pop(0)
                            chain_id = chain.get_id()

                            atom_count = 0
                            for residue in chain:
                                  for atom in residue:
                                       atom_count += 1

                            remarks = []
                            remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')

                            pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
                            print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

                            chain.id = chain_id_new
                            new_model.add(chain)

                            io = PDBIO()
                            io.set_structure(new_structure)
                            
                            io.save(pdb_file)
                            # open a file add remarks
                            new_file = ''
                            with open(pdb_file, 'r') as f:
                                if not args.no_hr:
                                    new_file += add_header(version) + '\n'
                                if remarks:
                                    new_file += '\n'.join(remarks) + '\n'
                                new_file += f.read()

                            with open(pdb_file, 'w') as f:
                                f.write(new_file)

    if args.pdb2cif:
        try:

		@@ 34-139 (lines=106) @@
31		return parser, version
32
33
34		if __name__ == '__main__':
35		parser, version = get_parser()
36		args = parser.parse_args()
37
38		if list != type(args.file):
39		args.file = [args.file]
40
41		for cif_file in args.file:
42		from Bio.PDB import MMCIFParser, PDBIO
43		parser = MMCIFParser()
44		structure = parser.get_structure("structure_id", cif_file)
45		pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
46
47		try:
48		# Save to PDB format
49		io = PDBIO()
50		io.set_structure(structure)
51		io.save(pdb_file)
52
53		print(f'saved: {pdb_file}')
54		# open a file add remarks
55		new_file = ''
56		with open(pdb_file, 'r') as f:
57		if not args.no_hr:
58		new_file += add_header(version) + '\n'
59		new_file += f.read()
60
61		with open(pdb_file, 'w') as f:
62		f.write(new_file)
63
64		except:
65		print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
66		def has_high_rna_content(chain, threshold=0.8):
67		# RNA nucleotides: A, C, G, U, and X (you can modify as needed)
68		rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
69		total_residues = 0
70		rna_residues = 0
71
72		# Count the total number of residues and RNA-like residues
73		for residue in chain:
74		total_residues += 1
75		if residue.get_resname().strip() in rna_nucleotides:
76		rna_residues += 1
77
78		# Calculate the proportion of RNA residues
79		if total_residues == 0:
80		return False # Avoid division by zero if chain has no residues
81
82		rna_percentage = rna_residues / total_residues
83
84		# Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
85		return rna_percentage >= threshold
86
87		from Bio.PDB.MMCIFParser import MMCIFParser
88		from Bio.PDB import MMCIFParser, Structure, Model, Chain
89
90		# Initialize the parser
91		parser = MMCIFParser()
92
93		# Parse the structure
94		structure = parser.get_structure("structure", cif_file)
95
96		# Create a list of single-letter chain identifiers
97		import string
98		letters = list(string.ascii_uppercase)
99
100		for model in structure:
101		for chain in model:
102		if has_high_rna_content(chain):
103		# New structure
104		new_structure = Structure.Structure("new_structure")
105		new_model = Model.Model(0) # Create a new model
106		new_structure.add(new_model) # Add the new model to the new structure
107
108		chain_id_new = letters.pop(0)
109		chain_id = chain.get_id()
110
111		atom_count = 0
112		for residue in chain:
113		for atom in residue:
114		atom_count += 1
115
116		remarks = []
117		remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
118
119		pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
120		print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
121
122		chain.id = chain_id_new
123		new_model.add(chain)
124
125		io = PDBIO()
126		io.set_structure(new_structure)
127
128		io.save(pdb_file)
129		# open a file add remarks
130		new_file = ''
131		with open(pdb_file, 'r') as f:
132		if not args.no_hr:
133		new_file += add_header(version) + '\n'
134		if remarks:
135		new_file += '\n'.join(remarks) + '\n'
136		new_file += f.read()
137
138		with open(pdb_file, 'w') as f:
139		f.write(new_file)
140

		@@ 1346-1451 (lines=106) @@
1343
1344		from rna_tools.rna_tools_config import PYMOL_PATH
1345		sys.path.insert(0, PYMOL_PATH)
1346		if args.cif2pdb:
1347		# quick fix - make a list on the spot
1348		if list != type(args.file):
1349		args.file = [args.file]
1350		##################################
1351		for cif_file in args.file:
1352		from Bio.PDB import MMCIFParser, PDBIO
1353		parser = MMCIFParser()
1354		structure = parser.get_structure("structure_id", cif_file)
1355		pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
1356
1357
1358		try:
1359		# Save to PDB format
1360		io = PDBIO()
1361		io.set_structure(structure)
1362		io.save(pdb_file)
1363
1364		print(f'saved: {pdb_file}')
1365		# open a file add remarks
1366		new_file = ''
1367		with open(pdb_file, 'r') as f:
1368		if not args.no_hr:
1369		new_file += add_header(version) + '\n'
1370		new_file += f.read()
1371
1372		with open(pdb_file, 'w') as f:
1373		f.write(new_file)
1374
1375		except:
1376		print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
1377
1378		def has_high_rna_content(chain, threshold=0.8):
1379		# RNA nucleotides: A, C, G, U, and X (you can modify as needed)
1380		rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
1381		total_residues = 0
1382		rna_residues = 0
1383
1384		# Count the total number of residues and RNA-like residues
1385		for residue in chain:
1386		total_residues += 1
1387		if residue.get_resname().strip() in rna_nucleotides:
1388		rna_residues += 1
1389
1390		# Calculate the proportion of RNA residues
1391		if total_residues == 0:
1392		return False # Avoid division by zero if chain has no residues
1393
1394		rna_percentage = rna_residues / total_residues
1395
1396		# Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
1397		return rna_percentage >= threshold
1398
1399		from Bio.PDB.MMCIFParser import MMCIFParser
1400		from Bio.PDB import MMCIFParser, Structure, Model, Chain
1401
1402		# Initialize the parser
1403		parser = MMCIFParser()
1404
1405		# Parse the structure
1406		structure = parser.get_structure("structure", cif_file)
1407
1408		# Create a list of single-letter chain identifiers
1409		import string
1410		letters = list(string.ascii_uppercase)
1411
1412		for model in structure:
1413		for chain in model:
1414		if has_high_rna_content(chain):
1415		# New structure
1416		new_structure = Structure.Structure("new_structure")
1417		new_model = Model.Model(0) # Create a new model
1418		new_structure.add(new_model) # Add the new model to the new structure
1419
1420		chain_id_new = letters.pop(0)
1421		chain_id = chain.get_id()
1422
1423		atom_count = 0
1424		for residue in chain:
1425		for atom in residue:
1426		atom_count += 1
1427
1428		remarks = []
1429		remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
1430
1431		pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
1432		print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
1433
1434		chain.id = chain_id_new
1435		new_model.add(chain)
1436
1437		io = PDBIO()
1438		io.set_structure(new_structure)
1439
1440		io.save(pdb_file)
1441		# open a file add remarks
1442		new_file = ''
1443		with open(pdb_file, 'r') as f:
1444		if not args.no_hr:
1445		new_file += add_header(version) + '\n'
1446		if remarks:
1447		new_file += '\n'.join(remarks) + '\n'
1448		new_file += f.read()
1449
1450		with open(pdb_file, 'w') as f:
1451		f.write(new_file)
1452
1453		if args.pdb2cif:
1454		try:

mmagnus / rna-tools

Code Duplication Length = 106-106 lines in 2 locations

rna_tools/rna_cif2pdb.py 1 location

rna_tools/rna_pdb_tools.py 1 location