Code Duplication — Length = 106 lines in 2 locations

rna_tools/rna_cif2pdb.py 1 location

@@ 34-139 (lines=106) @@
31
    return parser, version
32
33
34
if __name__ == '__main__':
35
    parser, version = get_parser()
36
    args = parser.parse_args()
37
38
    if list != type(args.file):
39
        args.file = [args.file]
40
41
    for cif_file in args.file:
42
        from Bio.PDB import MMCIFParser, PDBIO
43
        parser = MMCIFParser()
44
        structure = parser.get_structure("structure_id", cif_file)
45
        pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
46
47
        try:
48
            # Save to PDB format
49
            io = PDBIO()
50
            io.set_structure(structure)
51
            io.save(pdb_file)
52
53
            print(f'saved: {pdb_file}')
54
            # open a file add remarks
55
            new_file = ''
56
            with open(pdb_file, 'r') as f:
57
                if not args.no_hr:
58
                    new_file += add_header(version) + '\n'
59
                new_file += f.read()
60
61
            with open(pdb_file, 'w') as f:
62
                f.write(new_file)
63
64
        except:
65
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
66
            def has_high_rna_content(chain, threshold=0.8):
67
                # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
68
                rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
69
                total_residues = 0
70
                rna_residues = 0
71
72
                # Count the total number of residues and RNA-like residues
73
                for residue in chain:
74
                    total_residues += 1
75
                    if residue.get_resname().strip() in rna_nucleotides:
76
                        rna_residues += 1
77
78
                # Calculate the proportion of RNA residues
79
                if total_residues == 0:
80
                    return False  # Avoid division by zero if chain has no residues
81
82
                rna_percentage = rna_residues / total_residues
83
84
                # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
85
                return rna_percentage >= threshold
86
87
            from Bio.PDB.MMCIFParser import MMCIFParser
88
            from Bio.PDB import MMCIFParser, Structure, Model, Chain
89
90
            # Initialize the parser
91
            parser = MMCIFParser()
92
93
            # Parse the structure
94
            structure = parser.get_structure("structure", cif_file)
95
96
            # Create a list of single-letter chain identifiers
97
            import string
98
            letters = list(string.ascii_uppercase)
99
100
            for model in structure:
101
                for chain in model:
102
                    if has_high_rna_content(chain):
103
                        # New structure
104
                        new_structure = Structure.Structure("new_structure")
105
                        new_model = Model.Model(0)  # Create a new model
106
                        new_structure.add(new_model)  # Add the new model to the new structure
107
108
                        chain_id_new = letters.pop(0)
109
                        chain_id = chain.get_id()
110
111
                        atom_count = 0
112
                        for residue in chain:
113
                              for atom in residue:
114
                                   atom_count += 1
115
116
                        remarks = []
117
                        remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
118
119
                        pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
120
                        print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
121
122
                        chain.id = chain_id_new
123
                        new_model.add(chain)
124
125
                        io = PDBIO()
126
                        io.set_structure(new_structure)
127
128
                        io.save(pdb_file)
129
                        # open a file add remarks
130
                        new_file = ''
131
                        with open(pdb_file, 'r') as f:
132
                            if not args.no_hr:
133
                                new_file += add_header(version) + '\n'
134
                            if remarks:
135
                                new_file += '\n'.join(remarks) + '\n'
136
                            new_file += f.read()
137
138
                        with open(pdb_file, 'w') as f:
139
                            f.write(new_file)
140

rna_tools/rna_pdb_tools.py 1 location

@@ 1346-1451 (lines=106) @@
1343
            
1344
    from rna_tools.rna_tools_config import PYMOL_PATH
1345
    sys.path.insert(0, PYMOL_PATH)
1346
    if args.cif2pdb:
1347
        # quick fix - make a list on the spot
1348
        if list != type(args.file):
1349
            args.file = [args.file]
1350
        ##################################
1351
        for cif_file in args.file:
1352
            from Bio.PDB import MMCIFParser, PDBIO
1353
            parser = MMCIFParser()
1354
            structure = parser.get_structure("structure_id", cif_file)
1355
            pdb_file = cif_file.replace('.cif', '_fCIF.pdb')
1356
1357
1358
            try:
1359
                # Save to PDB format
1360
                io = PDBIO()
1361
                io.set_structure(structure)
1362
                io.save(pdb_file)
1363
1364
                print(f'saved: {pdb_file}')
1365
                # open a file add remarks
1366
                new_file = ''
1367
                with open(pdb_file, 'r') as f:
1368
                    if not args.no_hr:
1369
                        new_file += add_header(version) + '\n'
1370
                    new_file += f.read()
1371
1372
                with open(pdb_file, 'w') as f:
1373
                    f.write(new_file)
1374
1375
            except:
1376
                print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
1377
1378
                def has_high_rna_content(chain, threshold=0.8):
1379
                    # RNA nucleotides: A, C, G, U, and X (you can modify as needed)
1380
                    rna_nucleotides = ['A', 'C', 'G', 'U', 'X']
1381
                    total_residues = 0
1382
                    rna_residues = 0
1383
1384
                    # Count the total number of residues and RNA-like residues
1385
                    for residue in chain:
1386
                        total_residues += 1
1387
                        if residue.get_resname().strip() in rna_nucleotides:
1388
                            rna_residues += 1
1389
1390
                    # Calculate the proportion of RNA residues
1391
                    if total_residues == 0:
1392
                        return False  # Avoid division by zero if chain has no residues
1393
1394
                    rna_percentage = rna_residues / total_residues
1395
1396
                    # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default)
1397
                    return rna_percentage >= threshold
1398
1399
                from Bio.PDB.MMCIFParser import MMCIFParser
1400
                from Bio.PDB import MMCIFParser, Structure, Model, Chain
1401
                
1402
                # Initialize the parser
1403
                parser = MMCIFParser()
1404
1405
                # Parse the structure
1406
                structure = parser.get_structure("structure", cif_file)
1407
1408
                # Create a list of single-letter chain identifiers
1409
                import string
1410
                letters = list(string.ascii_uppercase)
1411
1412
                for model in structure:
1413
                    for chain in model:
1414
                        if has_high_rna_content(chain):
1415
                            # New structure
1416
                            new_structure = Structure.Structure("new_structure")
1417
                            new_model = Model.Model(0)  # Create a new model
1418
                            new_structure.add(new_model)  # Add the new model to the new structure
1419
1420
                            chain_id_new = letters.pop(0)
1421
                            chain_id = chain.get_id()
1422
1423
                            atom_count = 0
1424
                            for residue in chain:
1425
                                  for atom in residue:
1426
                                       atom_count += 1
1427
1428
                            remarks = []
1429
                            remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}')
1430
1431
                            pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
1432
                            print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')
1433
1434
                            chain.id = chain_id_new
1435
                            new_model.add(chain)
1436
1437
                            io = PDBIO()
1438
                            io.set_structure(new_structure)
1439
                            
1440
                            io.save(pdb_file)
1441
                            # open a file add remarks
1442
                            new_file = ''
1443
                            with open(pdb_file, 'r') as f:
1444
                                if not args.no_hr:
1445
                                    new_file += add_header(version) + '\n'
1446
                                if remarks:
1447
                                    new_file += '\n'.join(remarks) + '\n'
1448
                                new_file += f.read()
1449
1450
                            with open(pdb_file, 'w') as f:
1451
                                f.write(new_file)
1452
1453
    if args.pdb2cif:
1454
        try: