| @@ 34-139 (lines=106) @@ | ||
| 31 | return parser, version |
|
| 32 | ||
| 33 | ||
# Residue names that count as RNA when a chain is classified.
_RNA_RESIDUE_NAMES = frozenset(('A', 'C', 'G', 'U', 'X'))


def has_high_rna_content(chain, threshold=0.8):
    """Tell whether a chain consists mostly of RNA residues.

    Args:
        chain: iterable of residues; every residue must offer ``get_resname()``.
        threshold: minimum fraction (0-1) of residues whose stripped name is
            one of A, C, G, U or X.

    Returns:
        True when the RNA fraction is >= ``threshold``; False otherwise, and
        False for a chain without residues (avoids a division by zero).
    """
    total_residues = 0
    rna_residues = 0
    for residue in chain:
        total_residues += 1
        if residue.get_resname().strip() in _RNA_RESIDUE_NAMES:
            rna_residues += 1

    if total_residues == 0:
        return False
    return rna_residues / total_residues >= threshold


def _derive_output_path(cif_file, suffix):
    """Return ``cif_file`` with its extension swapped for ``suffix``.

    Only the final extension is replaced.  ``str.replace('.cif', ...)`` would
    also rewrite a '.cif' inside a directory name and, for an input that does
    not end in '.cif', would hand back the input path itself, so the source
    file would be overwritten by the output.
    """
    import os

    root, _extension = os.path.splitext(cif_file)
    return root + suffix


def _prepend_text(path, text):
    """Rewrite the file at ``path`` so that ``text`` precedes its content."""
    if not text:
        return  # nothing to add, the file is already final
    with open(path, 'r') as handle:
        body = handle.read()
    with open(path, 'w') as handle:
        handle.write(text + body)


def _export_rna_chains(structure, cif_file, header):
    """Save every RNA-rich chain of ``structure`` as its own PDB file.

    Each exported chain gets a fresh single-character id and is written to
    ``<input>_<old id>_n<new id>_fCIF.pdb``; ``header`` and a REMARK line
    recording the renaming are put at the top of that file.

    Args:
        structure: parsed Bio.PDB structure of ``cif_file``.
        cif_file: path of the input file, used to derive the output names.
        header: text to put before the REMARK line (may be empty).
    """
    import string
    from Bio.PDB import PDBIO, Model, Structure

    # Upper-case letters are handed out first (as before); lower-case letters
    # and digits only extend the pool so that large or multi-model inputs do
    # not fail once 26 chains have been exported.
    free_ids = iter(string.ascii_uppercase + string.ascii_lowercase + string.digits)

    for model in structure:
        for chain in model:
            if not has_high_rna_content(chain):
                continue

            chain_id_new = next(free_ids, None)
            if chain_id_new is None:
                print('Warning: no single-character chain ids left, the remaining RNA chains were not exported.')
                return

            chain_id = chain.get_id()
            atom_count = sum(1 for residue in chain for _atom in residue)

            pdb_file = _derive_output_path(cif_file, f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
            print(f'rna chain {chain_id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

            # Detach before renaming: while the chain still belongs to its
            # model, Bio.PDB refuses a new id that a sibling chain already
            # uses (e.g. renaming 'AB' to 'A' next to an existing chain 'A').
            chain.detach_parent()
            chain.id = chain_id_new

            new_structure = Structure.Structure("new_structure")
            new_model = Model.Model(0)
            new_structure.add(new_model)
            new_model.add(chain)

            io = PDBIO()
            io.set_structure(new_structure)
            io.save(pdb_file)

            remark = f'REMARK rna chain {chain_id} -> {chain_id_new}\n'
            _prepend_text(pdb_file, header + remark)


if __name__ == '__main__':
    parser, version = get_parser()
    args = parser.parse_args()

    # A single path may arrive as a plain value; always work on a list.
    if not isinstance(args.file, list):
        args.file = [args.file]

    from Bio.PDB import MMCIFParser, PDBIO

    # Header block placed at the top of every output file unless --no-hr-like
    # behaviour was requested through ``args.no_hr``.
    header = '' if args.no_hr else add_header(version) + '\n'

    for cif_file in args.file:
        structure = MMCIFParser().get_structure("structure_id", cif_file)
        pdb_file = _derive_output_path(cif_file, '_fCIF.pdb')

        try:
            # Save the whole structure to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)

            print(f'saved: {pdb_file}')
            _prepend_text(pdb_file, header)
        except Exception as err:
            # ``Exception`` (not a bare except) so Ctrl-C / SystemExit still
            # stop the program.
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            print(f'({type(err).__name__}: {err})')
            # NOTE(review): the per-chain export is treated as the fallback
            # for a failed whole-structure save; the original indentation was
            # not recoverable here - confirm.
            _export_rna_chains(structure, cif_file, header)
|
| 140 | ||
| @@ 1346-1451 (lines=106) @@ | ||
| 1343 | ||
| 1344 | from rna_tools.rna_tools_config import PYMOL_PATH |
|
| 1345 | sys.path.insert(0, PYMOL_PATH) |
|
if args.cif2pdb:
    # Convert every given mmCIF file to PDB.  When the whole structure cannot
    # be written, RNA-rich chains are exported one per file with a fresh
    # single-character chain id.
    # NOTE(review): the per-chain export is treated as the fallback for a
    # failed whole-structure save; the original indentation was not
    # recoverable here - confirm.
    import os
    import string
    from Bio.PDB import MMCIFParser, PDBIO, Structure, Model

    # Residue names that count as RNA when a chain is classified.
    rna_residue_names = frozenset(('A', 'C', 'G', 'U', 'X'))

    def has_high_rna_content(chain, threshold=0.8):
        """Tell whether a chain consists mostly of RNA residues.

        Args:
            chain: iterable of residues offering ``get_resname()``.
            threshold: minimum fraction (0-1) of residues named A, C, G, U
                or X (after stripping whitespace).

        Returns:
            True when the RNA fraction is >= ``threshold``; False otherwise,
            and False for a chain without residues.
        """
        total_residues = 0
        rna_residues = 0
        for residue in chain:
            total_residues += 1
            if residue.get_resname().strip() in rna_residue_names:
                rna_residues += 1
        if total_residues == 0:
            return False  # avoid division by zero
        return rna_residues / total_residues >= threshold

    def derive_output_path(cif_path, suffix):
        """Return ``cif_path`` with only its final extension swapped for ``suffix``.

        ``str.replace('.cif', ...)`` would also touch directory names and, for
        an input not ending in '.cif', would return the input path itself, so
        the source file would be overwritten.
        """
        root, _extension = os.path.splitext(cif_path)
        return root + suffix

    def prepend_text(path, text):
        """Rewrite the file at ``path`` so that ``text`` precedes its content."""
        if not text:
            return
        with open(path, 'r') as handle:
            body = handle.read()
        with open(path, 'w') as handle:
            handle.write(text + body)

    # quick fix - make a list on the spot
    if not isinstance(args.file, list):
        args.file = [args.file]

    # Header placed at the top of each output unless ``args.no_hr`` is set.
    header = '' if args.no_hr else add_header(version) + '\n'

    for cif_file in args.file:
        structure = MMCIFParser().get_structure("structure_id", cif_file)
        pdb_file = derive_output_path(cif_file, '_fCIF.pdb')

        try:
            # Save to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)

            print(f'saved: {pdb_file}')
            prepend_text(pdb_file, header)
            continue
        except Exception as err:
            # ``Exception`` rather than a bare except, so Ctrl-C and
            # SystemExit are not swallowed.
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            print(f'({type(err).__name__}: {err})')

        # Fallback: one file per RNA-rich chain.  Upper-case ids are handed
        # out first (as before); lower-case letters and digits only extend
        # the pool so more than 26 chains do not crash the export.
        free_ids = iter(string.ascii_uppercase + string.ascii_lowercase + string.digits)
        ids_exhausted = False

        for model in structure:
            if ids_exhausted:
                break
            for chain in model:
                if not has_high_rna_content(chain):
                    continue

                chain_id_new = next(free_ids, None)
                if chain_id_new is None:
                    print('Warning: no single-character chain ids left, the remaining RNA chains were not exported.')
                    ids_exhausted = True
                    break

                chain_id = chain.get_id()
                atom_count = sum(1 for residue in chain for _atom in residue)

                pdb_file = derive_output_path(cif_file, f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
                print(f'rna chain {chain_id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

                # Detach before renaming: a chain that still belongs to its
                # model cannot take an id already used by a sibling chain.
                chain.detach_parent()
                chain.id = chain_id_new

                new_structure = Structure.Structure("new_structure")
                new_model = Model.Model(0)  # Create a new model
                new_structure.add(new_model)  # Add the new model to the new structure
                new_model.add(chain)

                io = PDBIO()
                io.set_structure(new_structure)
                io.save(pdb_file)

                # put the header and the renaming remark on top of the file
                remark = f'REMARK rna chain {chain_id} -> {chain_id_new}\n'
                prepend_text(pdb_file, header + remark)
|
| 1452 | ||
| 1453 | if args.pdb2cif: |
|
| 1454 | try: |
|