@@ 34-139 (lines=106) @@ | ||
31 | return parser, version |
|
32 | ||
33 | ||
34 | if __name__ == '__main__': |
|
35 | parser, version = get_parser() |
|
36 | args = parser.parse_args() |
|
37 | ||
38 | if list != type(args.file): |
|
39 | args.file = [args.file] |
|
40 | ||
41 | for cif_file in args.file: |
|
42 | from Bio.PDB import MMCIFParser, PDBIO |
|
43 | parser = MMCIFParser() |
|
44 | structure = parser.get_structure("structure_id", cif_file) |
|
45 | pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
|
46 | ||
47 | try: |
|
48 | # Save to PDB format |
|
49 | io = PDBIO() |
|
50 | io.set_structure(structure) |
|
51 | io.save(pdb_file) |
|
52 | ||
53 | print(f'saved: {pdb_file}') |
|
54 | # open a file add remarks |
|
55 | new_file = '' |
|
56 | with open(pdb_file, 'r') as f: |
|
57 | if not args.no_hr: |
|
58 | new_file += add_header(version) + '\n' |
|
59 | new_file += f.read() |
|
60 | ||
61 | with open(pdb_file, 'w') as f: |
|
62 | f.write(new_file) |
|
63 | ||
64 | except: |
|
65 | print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
|
def has_high_rna_content(chain, threshold=0.8):
    """Tell whether a chain is made up mostly of RNA-like residues.

    Each item yielded by ``chain`` is asked for its residue name through
    ``get_resname()``. A residue counts as RNA-like when that name, with
    surrounding whitespace stripped, is one of A, C, G, U or X.

    Args:
        chain: iterable of residue objects exposing ``get_resname()``.
        threshold: minimum RNA-like fraction (inclusive) for a positive
            answer; 0.8 by default.

    Returns:
        True when (RNA-like residues / all residues) >= threshold.
        False otherwise, and also for a chain with no residues at all.
    """
    rna_like_names = ('A', 'C', 'G', 'U', 'X')

    # One pass over the chain: keep every stripped residue name.
    names = [res.get_resname().strip() for res in chain]

    # An empty chain has no defined fraction; report it as "not RNA".
    if not names:
        return False

    hits = sum(1 for name in names if name in rna_like_names)
    return hits / len(names) >= threshold
|
86 | ||
87 | from Bio.PDB.MMCIFParser import MMCIFParser |
|
88 | from Bio.PDB import MMCIFParser, Structure, Model, Chain |
|
89 | ||
90 | # Initialize the parser |
|
91 | parser = MMCIFParser() |
|
92 | ||
93 | # Parse the structure |
|
94 | structure = parser.get_structure("structure", cif_file) |
|
95 | ||
96 | # Create a list of single-letter chain identifiers |
|
97 | import string |
|
98 | letters = list(string.ascii_uppercase) |
|
99 | ||
100 | for model in structure: |
|
101 | for chain in model: |
|
102 | if has_high_rna_content(chain): |
|
103 | # New structure |
|
104 | new_structure = Structure.Structure("new_structure") |
|
105 | new_model = Model.Model(0) # Create a new model |
|
106 | new_structure.add(new_model) # Add the new model to the new structure |
|
107 | ||
108 | chain_id_new = letters.pop(0) |
|
109 | chain_id = chain.get_id() |
|
110 | ||
111 | atom_count = 0 |
|
112 | for residue in chain: |
|
113 | for atom in residue: |
|
114 | atom_count += 1 |
|
115 | ||
116 | remarks = [] |
|
117 | remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
|
118 | ||
119 | pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
|
120 | print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
|
121 | ||
122 | chain.id = chain_id_new |
|
123 | new_model.add(chain) |
|
124 | ||
125 | io = PDBIO() |
|
126 | io.set_structure(new_structure) |
|
127 | ||
128 | io.save(pdb_file) |
|
129 | # open a file add remarks |
|
130 | new_file = '' |
|
131 | with open(pdb_file, 'r') as f: |
|
132 | if not args.no_hr: |
|
133 | new_file += add_header(version) + '\n' |
|
134 | if remarks: |
|
135 | new_file += '\n'.join(remarks) + '\n' |
|
136 | new_file += f.read() |
|
137 | ||
138 | with open(pdb_file, 'w') as f: |
|
139 | f.write(new_file) |
|
140 |
@@ 1352-1457 (lines=106) @@ | ||
1349 | ||
1350 | from rna_tools.rna_tools_config import PYMOL_PATH |
|
1351 | sys.path.insert(0, PYMOL_PATH) |
|
1352 | if args.cif2pdb: |
|
1353 | # quick fix - make a list on the spot |
|
1354 | if list != type(args.file): |
|
1355 | args.file = [args.file] |
|
1356 | ################################## |
|
1357 | for cif_file in args.file: |
|
1358 | from Bio.PDB import MMCIFParser, PDBIO |
|
1359 | parser = MMCIFParser() |
|
1360 | structure = parser.get_structure("structure_id", cif_file) |
|
1361 | pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
|
1362 | ||
1363 | ||
1364 | try: |
|
1365 | # Save to PDB format |
|
1366 | io = PDBIO() |
|
1367 | io.set_structure(structure) |
|
1368 | io.save(pdb_file) |
|
1369 | ||
1370 | print(f'saved: {pdb_file}') |
|
1371 | # open a file add remarks |
|
1372 | new_file = '' |
|
1373 | with open(pdb_file, 'r') as f: |
|
1374 | if not args.no_hr: |
|
1375 | new_file += add_header(version) + '\n' |
|
1376 | new_file += f.read() |
|
1377 | ||
1378 | with open(pdb_file, 'w') as f: |
|
1379 | f.write(new_file) |
|
1380 | ||
1381 | except: |
|
1382 | print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
|
1383 | ||
def has_high_rna_content(chain, threshold=0.8):
    """Tell whether a chain is made up mostly of RNA-like residues.

    Each item yielded by ``chain`` is asked for its residue name through
    ``get_resname()``. A residue counts as RNA-like when that name, with
    surrounding whitespace stripped, is one of A, C, G, U or X.

    Args:
        chain: iterable of residue objects exposing ``get_resname()``.
        threshold: minimum RNA-like fraction (inclusive) for a positive
            answer; 0.8 by default.

    Returns:
        True when (RNA-like residues / all residues) >= threshold.
        False otherwise, and also for a chain with no residues at all.
    """
    rna_like_names = ('A', 'C', 'G', 'U', 'X')

    # One pass over the chain: keep every stripped residue name.
    names = [res.get_resname().strip() for res in chain]

    # An empty chain has no defined fraction; report it as "not RNA".
    if not names:
        return False

    hits = sum(1 for name in names if name in rna_like_names)
    return hits / len(names) >= threshold
|
1404 | ||
1405 | from Bio.PDB.MMCIFParser import MMCIFParser |
|
1406 | from Bio.PDB import MMCIFParser, Structure, Model, Chain |
|
1407 | ||
1408 | # Initialize the parser |
|
1409 | parser = MMCIFParser() |
|
1410 | ||
1411 | # Parse the structure |
|
1412 | structure = parser.get_structure("structure", cif_file) |
|
1413 | ||
1414 | # Create a list of single-letter chain identifiers |
|
1415 | import string |
|
1416 | letters = list(string.ascii_uppercase) |
|
1417 | ||
1418 | for model in structure: |
|
1419 | for chain in model: |
|
1420 | if has_high_rna_content(chain): |
|
1421 | # New structure |
|
1422 | new_structure = Structure.Structure("new_structure") |
|
1423 | new_model = Model.Model(0) # Create a new model |
|
1424 | new_structure.add(new_model) # Add the new model to the new structure |
|
1425 | ||
1426 | chain_id_new = letters.pop(0) |
|
1427 | chain_id = chain.get_id() |
|
1428 | ||
1429 | atom_count = 0 |
|
1430 | for residue in chain: |
|
1431 | for atom in residue: |
|
1432 | atom_count += 1 |
|
1433 | ||
1434 | remarks = [] |
|
1435 | remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
|
1436 | ||
1437 | pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
|
1438 | print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
|
1439 | ||
1440 | chain.id = chain_id_new |
|
1441 | new_model.add(chain) |
|
1442 | ||
1443 | io = PDBIO() |
|
1444 | io.set_structure(new_structure) |
|
1445 | ||
1446 | io.save(pdb_file) |
|
1447 | # open a file add remarks |
|
1448 | new_file = '' |
|
1449 | with open(pdb_file, 'r') as f: |
|
1450 | if not args.no_hr: |
|
1451 | new_file += add_header(version) + '\n' |
|
1452 | if remarks: |
|
1453 | new_file += '\n'.join(remarks) + '\n' |
|
1454 | new_file += f.read() |
|
1455 | ||
1456 | with open(pdb_file, 'w') as f: |
|
1457 | f.write(new_file) |
|
1458 | ||
1459 | if args.pdb2cif: |
|
1460 | try: |