| @@ 1211-1316 (lines=106) @@ | ||
| 1208 | ||
| 1209 | from rna_tools.rna_tools_config import PYMOL_PATH |
|
| 1210 | sys.path.insert(0, PYMOL_PATH) |
|
if args.cif2pdb:
    # Convert every mmCIF file in ``args.file`` to PDB format.
    #
    # 1. Try to write the whole structure to ``<name>_fCIF.pdb``.
    # 2. If that fails, export every RNA-rich chain to its own file,
    #    ``<name>_<old id>_n<new id>_fCIF.pdb``, under a fresh
    #    single-character chain id.
    #
    # NOTE(review): indentation was lost in the view this block was rebuilt
    # from; step 2 is assumed to be the fallback of step 1 (i.e. it lived in
    # the ``except`` branch), which is what the warning text suggests - confirm.
    import os
    import string
    from Bio.PDB import MMCIFParser, PDBIO, Structure, Model

    def has_high_rna_content(chain, threshold=0.8):
        """Return True if at least ``threshold`` of the residues are RNA.

        A residue counts as RNA when its stripped name is A, C, G, U or X.

        Args:
            chain: iterable of residues exposing ``get_resname()``
            threshold: minimal fraction (0-1) of RNA residues

        Returns:
            bool: False for a chain without residues (avoids a division by
            zero), otherwise ``rna fraction >= threshold``.
        """
        rna_nucleotides = {'A', 'C', 'G', 'U', 'X'}
        total_residues = 0
        rna_residues = 0
        for residue in chain:
            total_residues += 1
            if residue.get_resname().strip() in rna_nucleotides:
                rna_residues += 1
        if total_residues == 0:
            return False
        return rna_residues / total_residues >= threshold

    def fcif_output_path(cif_file, suffix):
        """Return the output path: input path minus .cif/.mmcif, plus suffix.

        Only the trailing extension is removed, so a '.cif' inside a directory
        name is left alone, and a name without a recognised extension gets the
        suffix appended - the result can never be the input path itself.
        """
        root, ext = os.path.splitext(cif_file)
        if ext.lower() not in ('.cif', '.mmcif'):
            root = cif_file
        return root + suffix

    def prepend_text(path, text):
        """Put ``text`` in front of the current content of the file ``path``."""
        if not text:
            return  # nothing to add, keep the file untouched
        with open(path, 'r') as f:
            content = f.read()
        with open(path, 'w') as f:
            f.write(text + content)

    # quick fix - make a list on the spot
    if not isinstance(args.file, list):
        args.file = [args.file]

    # ids handed out, in this order, to the exported RNA chains of one file
    single_char_ids = string.ascii_uppercase + string.ascii_lowercase + string.digits

    for cif_file in args.file:
        structure = MMCIFParser().get_structure("structure_id", cif_file)
        pdb_file = fcif_output_path(cif_file, '_fCIF.pdb')

        try:
            # Save to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)
        except Exception as exc:
            # Exception (not a bare except) lets Ctrl-C / SystemExit through
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            # the warning above is only the presumed reason, show the real one
            print(f'Error: {exc}')
        else:
            print(f'saved: {pdb_file}')
            # add the header on top of the file unless switched off
            prepend_text(pdb_file, '' if args.no_hr else add_header(version) + '\n')
            continue

        # ---- fallback: one PDB file per RNA chain ----
        structure = MMCIFParser().get_structure("structure", cif_file)
        rna_chains = [chain for model in structure for chain in model
                      if has_high_rna_content(chain)]

        # zip() stops at the shorter input, so running out of ids cannot crash
        for chain, chain_id_new in zip(rna_chains, single_char_ids):
            chain_id = chain.get_id()
            atom_count = sum(1 for residue in chain for _ in residue)

            remarks = f'REMARK rna chain {chain_id} -> {chain_id_new}\n'
            pdb_file = fcif_output_path(cif_file, f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
            print(f'rna chain {chain_id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

            # Rename a detached copy: Biopython raises ValueError when an
            # attached chain is given an id that a sibling chain already uses.
            new_chain = chain.copy()
            new_chain.id = chain_id_new

            new_model = Model.Model(0)
            new_model.add(new_chain)
            new_structure = Structure.Structure("new_structure")
            new_structure.add(new_model)

            io = PDBIO()
            io.set_structure(new_structure)
            io.save(pdb_file)

            # header (unless switched off) first, then the chain mapping remark
            header = '' if args.no_hr else add_header(version) + '\n'
            prepend_text(pdb_file, header + remarks)

        skipped = len(rna_chains) - len(single_char_ids)
        if skipped > 0:
            print(f'Warning: no single-character chain ids left, {skipped} RNA chain(s) of {cif_file} were not exported.')
|
| 1317 | ||
| 1318 | if args.pdb2cif: |
|
| 1319 | try: |
|
| @@ 34-139 (lines=106) @@ | ||
| 31 | return parser, version |
|
| 32 | ||
| 33 | ||
def has_high_rna_content(chain, threshold=0.8):
    """Return True if at least ``threshold`` of the residues are RNA.

    A residue counts as RNA when its stripped name is A, C, G, U or X.

    Args:
        chain: iterable of residues exposing ``get_resname()``
        threshold: minimal fraction (0-1) of RNA residues

    Returns:
        bool: False for a chain without residues (avoids a division by
        zero), otherwise ``rna fraction >= threshold``.
    """
    rna_nucleotides = {'A', 'C', 'G', 'U', 'X'}
    total_residues = 0
    rna_residues = 0
    for residue in chain:
        total_residues += 1
        if residue.get_resname().strip() in rna_nucleotides:
            rna_residues += 1
    if total_residues == 0:
        return False
    return rna_residues / total_residues >= threshold


def _fcif_output_path(cif_file, suffix):
    """Return the output path: input path minus .cif/.mmcif, plus suffix.

    Only the trailing extension is removed, so a '.cif' inside a directory
    name is left alone, and a name without a recognised extension gets the
    suffix appended - the result can never be the input path itself.
    """
    import os

    root, ext = os.path.splitext(cif_file)
    if ext.lower() not in ('.cif', '.mmcif'):
        root = cif_file
    return root + suffix


def _prepend_text(path, text):
    """Put ``text`` in front of the current content of the file ``path``."""
    if not text:
        return  # nothing to add, keep the file untouched
    with open(path, 'r') as f:
        content = f.read()
    with open(path, 'w') as f:
        f.write(text + content)


if __name__ == '__main__':
    # Convert every mmCIF file given on the command line to PDB format.
    #
    # 1. Try to write the whole structure to ``<name>_fCIF.pdb``.
    # 2. If that fails, export every RNA-rich chain to its own file,
    #    ``<name>_<old id>_n<new id>_fCIF.pdb``, under a fresh
    #    single-character chain id.
    #
    # NOTE(review): indentation was lost in the view this block was rebuilt
    # from; step 2 is assumed to be the fallback of step 1 (i.e. it lived in
    # the ``except`` branch), which is what the warning text suggests - confirm.
    import string
    from Bio.PDB import MMCIFParser, PDBIO, Structure, Model

    parser, version = get_parser()
    args = parser.parse_args()

    # a single file name is wrapped so the loop below always sees a list
    if not isinstance(args.file, list):
        args.file = [args.file]

    # ids handed out, in this order, to the exported RNA chains of one file
    single_char_ids = string.ascii_uppercase + string.ascii_lowercase + string.digits

    for cif_file in args.file:
        structure = MMCIFParser().get_structure("structure_id", cif_file)
        pdb_file = _fcif_output_path(cif_file, '_fCIF.pdb')

        try:
            # Save to PDB format
            io = PDBIO()
            io.set_structure(structure)
            io.save(pdb_file)
        except Exception as exc:
            # Exception (not a bare except) lets Ctrl-C / SystemExit through
            print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.')
            # the warning above is only the presumed reason, show the real one
            print(f'Error: {exc}')
        else:
            print(f'saved: {pdb_file}')
            # add the header on top of the file unless switched off
            _prepend_text(pdb_file, '' if args.no_hr else add_header(version) + '\n')
            continue

        # ---- fallback: one PDB file per RNA chain ----
        structure = MMCIFParser().get_structure("structure", cif_file)
        rna_chains = [chain for model in structure for chain in model
                      if has_high_rna_content(chain)]

        # zip() stops at the shorter input, so running out of ids cannot crash
        for chain, chain_id_new in zip(rna_chains, single_char_ids):
            chain_id = chain.get_id()
            atom_count = sum(1 for residue in chain for _ in residue)

            remarks = f'REMARK rna chain {chain_id} -> {chain_id_new}\n'
            pdb_file = _fcif_output_path(cif_file, f'_{chain_id}_n{chain_id_new}_fCIF.pdb')
            print(f'rna chain {chain_id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}')

            # Rename a detached copy: Biopython raises ValueError when an
            # attached chain is given an id that a sibling chain already uses.
            new_chain = chain.copy()
            new_chain.id = chain_id_new

            new_model = Model.Model(0)
            new_model.add(new_chain)
            new_structure = Structure.Structure("new_structure")
            new_structure.add(new_model)

            io = PDBIO()
            io.set_structure(new_structure)
            io.save(pdb_file)

            # header (unless switched off) first, then the chain mapping remark
            header = '' if args.no_hr else add_header(version) + '\n'
            _prepend_text(pdb_file, header + remarks)

        skipped = len(rna_chains) - len(single_char_ids)
        if skipped > 0:
            print(f'Warning: no single-character chain ids left, {skipped} RNA chain(s) of {cif_file} were not exported.')
|
| 140 | ||