1
|
|
|
#!/usr/bin/env python |
2
|
|
|
# -*- coding: utf-8 -*- |
3
|
|
|
""" |
4
|
|
|
|
5
|
|
|
""" |
6
|
|
|
from __future__ import print_function |
7
|
|
|
import argparse |
8
|
|
|
from icecream import ic |
9
|
|
|
import sys |
10
|
|
|
ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr)) |
11
|
|
|
ic.configureOutput(prefix='> ') |
12
|
|
|
|
13
|
|
|
from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version |
14
|
|
|
import os |
15
|
|
|
|
16
|
|
|
def get_parser(): |
17
|
|
|
parser = argparse.ArgumentParser( |
18
|
|
|
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) |
19
|
|
|
|
20
|
|
|
version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
21
|
|
|
version = version[1].strip() |
22
|
|
|
parser = argparse.ArgumentParser( |
23
|
|
|
description=__doc__, formatter_class=argparse.RawTextHelpFormatter) |
24
|
|
|
|
25
|
|
|
parser.add_argument('--version', help='', action='version', version=version) |
26
|
|
|
parser.add_argument("-v", "--verbose", |
27
|
|
|
action="store_true", help="be verbose") |
28
|
|
|
parser.add_argument('--no-hr', help='do not insert the header into files', |
29
|
|
|
action='store_true') |
30
|
|
|
parser.add_argument("file", help="", default="", nargs='+') |
31
|
|
|
return parser, version |
32
|
|
|
|
33
|
|
|
|
34
|
|
View Code Duplication |
if __name__ == '__main__': |
|
|
|
|
35
|
|
|
parser, version = get_parser() |
36
|
|
|
args = parser.parse_args() |
37
|
|
|
|
38
|
|
|
if list != type(args.file): |
39
|
|
|
args.file = [args.file] |
40
|
|
|
|
41
|
|
|
for cif_file in args.file: |
42
|
|
|
from Bio.PDB import MMCIFParser, PDBIO |
43
|
|
|
parser = MMCIFParser() |
44
|
|
|
structure = parser.get_structure("structure_id", cif_file) |
45
|
|
|
pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
46
|
|
|
|
47
|
|
|
try: |
48
|
|
|
# Save to PDB format |
49
|
|
|
io = PDBIO() |
50
|
|
|
io.set_structure(structure) |
51
|
|
|
io.save(pdb_file) |
52
|
|
|
|
53
|
|
|
print(f'saved: {pdb_file}') |
54
|
|
|
# open a file add remarks |
55
|
|
|
new_file = '' |
56
|
|
|
with open(pdb_file, 'r') as f: |
57
|
|
|
if not args.no_hr: |
58
|
|
|
new_file += add_header(version) + '\n' |
59
|
|
|
new_file += f.read() |
60
|
|
|
|
61
|
|
|
with open(pdb_file, 'w') as f: |
62
|
|
|
f.write(new_file) |
63
|
|
|
|
64
|
|
|
except: |
65
|
|
|
print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
66
|
|
|
def has_high_rna_content(chain, threshold=0.8): |
67
|
|
|
# RNA nucleotides: A, C, G, U, and X (you can modify as needed) |
68
|
|
|
rna_nucleotides = ['A', 'C', 'G', 'U', 'X'] |
69
|
|
|
total_residues = 0 |
70
|
|
|
rna_residues = 0 |
71
|
|
|
|
72
|
|
|
# Count the total number of residues and RNA-like residues |
73
|
|
|
for residue in chain: |
74
|
|
|
total_residues += 1 |
75
|
|
|
if residue.get_resname().strip() in rna_nucleotides: |
76
|
|
|
rna_residues += 1 |
77
|
|
|
|
78
|
|
|
# Calculate the proportion of RNA residues |
79
|
|
|
if total_residues == 0: |
80
|
|
|
return False # Avoid division by zero if chain has no residues |
81
|
|
|
|
82
|
|
|
rna_percentage = rna_residues / total_residues |
83
|
|
|
|
84
|
|
|
# Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default) |
85
|
|
|
return rna_percentage >= threshold |
86
|
|
|
|
87
|
|
|
from Bio.PDB.MMCIFParser import MMCIFParser |
88
|
|
|
from Bio.PDB import MMCIFParser, Structure, Model, Chain |
89
|
|
|
|
90
|
|
|
# Initialize the parser |
91
|
|
|
parser = MMCIFParser() |
92
|
|
|
|
93
|
|
|
# Parse the structure |
94
|
|
|
structure = parser.get_structure("structure", cif_file) |
95
|
|
|
|
96
|
|
|
# Create a list of single-letter chain identifiers |
97
|
|
|
import string |
98
|
|
|
letters = list(string.ascii_uppercase) |
99
|
|
|
|
100
|
|
|
for model in structure: |
101
|
|
|
for chain in model: |
102
|
|
|
if has_high_rna_content(chain): |
103
|
|
|
# New structure |
104
|
|
|
new_structure = Structure.Structure("new_structure") |
105
|
|
|
new_model = Model.Model(0) # Create a new model |
106
|
|
|
new_structure.add(new_model) # Add the new model to the new structure |
107
|
|
|
|
108
|
|
|
chain_id_new = letters.pop(0) |
109
|
|
|
chain_id = chain.get_id() |
110
|
|
|
|
111
|
|
|
atom_count = 0 |
112
|
|
|
for residue in chain: |
113
|
|
|
for atom in residue: |
114
|
|
|
atom_count += 1 |
115
|
|
|
|
116
|
|
|
remarks = [] |
117
|
|
|
remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
118
|
|
|
|
119
|
|
|
pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
120
|
|
|
print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
121
|
|
|
|
122
|
|
|
chain.id = chain_id_new |
123
|
|
|
new_model.add(chain) |
124
|
|
|
|
125
|
|
|
io = PDBIO() |
126
|
|
|
io.set_structure(new_structure) |
127
|
|
|
|
128
|
|
|
io.save(pdb_file) |
129
|
|
|
# open a file add remarks |
130
|
|
|
new_file = '' |
131
|
|
|
with open(pdb_file, 'r') as f: |
132
|
|
|
if not args.no_hr: |
133
|
|
|
new_file += add_header(version) + '\n' |
134
|
|
|
if remarks: |
135
|
|
|
new_file += '\n'.join(remarks) + '\n' |
136
|
|
|
new_file += f.read() |
137
|
|
|
|
138
|
|
|
with open(pdb_file, 'w') as f: |
139
|
|
|
f.write(new_file) |
140
|
|
|
|