1 | #!/usr/bin/env python |
||
2 | # -*- coding: utf-8 -*- |
||
3 | """ |
||
4 | |||
5 | """ |
||
6 | from __future__ import print_function |
||
7 | import argparse |
||
8 | from icecream import ic |
||
9 | import sys |
||
10 | ic.configureOutput(outputFunction=lambda *a: print(*a, file=sys.stderr)) |
||
11 | ic.configureOutput(prefix='> ') |
||
12 | |||
13 | from rna_tools.rna_tools_lib import edit_pdb, add_header, get_version |
||
14 | import os |
||
15 | |||
16 | def get_parser(): |
||
17 | parser = argparse.ArgumentParser( |
||
18 | description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) |
||
19 | |||
20 | version = os.path.basename(os.path.dirname(os.path.abspath(__file__))), get_version(__file__) |
||
21 | version = version[1].strip() |
||
22 | parser = argparse.ArgumentParser( |
||
23 | description=__doc__, formatter_class=argparse.RawTextHelpFormatter) |
||
24 | |||
25 | parser.add_argument('--version', help='', action='version', version=version) |
||
26 | parser.add_argument("-v", "--verbose", |
||
27 | action="store_true", help="be verbose") |
||
28 | parser.add_argument('--no-hr', help='do not insert the header into files', |
||
29 | action='store_true') |
||
30 | parser.add_argument("file", help="", default="", nargs='+') |
||
31 | return parser, version |
||
32 | |||
33 | |||
34 | View Code Duplication | if __name__ == '__main__': |
|
0 ignored issues
–
show
Duplication
introduced
by
![]() |
|||
35 | parser, version = get_parser() |
||
36 | args = parser.parse_args() |
||
37 | |||
38 | if list != type(args.file): |
||
39 | args.file = [args.file] |
||
40 | |||
41 | for cif_file in args.file: |
||
42 | from Bio.PDB import MMCIFParser, PDBIO |
||
43 | parser = MMCIFParser() |
||
44 | structure = parser.get_structure("structure_id", cif_file) |
||
45 | pdb_file = cif_file.replace('.cif', '_fCIF.pdb') |
||
46 | |||
47 | try: |
||
48 | # Save to PDB format |
||
49 | io = PDBIO() |
||
50 | io.set_structure(structure) |
||
51 | io.save(pdb_file) |
||
52 | |||
53 | print(f'saved: {pdb_file}') |
||
54 | # open a file add remarks |
||
55 | new_file = '' |
||
56 | with open(pdb_file, 'r') as f: |
||
57 | if not args.no_hr: |
||
58 | new_file += add_header(version) + '\n' |
||
59 | new_file += f.read() |
||
60 | |||
61 | with open(pdb_file, 'w') as f: |
||
62 | f.write(new_file) |
||
63 | |||
64 | except: |
||
65 | print('Warning: some of the chains in this mmCIF file has chain names with more char than 1, e.g. AB, and the PDB format needs single-letter code, e.g. A.') |
||
66 | def has_high_rna_content(chain, threshold=0.8): |
||
67 | # RNA nucleotides: A, C, G, U, and X (you can modify as needed) |
||
68 | rna_nucleotides = ['A', 'C', 'G', 'U', 'X'] |
||
69 | total_residues = 0 |
||
70 | rna_residues = 0 |
||
71 | |||
72 | # Count the total number of residues and RNA-like residues |
||
73 | for residue in chain: |
||
74 | total_residues += 1 |
||
75 | if residue.get_resname().strip() in rna_nucleotides: |
||
76 | rna_residues += 1 |
||
77 | |||
78 | # Calculate the proportion of RNA residues |
||
79 | if total_residues == 0: |
||
80 | return False # Avoid division by zero if chain has no residues |
||
81 | |||
82 | rna_percentage = rna_residues / total_residues |
||
83 | |||
84 | # Check if the percentage of RNA residues is greater than or equal to the threshold (80% by default) |
||
85 | return rna_percentage >= threshold |
||
86 | |||
87 | from Bio.PDB.MMCIFParser import MMCIFParser |
||
88 | from Bio.PDB import MMCIFParser, Structure, Model, Chain |
||
89 | |||
90 | # Initialize the parser |
||
91 | parser = MMCIFParser() |
||
92 | |||
93 | # Parse the structure |
||
94 | structure = parser.get_structure("structure", cif_file) |
||
95 | |||
96 | # Create a list of single-letter chain identifiers |
||
97 | import string |
||
98 | letters = list(string.ascii_uppercase) |
||
99 | |||
100 | for model in structure: |
||
101 | for chain in model: |
||
102 | if has_high_rna_content(chain): |
||
103 | # New structure |
||
104 | new_structure = Structure.Structure("new_structure") |
||
105 | new_model = Model.Model(0) # Create a new model |
||
106 | new_structure.add(new_model) # Add the new model to the new structure |
||
107 | |||
108 | chain_id_new = letters.pop(0) |
||
109 | chain_id = chain.get_id() |
||
110 | |||
111 | atom_count = 0 |
||
112 | for residue in chain: |
||
113 | for atom in residue: |
||
114 | atom_count += 1 |
||
115 | |||
116 | remarks = [] |
||
117 | remarks.append(f'REMARK rna chain {chain.id} -> {chain_id_new}') |
||
118 | |||
119 | pdb_file = cif_file.replace('.cif', f'_{chain_id}_n{chain_id_new}_fCIF.pdb') |
||
120 | print(f'rna chain {chain.id} -> {chain_id_new} {pdb_file} # of atoms: {atom_count}') |
||
121 | |||
122 | chain.id = chain_id_new |
||
123 | new_model.add(chain) |
||
124 | |||
125 | io = PDBIO() |
||
126 | io.set_structure(new_structure) |
||
127 | |||
128 | io.save(pdb_file) |
||
129 | # open a file add remarks |
||
130 | new_file = '' |
||
131 | with open(pdb_file, 'r') as f: |
||
132 | if not args.no_hr: |
||
133 | new_file += add_header(version) + '\n' |
||
134 | if remarks: |
||
135 | new_file += '\n'.join(remarks) + '\n' |
||
136 | new_file += f.read() |
||
137 | |||
138 | with open(pdb_file, 'w') as f: |
||
139 | f.write(new_file) |
||
140 |