|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
from __future__ import print_function |
|
3
|
|
|
__docformat__ = 'reStructuredText' |
|
4
|
|
|
import os |
|
5
|
|
|
import Bio.PDB.PDBParser |
|
6
|
|
|
import Bio.PDB.Superimposer |
|
7
|
|
|
from Bio.PDB.PDBIO import Select |
|
8
|
|
|
from Bio.PDB import PDBIO |
|
9
|
|
|
from Bio.SVDSuperimposer import SVDSuperimposer |
|
10
|
|
|
from numpy import sqrt, array, asarray |
|
11
|
|
|
|
|
12
|
|
|
|
|
13
|
|
|
BACKBONE_ATOMS = [ |
|
14
|
|
|
'P', 'OP1', 'OP2', "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'" |
|
15
|
|
|
] |
|
16
|
|
|
# Atoms used to anchor base orientation per residue type |
|
17
|
|
|
PURINE_ATOMS = ['N9', 'C8', 'C7'] # G, A |
|
18
|
|
|
PYRIMIDINE_ATOMS = ['N1', 'C6', 'C2'] # C, U (and T) |
|
19
|
|
|
PURINES = {'A', 'G', 'DA', 'DG'} |
|
20
|
|
|
PYRIMIDINES = {'C', 'U', 'T', 'DT'} |
|
21
|
|
|
|
|
22
|
|
|
ATOM_SELECTION_SUMMARY = "\n".join([ |
|
23
|
|
|
f"BACKBONE_ATOMS = {BACKBONE_ATOMS}", |
|
24
|
|
|
"# Atoms used to anchor base orientation per residue type", |
|
25
|
|
|
f"PURINE_ATOMS = {PURINE_ATOMS} # G, A", |
|
26
|
|
|
f"PYRIMIDINE_ATOMS = {PYRIMIDINE_ATOMS} # C, U (and T)", |
|
27
|
|
|
f"PURINES = {sorted(PURINES)}", |
|
28
|
|
|
f"PYRIMIDINES = {sorted(PYRIMIDINES)}", |
|
29
|
|
|
]) |
|
30
|
|
|
|
|
31
|
|
|
|
|
32
|
|
|
def get_atom_selection_summary(): |
|
33
|
|
|
return ATOM_SELECTION_SUMMARY |
|
34
|
|
|
|
|
35
|
|
|
class RNAmodel: |
|
36
|
|
|
"""RNAmodel |
|
37
|
|
|
|
|
38
|
|
|
:Example: |
|
39
|
|
|
|
|
40
|
|
|
>>> rna = RNAmodel("test_data/rp14/rp14_5ddp_bound_clean_ligand.pdb", [1], False, None) |
|
41
|
|
|
>>> rna.get_report() |
|
42
|
|
|
"File: rp14_5ddp_bound_clean_ligand.pdb # of atoms: 1 \\nresi: 1 atom: <Atom C3'> \\n" |
|
43
|
|
|
|
|
44
|
|
|
:param fpath: file path, string |
|
45
|
|
|
:param residues: list of residues to use (and since we take only 1 atom, C3', this equals to number of atoms. |
|
46
|
|
|
:param save: boolean, save to output_dir or not |
|
47
|
|
|
:param output_dir: string, if save, save segments to this folder |
|
48
|
|
|
""" |
|
49
|
|
|
#:returns: None |
|
50
|
|
|
#:rtype: None |
|
51
|
|
|
#""" |
|
52
|
|
|
def __init__(self, fpath, residues, save=False, output_dir=""): |
|
53
|
|
|
|
|
54
|
|
|
# parser 1-5 -> 1 2 3 4 5 |
|
55
|
|
|
self.struc = Bio.PDB.PDBParser().get_structure('', fpath) |
|
56
|
|
|
self.fpath = fpath |
|
57
|
|
|
self.fn = os.path.basename(fpath) |
|
58
|
|
|
self.residues = residues #self.__parser_residues(residues) |
|
59
|
|
|
self.__get_atoms() |
|
60
|
|
|
#self.atoms = [] |
|
61
|
|
|
if save: |
|
62
|
|
|
self.save(output_dir) # @save |
|
63
|
|
|
|
|
64
|
|
|
def __parser_residues(self, residues): |
|
65
|
|
|
"""Get string and parse it |
|
66
|
|
|
'1 4 5 10-15' -> [1, 4, 5, 10, 11, 12, 13, 14, 15]""" |
|
67
|
|
|
rs = [] |
|
68
|
|
|
for r in residues.split(): |
|
69
|
|
|
l = parse_num_list(r) |
|
|
|
|
|
|
70
|
|
|
for i in l: |
|
71
|
|
|
if i in rs: |
|
72
|
|
|
raise Exception('You have this resi already in your list! See', residues) |
|
73
|
|
|
rs.extend(l) |
|
74
|
|
|
return rs |
|
75
|
|
|
|
|
76
|
|
|
def __get_atoms(self): |
|
77
|
|
|
self.atoms = [] |
|
78
|
|
|
self.atom_ids = [] # (resSeq, atomName) |
|
79
|
|
|
self.atom_lookup = {} |
|
80
|
|
|
for res in self.struc.get_residues(): |
|
81
|
|
|
res_seq = res.id[1] |
|
82
|
|
|
if res_seq not in self.residues: |
|
83
|
|
|
continue |
|
84
|
|
|
# backbone atoms |
|
85
|
|
|
for atom_name in BACKBONE_ATOMS: |
|
86
|
|
|
self._append_atom_if_present(res, res_seq, atom_name) |
|
87
|
|
|
# base atoms |
|
88
|
|
|
for atom_name in self._get_base_atoms(res): |
|
89
|
|
|
self._append_atom_if_present(res, res_seq, atom_name) |
|
90
|
|
|
if len(self.atom_ids) <= 0: |
|
91
|
|
|
raise Exception('problem: no atoms were selected!: %s' % self.fn) |
|
92
|
|
|
return self.atoms |
|
93
|
|
|
|
|
94
|
|
|
def _append_atom_if_present(self, residue, res_seq, atom_name): |
|
95
|
|
|
if residue.has_id(atom_name): |
|
96
|
|
|
atom = residue[atom_name] |
|
97
|
|
|
key = (res_seq, atom_name) |
|
98
|
|
|
self.atom_lookup[key] = atom |
|
99
|
|
|
self.atom_ids.append(key) |
|
100
|
|
|
self.atoms.append(atom) |
|
101
|
|
|
else: |
|
102
|
|
|
pass # silently ignore missing atoms |
|
103
|
|
|
|
|
104
|
|
|
def _get_base_atoms(self, residue): |
|
105
|
|
|
resname = residue.get_resname().strip().upper() |
|
106
|
|
|
if resname in PURINES or (resname and resname[0] in ('A', 'G')): |
|
107
|
|
|
return PURINE_ATOMS |
|
108
|
|
|
if resname in PYRIMIDINES or (resname and resname[0] in ('C', 'U', 'T')): |
|
109
|
|
|
return PYRIMIDINE_ATOMS |
|
110
|
|
|
return [] |
|
111
|
|
|
|
|
112
|
|
|
def __str__(self): |
|
113
|
|
|
return self.fn #+ ' # beads' + str(len(self.residues)) |
|
114
|
|
|
|
|
115
|
|
|
def __repr__(self): |
|
116
|
|
|
return self.fn #+ ' # beads' + str(len(self.residues)) |
|
117
|
|
|
|
|
118
|
|
|
def get_report(self): |
|
119
|
|
|
"""Str a short report about rna model""" |
|
120
|
|
|
t = ' '.join(['File: ', self.fn, ' # of atoms:', str(len(self.atoms)), '\n']) |
|
121
|
|
|
for r, a in zip(self.residues, self.atoms): |
|
122
|
|
|
t += ' '.join(['resi: ', str(r), ' atom: ', str(a), '\n']) |
|
123
|
|
|
return t |
|
124
|
|
|
|
|
125
|
|
|
def get_rmsd_to(self, other_rnamodel, output='', dont_move=False): |
|
126
|
|
|
"""Calc rmsd P-atom based rmsd to other rna model""" |
|
127
|
|
|
sup = Bio.PDB.Superimposer() |
|
128
|
|
|
|
|
129
|
|
|
paired_self, paired_other = self._get_matched_atom_lists(other_rnamodel) |
|
130
|
|
|
if dont_move: |
|
131
|
|
|
coords = array([a.get_vector().get_array() for a in paired_self]) |
|
132
|
|
|
other_coords = array([a.get_vector().get_array() for a in paired_other]) |
|
133
|
|
|
s = SVDSuperimposer() |
|
134
|
|
|
s.set(coords, other_coords) |
|
135
|
|
|
return s.get_init_rms() |
|
136
|
|
|
|
|
137
|
|
|
sup.set_atoms(paired_self, paired_other) |
|
138
|
|
|
|
|
139
|
|
|
rms = round(sup.rms, 3) |
|
140
|
|
|
|
|
141
|
|
|
if output: |
|
142
|
|
|
io = Bio.PDB.PDBIO() |
|
143
|
|
|
sup.apply(self.struc.get_atoms()) |
|
144
|
|
|
io.set_structure( self.struc ) |
|
145
|
|
|
io.save("aligned.pdb") |
|
146
|
|
|
|
|
147
|
|
|
io = Bio.PDB.PDBIO() |
|
148
|
|
|
sup.apply(other_rnamodel.struc.get_atoms()) |
|
149
|
|
|
io.set_structure( other_rnamodel.struc ) |
|
150
|
|
|
io.save("aligned2.pdb") |
|
151
|
|
|
return rms |
|
152
|
|
|
|
|
153
|
|
|
def _get_matched_atom_lists(self, other_rnamodel): |
|
154
|
|
|
other_lookup = other_rnamodel.atom_lookup |
|
155
|
|
|
common_keys = [key for key in self.atom_ids if key in other_lookup] |
|
156
|
|
|
if not common_keys: |
|
157
|
|
|
raise Exception('No common atoms found between %s and %s' % (self.fn, other_rnamodel.fn)) |
|
158
|
|
|
self_atoms = [self.atom_lookup[key] for key in common_keys] |
|
159
|
|
|
other_atoms = [other_lookup[key] for key in common_keys] |
|
160
|
|
|
return self_atoms, other_atoms |
|
161
|
|
|
|
|
162
|
|
|
def save(self, output_dir, verbose=True): |
|
163
|
|
|
"""Save structures and motifs """ |
|
164
|
|
|
folder_to_save = output_dir + os.sep # ugly hack 'rp14/' |
|
165
|
|
|
try: |
|
166
|
|
|
os.makedirs(folder_to_save) |
|
167
|
|
|
except OSError: |
|
168
|
|
|
pass |
|
169
|
|
|
|
|
170
|
|
|
try: |
|
171
|
|
|
os.mkdir(folder_to_save + 'structures') |
|
172
|
|
|
except OSError: |
|
173
|
|
|
pass |
|
174
|
|
|
|
|
175
|
|
|
try: |
|
176
|
|
|
os.mkdir(folder_to_save + 'motifs') |
|
177
|
|
|
except OSError: |
|
178
|
|
|
pass |
|
179
|
|
|
|
|
180
|
|
|
RESI = self.residues |
|
181
|
|
|
if not self.struc: |
|
182
|
|
|
raise Exception('self.struct was not defined! Can not save a pdb!') |
|
183
|
|
|
|
|
184
|
|
|
class BpSelect(Select): |
|
185
|
|
|
def accept_residue(self, residue): |
|
186
|
|
|
if residue.get_id()[1] in RESI: |
|
187
|
|
|
return 1 |
|
188
|
|
|
else: |
|
189
|
|
|
return 0 |
|
190
|
|
|
|
|
191
|
|
|
io = PDBIO() |
|
192
|
|
|
io.set_structure(self.struc) |
|
193
|
|
|
fn = folder_to_save + 'structures' + os.sep + self.fn #+ '.pdb' |
|
194
|
|
|
io.save(fn) |
|
195
|
|
|
if verbose: |
|
196
|
|
|
print(' saved to struc: %s ' % fn) |
|
197
|
|
|
|
|
198
|
|
|
io = PDBIO() |
|
199
|
|
|
io.set_structure(self.struc) |
|
200
|
|
|
fn = folder_to_save + 'motifs/' + os.sep + self.fn #+ self.fn.replace('.pdb', '_motif.pdb')# #+ '.pdb' |
|
201
|
|
|
io.save(fn, BpSelect()) |
|
202
|
|
|
if verbose: |
|
203
|
|
|
print(' saved to motifs: %s ' % fn) |
|
204
|
|
|
|
|
205
|
|
|
#main |
|
206
|
|
|
if __name__ == '__main__': |
|
207
|
|
|
import doctest |
|
208
|
|
|
doctest.testmod() |
|
209
|
|
|
|
|
210
|
|
|
#rna = RNAmodel("test_data/rp14/rp14_5ddp_bound_clean_ligand.pdb", [1], False, Non3e) |
|
211
|
|
|
#print(rna.get_report()) |
|
212
|
|
|
a = RNAmodel("test_data/GGC.pdb", [46,47,48]) |
|
213
|
|
|
b = RNAmodel("test_data/GUC.pdb", [31, 32, 33]) |
|
214
|
|
|
|
|
215
|
|
|
print(a.get_rmsd_to(b)) |
|
216
|
|
|
print(a.get_rmsd_to(b, dont_move=True)) |
|
217
|
|
|
|
|
218
|
|
|
|