|
1
|
|
|
#!/usr/bin/env python |
|
2
|
|
|
|
|
3
|
|
|
""" |
|
4
|
|
|
Calculate RMSD between two XYZ files |
|
5
|
|
|
|
|
6
|
|
|
by: Jimmy Charnley Kromann <[email protected]> and Lars Andersen Bratholm <[email protected]> |
|
7
|
|
|
project: https://github.com/charnley/rmsd |
|
8
|
|
|
license: https://github.com/charnley/rmsd/blob/master/LICENSE |
|
9
|
|
|
|
|
10
|
|
|
""" |
|
11
|
|
|
import numpy as np |
|
12
|
|
|
import re |
|
13
|
|
|
from rna_tools.tools.extra_functions.select_fragment import is_in_selection |
|
14
|
|
|
|
|
15
|
|
|
def kabsch_rmsd(P, Q): |
|
16
|
|
|
""" |
|
17
|
|
|
Rotate matrix P unto Q and calculate the RMSD |
|
18
|
|
|
""" |
|
19
|
|
|
P = rotate(P, Q) |
|
20
|
|
|
return rmsd(P, Q) |
|
21
|
|
|
|
|
22
|
|
|
|
|
23
|
|
|
def rotate(P, Q): |
|
24
|
|
|
""" |
|
25
|
|
|
Rotate matrix P unto matrix Q using Kabsch algorithm |
|
26
|
|
|
""" |
|
27
|
|
|
U = kabsch(P, Q) |
|
28
|
|
|
|
|
29
|
|
|
# Rotate P |
|
30
|
|
|
P = np.dot(P, U) |
|
31
|
|
|
return P |
|
32
|
|
|
|
|
33
|
|
|
|
|
34
|
|
|
def kabsch(P, Q): |
|
35
|
|
|
""" |
|
36
|
|
|
The optimal rotation matrix U is calculated and then used to rotate matrix |
|
37
|
|
|
P unto matrix Q so the minimum root-mean-square deviation (RMSD) can be |
|
38
|
|
|
calculated. |
|
39
|
|
|
|
|
40
|
|
|
Using the Kabsch algorithm with two sets of paired point P and Q, |
|
41
|
|
|
centered around the center-of-mass. |
|
42
|
|
|
Each vector set is represented as an NxD matrix, where D is the |
|
43
|
|
|
the dimension of the space. |
|
44
|
|
|
|
|
45
|
|
|
The algorithm works in three steps: |
|
46
|
|
|
- a translation of P and Q |
|
47
|
|
|
- the computation of a covariance matrix C |
|
48
|
|
|
- computation of the optimal rotation matrix U |
|
49
|
|
|
|
|
50
|
|
|
http://en.wikipedia.org/wiki/Kabsch_algorithm |
|
51
|
|
|
|
|
52
|
|
|
Parameters: |
|
53
|
|
|
P -- (N, number of points)x(D, dimension) matrix |
|
54
|
|
|
Q -- (N, number of points)x(D, dimension) matrix |
|
55
|
|
|
|
|
56
|
|
|
Returns: |
|
57
|
|
|
U -- Rotation matrix |
|
58
|
|
|
|
|
59
|
|
|
""" |
|
60
|
|
|
|
|
61
|
|
|
# Computation of the covariance matrix |
|
62
|
|
|
C = np.dot(np.transpose(P), Q) |
|
63
|
|
|
|
|
64
|
|
|
# Computation of the optimal rotation matrix |
|
65
|
|
|
# This can be done using singular value decomposition (SVD) |
|
66
|
|
|
# Getting the sign of the det(V)*(W) to decide |
|
67
|
|
|
# whether we need to correct our rotation matrix to ensure a |
|
68
|
|
|
# right-handed coordinate system. |
|
69
|
|
|
# And finally calculating the optimal rotation matrix U |
|
70
|
|
|
# see http://en.wikipedia.org/wiki/Kabsch_algorithm |
|
71
|
|
|
V, S, W = np.linalg.svd(C) |
|
72
|
|
|
d = (np.linalg.det(V) * np.linalg.det(W)) < 0.0 |
|
73
|
|
|
|
|
74
|
|
|
if d: |
|
75
|
|
|
S[-1] = -S[-1] |
|
76
|
|
|
V[:, -1] = -V[:, -1] |
|
77
|
|
|
|
|
78
|
|
|
# Create Rotation matrix U |
|
79
|
|
|
U = np.dot(V, W) |
|
80
|
|
|
|
|
81
|
|
|
return U |
|
82
|
|
|
|
|
83
|
|
|
|
|
84
|
|
|
def centroid(X): |
|
85
|
|
|
""" |
|
86
|
|
|
Calculate the centroid from a vectorset X |
|
87
|
|
|
""" |
|
88
|
|
|
C = sum(X)/len(X) |
|
89
|
|
|
return C |
|
90
|
|
|
|
|
91
|
|
|
|
|
92
|
|
|
def rmsd(V, W): |
|
93
|
|
|
""" |
|
94
|
|
|
Calculate Root-mean-square deviation from two sets of vectors V and W. |
|
95
|
|
|
""" |
|
96
|
|
|
D = len(V[0]) |
|
97
|
|
|
N = len(V) |
|
98
|
|
|
rmsd = 0.0 |
|
99
|
|
|
for v, w in zip(V, W): |
|
100
|
|
|
rmsd += sum([(v[i]-w[i])**2.0 for i in range(D)]) |
|
101
|
|
|
return np.sqrt(rmsd/N) |
|
102
|
|
|
|
|
103
|
|
|
|
|
104
|
|
|
def get_coordinates(filename, selection, ignore_selection, fmt, ignore_hydrogens, way='all'): |
|
105
|
|
|
"""Get coordinates from filename.""" |
|
106
|
|
|
return get_coordinates_pdb(filename, selection, ignore_selection, ignore_hydrogens, way) |
|
107
|
|
|
|
|
108
|
|
|
|
|
109
|
|
|
def get_coordinates_pdb(filename, selection, ignore_selection, ignore_hydrogens, way='all'): |
|
110
|
|
|
""" |
|
111
|
|
|
Get coordinates from the first chain in a pdb file |
|
112
|
|
|
and return a vectorset with all the coordinates. |
|
113
|
|
|
|
|
114
|
|
|
""" |
|
115
|
|
|
# PDB files tend to be a bit of a mess. The x, y and z coordinates |
|
116
|
|
|
# are supposed to be in column 31-38, 39-46 and 47-54, but this is not always the case. |
|
117
|
|
|
# Because of this the three first columns containing a decimal is used. |
|
118
|
|
|
# Since the format doesn't require a space between columns, we use the above |
|
119
|
|
|
# column indices as a fallback. |
|
120
|
|
|
x_column = None |
|
121
|
|
|
V = [] |
|
122
|
|
|
# Same with atoms and atom naming. The most robust way to do this is probably |
|
123
|
|
|
# to assume that the atomtype is given in column 3. |
|
124
|
|
|
atoms = [] |
|
125
|
|
|
resi_set = set() |
|
126
|
|
|
if way == "c1p": |
|
127
|
|
|
way_atoms = ["C1'"] |
|
128
|
|
|
elif way == 'pooo': |
|
129
|
|
|
way_atoms = "P OP1 OP2 O5'".split() |
|
130
|
|
|
elif way == 'alpha': |
|
131
|
|
|
way_atoms = "P OP1 OP2 O5' C5'".split() |
|
132
|
|
|
elif way == 'backbone': |
|
133
|
|
|
way_atoms = "P OP1 OP2 O5' C5' C4' C3' O3'".split() |
|
134
|
|
|
elif way == 'po': |
|
135
|
|
|
way_atoms = "P OP1 OP2".split() |
|
136
|
|
|
elif way == 'no-backbone': |
|
137
|
|
|
way_atoms = "C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split() |
|
138
|
|
|
way_atoms += "N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split() |
|
139
|
|
|
way_atoms += "N1 C2 O2 N3 C4 O4 C5 C6".split() |
|
140
|
|
|
way_atoms += "N1 C2 O2 N3 C4 N4 C5 C6".split() |
|
141
|
|
|
elif way == 'bases': |
|
142
|
|
|
way_atoms = "N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split() |
|
143
|
|
|
way_atoms += "N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split() |
|
144
|
|
|
way_atoms += "N1 C2 O2 N3 C4 O4 C5 C6".split() |
|
145
|
|
|
way_atoms += "N1 C2 O2 N3 C4 N4 C5 C6".split() |
|
146
|
|
|
elif way == 'backbone+sugar': |
|
147
|
|
|
way_atoms = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1'".split() |
|
148
|
|
|
elif way == 'sugar': |
|
149
|
|
|
way_atoms = "C4' O4' C3' C2' O2' C1'".split() |
|
150
|
|
|
elif way == 'all': |
|
151
|
|
|
way_atoms = "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 O6 N1 C2 N2 N3 C4".split() |
|
152
|
|
|
way_atoms += "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N9 C8 N7 C5 C6 N6 N1 C2 N3 C4".split() |
|
153
|
|
|
way_atoms += "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 O4 C5 C6".split() |
|
154
|
|
|
way_atoms += "P OP1 OP2 O5' C5' C4' O4' C3' O3' C2' O2' C1' N1 C2 O2 N3 C4 N4 C5 C6".split() |
|
155
|
|
|
|
|
156
|
|
|
with open(filename) as f: |
|
157
|
|
|
lines = f.readlines() |
|
158
|
|
|
for line in lines: |
|
159
|
|
|
# hmm... |
|
160
|
|
|
# of models: 490 |
|
161
|
|
|
# Error: # of atoms is not equal target (1f27_rpr.pdb):641 vs model (struc/1f27_rnakbnm_decoy0001_amb_clx_rpr.pdb):408 |
|
162
|
|
|
#if line.startswith("TER") or line.startswith("END"): |
|
163
|
|
|
# break |
|
164
|
|
|
if line.startswith("ATOM"): |
|
165
|
|
|
curr_chain_id = line[21] |
|
166
|
|
|
curr_resi = int(line[22:26]) |
|
167
|
|
|
curr_atom_name = line[12:16].strip() |
|
168
|
|
|
if selection: |
|
169
|
|
|
if curr_chain_id in selection: |
|
170
|
|
|
if curr_resi in selection[curr_chain_id]: |
|
171
|
|
|
# ignore if to be ingored (!) |
|
172
|
|
|
#try: |
|
173
|
|
|
resi_set.add(curr_chain_id + ':' + str(curr_resi)) |
|
174
|
|
|
x = line[30:38] |
|
175
|
|
|
y = line[38:46] |
|
176
|
|
|
z = line[46:54] |
|
177
|
|
|
if ignore_selection: |
|
178
|
|
|
if not is_in_selection(ignore_selection, curr_chain_id, curr_resi, curr_atom_name): |
|
179
|
|
|
if curr_atom_name in way_atoms: |
|
|
|
|
|
|
180
|
|
|
V.append(np.asarray([x,y,z],dtype=float)) |
|
181
|
|
|
else: |
|
182
|
|
|
if curr_atom_name in way_atoms: |
|
183
|
|
|
V.append(np.asarray([x,y,z],dtype=float)) |
|
184
|
|
|
|
|
185
|
|
|
else: |
|
186
|
|
|
x = line[30:38] |
|
187
|
|
|
y = line[38:46] |
|
188
|
|
|
z = line[46:54] |
|
189
|
|
|
if curr_atom_name in way_atoms: |
|
190
|
|
|
V.append(np.asarray([x,y,z],dtype=float)) |
|
191
|
|
|
|
|
192
|
|
|
V = np.asarray(V) |
|
193
|
|
|
#print filename, resi_set, len(resi_set) |
|
194
|
|
|
return len(V), V |
|
195
|
|
|
|