"""
flatten_obj.py - Flatten multi-state pymol objects into a single state.

<https://pymolwiki.org/index.php/Flatten_obj>

This is particularly useful for dealing with biological assemblies, which are
loaded as multi-state objects when fetched using `fetch PDBID, type=pdb1`. It
can also be used as a quick way to combine multiple objects without causing
collisions between chain identifiers.

The command re-letters chains to avoid collisions. Older versions of PyMOL
restrict the chain id to a single character, so the script will fail for
assemblies with >62 chains. With more recent versions, this problem is solved
with multi-character chain IDs. Several options are available for how
re-lettering should occur.

Author: Spencer Bliven <[email protected]>
Date: October 30, 2015
Version: 1.0
License: Public Domain
"""
from pymol import cmd, stored
import re

# OrderedDict is optional: record whether it could be imported so that
# ChainSet can fall back to a plain dict when it is missing.
try:
    from collections import OrderedDict
except ImportError:
    _orderedDict = False
else:
    _orderedDict = True

# PyMOL 1.7.4 introduces support for multi-letter chains, so we can afford to
# use a smaller alphabet. In earlier versions, use lower-case letters if needed
# (requires running `set ignore_case, 0`)
_long_chains = cmd.get_version()[1] >= 1.74
if _long_chains:
    # Upper-case letters and digits only
    _default_base = 36
else:
    # Upper-case letters, digits and lower-case letters
    _default_base = 62

class OutOfChainsError(Exception):
    """
    Raised when no further chain ID can be assigned.

    msg = the human-readable explanation; it is available as `.msg`, as
        `.args[0]`, and through str().
    """
    def __init__(self, msg):
        # Forward the message to Exception so that `args` is populated; the
        # default copy/pickle support of exceptions rebuilds from `args`.
        super(OutOfChainsError, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)

class ChainSet(object):
    """
    Base class for various methods to rename chains

    Contains _chains, which maps from the renamed chain to a tuple with the
    original (object,state,chain). Read-only dict-like accessors work on
    ChainSets, e.g.
        chain_set["A"] -> ("obj",1,"A")

    Subclasses implement map_chain() to decide which new chain ID is assigned.
    """
    def __init__(self):
        # Use an OrderedDict when available (Python >= 2.7) for better printing
        if _orderedDict:
            self._chains = OrderedDict()
        else:
            self._chains = dict()

    def map_chain(self, obj, state, origChain):
        """
        map_chain(string obj, int state, string chain) -> string

        Maps a chain letter to a unique chainID. Results are unique within each
        instance, and can be used as keys on this chain set.

        Raises NotImplementedError here; subclasses provide the behavior.
        """
        raise NotImplementedError("Base class")

    # delegate most methods to _chains
    def __getattr__(self, at):
        # __getattr__ only runs after normal lookup has failed. If _chains
        # itself is missing (instance created without running __init__), the
        # delegation below would look up _chains again and recurse without
        # bound, so fail cleanly instead.
        if at == "_chains":
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (type(self).__name__, at))
        # These dict methods are deliberately not exposed
        if at in "pop popitem update setdefault".split():
            raise AttributeError("type object '%s' has no attribute '%s'"%(type(self),at))
        return getattr(self._chains, at)

    # Special methods are looked up on the type, not through __getattr__, so
    # each one has to be forwarded explicitly.
    def __cmp__(self, other): return self._chains.__cmp__(other)
    def __eq__(self, other): return self._chains.__eq__(other)
    def __ge__(self, other): return self._chains.__ge__(other)
    def __gt__(self, other): return self._chains.__gt__(other)
    def __le__(self, other): return self._chains.__le__(other)
    def __lt__(self, other): return self._chains.__lt__(other)
    def __ne__(self, other): return self._chains.__ne__(other)
    def __len__(self): return self._chains.__len__()
    def __contains__(self, key): return self._chains.__contains__(key)
    def __getitem__(self, key): return self._chains.__getitem__(key)
    def __iter__(self): return self._chains.__iter__()
    def __str__(self): return str(self._chains)

    @staticmethod
    def _int_to_chain(i, base=None):
        """
        _int_to_chain(int,int) -> str

        Converts a non-negative integer to a chain ID. Chain IDs include
        uppercase characters, numbers, and optionally lowercase letters.
        Values 0..base-1 give one character; larger values give longer IDs
        (e.g. with base 36: 0 -> "A", 35 -> "9", 36 -> "AA").

        i = a non-negative integer to convert
        base = the alphabet size to include, 1 to 62. Typically 36 or 62.
            If omitted, the module-level _default_base is used.

        Raises ValueError if i is negative or base is outside 1..62.
        """
        if base is None:
            # Resolved at call time so the class does not need the module
            # constant to exist when the class body is executed.
            base = _default_base
        if i < 0:
            raise ValueError("positive integers only")
        # base 0 must be rejected too: it would otherwise divide by zero
        if base < 1 or 62 < base:
            raise ValueError("Invalid base")

        alphabet = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                    "0123456789"
                    "abcdefghijklmnopqrstuvwxyz")
        # Convert once so quotient and remainder are both integers
        i = int(i)
        letters = []
        while True:
            i, rem = divmod(i, base)
            letters.append(alphabet[rem])
            if i == 0:
                break
            # Offset by one so that the leading position also starts at the
            # first symbol ("AA" follows the last single-character ID).
            i -= 1
        letters.reverse()
        return "".join(letters)

class DefaultChainSet(ChainSet):
    """
    Keeps the original chain ID whenever it is still free. When the ID is
    already taken, the next unused ID from the sequential alphabet is assigned
    instead. Note that this can potentially lead to cascading renames, e.g. if
    chains are sorted alphabetically rather than by object.

    Used for rename = 0.
    """
    def __init__(self):
        super(DefaultChainSet, self).__init__()
        # Index of the next sequential ID to try when a rename is needed
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        """
        map_chain(string obj, int state, string chain) -> string

        Records (obj, state, origChain) under a unique chain ID and returns
        that ID: origChain itself if unused, otherwise the next free
        sequential ID.
        """
        # Move the counter past every sequential ID that is already in use
        candidate = ChainSet._int_to_chain(self._next_chain)
        while candidate in self:
            self._next_chain += 1
            candidate = ChainSet._int_to_chain(self._next_chain)

        if origChain in self:
            # Collision: hand out the free sequential ID and consume it
            assigned = candidate
            self._next_chain += 1
        else:
            assigned = origChain

        self._chains[assigned] = (obj, state, origChain)
        return assigned

class SequentialChainSet(ChainSet):
    """
    Renumbers all chains starting at A, continuing through the capital letters
    and numbers, and then adding additional letters through 9999 (the last
    valid chain for mmCIF) and beyond. The original chain ID is ignored when
    choosing the new one.

    Used for rename=1
    """
    def __init__(self):
        super(SequentialChainSet, self).__init__()
        # Index of the next ID to hand out
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        """
        map_chain(string obj, int state, string chain) -> string

        Assigns the next sequential chain ID, records (obj, state, origChain)
        under it, and returns the ID.
        """
        label = ChainSet._int_to_chain(self._next_chain)
        origin = (obj, state, origChain)
        self._chains[label] = origin
        self._next_chain += 1
        return label

class LongChainSet(ChainSet):
    """
    Uses long strings for the chain names. Chains are renamed like
    "%s_%s_%04d"%(original_chainid,objectname,state).

    Used for rename=2
    """
    def map_chain(self, obj, state, origChain):
        """
        map_chain(string obj, int state, string chain) -> string

        Builds the long chain name, records (obj, state, origChain) under it,
        and returns it. Raises ValueError if that name was already assigned.
        """
        label = "%s_%s_%04d" % (origChain, obj, state)
        if label not in self:
            self._chains[label] = (obj, state, origChain)
            return label
        raise ValueError("Duplicate chain %s" % (label))

def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7.4. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """
    # Developer notes:
    #   Raises ValueError for an unrecognized rename option, and
    #   OutOfChainsError when a multi-character chain ID would be needed but
    #   _long_chains is false.

    # arguments

    # Single argument; treat as selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    # Arguments arrive as strings when invoked from the PyMOL command line
    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    # TODO unique prefix. NOTE: every object matching "<metaprefix>_*" is
    # merged into the result and deleted during cleanup, so pre-existing
    # objects with such names are affected as well.
    metaprefix = "temp"

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # create new object for each state
        for obj in cmd.get_object_list(selection):

            if state <= 0:
                # all states
                prefix = "%s_%s_"%(metaprefix,obj)
                cmd.split_states(obj,prefix=prefix)
            else:
                prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                cmd.create(prefix, obj, state, 1)

        # renumber all states
        # matches split object names: <metaprefix>_<object>_<state number>
        # (raw string so that \d is a regex escape, not a string escape)
        statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix))

        # Iterate over all objects with metaprefix
        try:
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print("Failed to match object %s" %obj)
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # Shorter chain IDs first, then alphabetical
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(" %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain) )
                    if not _long_chains:
                        if len(new_chain) > 1:
                            raise OutOfChainsError("No additional chains available (max 62).")

                # Apply the renaming to the atoms of this intermediate object
                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            if not quiet:
                print("Creating object from %s_*"%metaprefix)
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet.keys()):
                print("Warning: using lower-case chain IDs. Consider running the "
                      "following command:\n set ignore_case, 0" )

        finally:
            # Clean up
            if not quiet:
                print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        if not quiet:
            print("Resetting variables")
        cmd.set("retain_order",retain_order)

# Make flatten_obj available as a PyMOL command
cmd.extend("flatten_obj", flatten_obj)

# tab-completion of arguments
# (index 0 / 1 presumably select the first / second command argument -
# confirm against the PyMOL auto_arg documentation)
cmd.auto_arg[0]["flatten_obj"] = [cmd.object_sc, "name or selection", ""]
cmd.auto_arg[1]["flatten_obj"] = [cmd.object_sc, "selection", ""]