|
1
|
|
|
""" |
|
2
|
|
|
flatten_obj.py - Flatten multi-state pymol objects into a single state. |
|
3
|
|
|
|
|
4
|
|
|
<https://pymolwiki.org/index.php/Flatten_obj> |
|
5
|
|
|
|
|
6
|
|
|
This is particularly useful for dealing with biological assemblies, which are |
|
7
|
|
|
loaded as multi-state objects when fetched using `fetch PDBID, type=pdb1`. It |
|
8
|
|
|
can also be used as a quick way to combine multiple objects without causing |
|
9
|
|
|
collisions between chain identifiers. |
|
10
|
|
|
|
|
11
|
|
|
The command re-letters chains to avoid collisions. Older versions of PyMOL |
|
12
|
|
|
restrict the chain id to a single character, so the script will fail for |
|
13
|
|
|
assemblies with >62 chains. With more recent versions, this problem is solved |
|
14
|
|
|
with multi-character chain IDs. Several options are available for how |
|
15
|
|
|
re-lettering should occur. |
|
16
|
|
|
|
|
17
|
|
|
Author: Spencer Bliven <[email protected]> |
|
18
|
|
|
Date: October 30, 2015 |
|
19
|
|
|
Version: 1.0 |
|
20
|
|
|
License: Public Domain |
|
21
|
|
|
""" |
|
22
|
|
|
from pymol import cmd, stored |
|
23
|
|
|
import re |
|
24
|
|
|
try: |
|
25
|
|
|
from collections import OrderedDict |
|
26
|
|
|
_orderedDict = True |
|
27
|
|
|
except ImportError: |
|
28
|
|
|
_orderedDict = False |
|
29
|
|
|
|
|
30
|
|
|
# PyMOL 1.7.4 introduces support for multi-letter chains, so we can afford to
# use a smaller alphabet (upper-case letters and digits: 36 symbols). In
# earlier versions, fall back to the 62-symbol alphabet, which adds lower-case
# letters if needed (requires running `set ignore_case, 0`).
# The second element of cmd.get_version() is compared as a float, so 1.7.4 is
# written as 1.74 here.
_long_chains = cmd.get_version()[1] >= 1.74
# Default alphabet size used by ChainSet._int_to_chain.
_default_base = 36 if _long_chains else 62
|
35
|
|
|
|
|
36
|
|
|
class OutOfChainsError(Exception):
    """
    Raised when no further chain identifiers can be generated.

    msg = human-readable description of the failure; also available as the
          first element of `args`, like any standard exception.
    """
    def __init__(self,msg):
        # Forward the message to Exception so that `args`, repr() and
        # pickling carry it as well.
        super(OutOfChainsError,self).__init__(msg)
        self.msg=msg
    def __str__(self):
        return str(self.msg)
|
41
|
|
|
|
|
42
|
|
|
class ChainSet(object):
    """
    Base class for various methods to rename chains

    Contains _chains, which maps from the renamed chain to a tuple with the
    original (object,state,chain). All read-only dict-like accessors work on
    ChainSets, e.g.
        chain_set["A"] -> ("obj",1,"A")

    Subclasses must implement map_chain().
    """
    def __init__(self):
        # Use an OrderedDict in Python >= 2.7 for better printing
        if _orderedDict:
            self._chains = OrderedDict()
        else:
            self._chains = dict()

    def map_chain(self, obj, state, origChain ):
        """
        map_chain(string obj, int state, string chain) -> string

        Maps a chain letter to a unique chainID. Results are unique within each
        instance, and can be used as keys on this chain set.
        """
        raise NotImplementedError("Base class")

    # delegate most methods to _chains
    def __getattr__(self,at):
        # __getattr__ is only called when normal lookup fails. If _chains
        # itself is missing (e.g. while copy/pickle rebuilds an instance
        # without calling __init__), delegating would recurse forever.
        if at == "_chains":
            raise AttributeError(at)
        # Hide the mutating dict methods; chains are only added via map_chain
        if at in "pop popitem update setdefault".split():
            raise AttributeError("type object '%s' has no attribute '%s'"%(type(self),at))
        return getattr(self._chains,at)
    # __cmp__ is only consulted by Python 2; dicts have no __cmp__ in Python 3
    def __cmp__(self,other): return self._chains.__cmp__(other)
    def __eq__(self,other): return self._chains.__eq__(other)
    def __ge__(self,other): return self._chains.__ge__(other)
    def __gt__(self,other): return self._chains.__gt__(other)
    def __le__(self,other): return self._chains.__le__(other)
    def __lt__(self,other): return self._chains.__lt__(other)
    def __ne__(self,other): return self._chains.__ne__(other)
    def __len__(self): return self._chains.__len__()
    def __contains__(self,key): return self._chains.__contains__(key)
    def __getitem__(self,key): return self._chains.__getitem__(key)
    def __iter__(self): return self._chains.__iter__()
    def __str__(self): return str(self._chains)

    @staticmethod
    def _int_to_chain(i,base=_default_base):
        """
        _int_to_chain(int,int) -> str

        Converts a non-negative integer to a chain ID. Chain IDs include
        uppercase characters, numbers, and optionally lowercase letters.
        IDs are enumerated shortest first: A..Z, 0..9, (a..z,) AA, AB, ...

        i = a non-negative integer to convert
        base = the alphabet size to include (1 to 62). Typically 36 or 62.

        Raises ValueError if i is negative or base is outside 1..62.
        """
        if i < 0:
            raise ValueError("positive integers only")
        # base 0 would otherwise fail later with a ZeroDivisionError
        if base < 1 or 62 < base:
            raise ValueError("Invalid base")

        alphabet = ("ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                    "0123456789"
                    "abcdefghijklmnopqrstuvwxyz")

        i = int(i)
        letters = []
        while True:
            i, rem = divmod(i, base)
            letters.append(alphabet[rem])
            if i == 0:
                break
            # Not plain positional notation: "AA" follows the last
            # single-character ID, so the leading part is offset by one.
            i -= 1
        letters.reverse()
        return "".join(letters)
|
114
|
|
|
|
|
115
|
|
|
|
|
116
|
|
|
class DefaultChainSet(ChainSet):
    """
    Keeps the original chain ID whenever it is still free. When the ID has
    already been taken, the next unused generated ID is assigned instead.
    Be aware that this may trigger cascading renames, e.g. when chains arrive
    sorted alphabetically rather than grouped by object.

    Used for rename = 0.
    """
    def __init__(self):
        super(DefaultChainSet,self).__init__()
        # Index of the next generated chain ID to try
        self._next_chain = 0

    def map_chain(self, obj, state, origChain ):
        # Advance the counter past every generated ID that is already in use
        candidate = ChainSet._int_to_chain(self._next_chain)
        while candidate in self:
            self._next_chain += 1
            candidate = ChainSet._int_to_chain(self._next_chain)

        if origChain not in self:
            # Original ID is free; keep it
            assigned = origChain
        else:
            # Collision; hand out the generated ID and consume it
            assigned = candidate
            self._next_chain += 1

        self._chains[assigned] = (obj,state,origChain)
        return assigned
|
141
|
|
|
|
|
142
|
|
|
class SequentialChainSet(ChainSet):
    """
    Assigns every chain a fresh ID in sequence, beginning with A, running
    through the capital letters and numbers, and then on to longer IDs
    through 9999 (the last valid chain for mmCIF) and beyond.

    Used for rename=1
    """
    def __init__(self):
        super(SequentialChainSet,self).__init__()
        # Index of the next chain ID to hand out
        self._next_chain = 0

    def map_chain(self, obj, state, origChain ):
        index = self._next_chain
        label = ChainSet._int_to_chain(index)
        self._chains[label] = (obj,state,origChain)
        # Only advance once the mapping has been recorded
        self._next_chain = index + 1
        return label
|
159
|
|
|
|
|
160
|
|
|
class LongChainSet(ChainSet):
    """
    Builds descriptive, long chain names of the form
        "%s_%s_%04d"%(original_chainid,objectname,state).

    Used for rename=2
    """
    def map_chain(self, obj, state, origChain ):
        label = "%s_%s_%04d"%(origChain,obj,state)
        if label not in self:
            self._chains[label] = (obj,state,origChain)
            return label
        # The same (chain, object, state) triple was mapped twice
        raise ValueError("Duplicate chain %s"%(label))
|
173
|
|
|
|
|
174
|
|
|
|
|
175
|
|
|
|
|
176
|
|
|
|
|
177
|
|
|
def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7.4. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

    Intermediate objects named "temp_*" are created while flattening and are
    deleted afterwards, even if an error occurs.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """

    # arguments

    # Single argument; treat as selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    metaprefix = "temp" #TODO unique prefix

    # matches split object names: <metaprefix>_<object>_<state number>
    statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix))

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # Everything that creates intermediates runs inside this try block, so
        # they are removed even when splitting fails part-way through.
        try:
            # create new object for each state
            for obj in cmd.get_object_list(selection):

                if state <= 0:
                    # all states
                    prefix = "%s_%s_"%(metaprefix,obj)
                    cmd.split_states(obj,prefix=prefix)
                else:
                    prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                    cmd.create(prefix, obj, state, 1)

            # renumber all states
            # Iterate over all objects with metaprefix
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print(("Failed to match object %s" %obj))
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # Short chain IDs first, then alphabetical
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print((" %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain) ))
                    if not _long_chains:
                        # Old PyMOL versions only accept one-character IDs
                        if len(new_chain) > 1:
                            raise OutOfChainsError("No additional chains available (max 62).")

                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            if not quiet:
                print(("Creating object from %s_*"%metaprefix))
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet):
                print("Warning: using lower-case chain IDs. Consider running the "
                        "following command:\n set ignore_case, 0" )

        finally:
            # Clean up
            if not quiet:
                print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        if not quiet:
            print("Resetting variables")
        cmd.set("retain_order",retain_order)
|
332
|
|
|
|
|
333
|
|
|
|
|
334
|
|
|
# Register flatten_obj as a PyMOL command
cmd.extend('flatten_obj', flatten_obj)

# tab-completion of arguments: object names are offered for the first two
# positional arguments (name/selection and selection)
cmd.auto_arg[0]['flatten_obj'] = [ cmd.object_sc, 'name or selection', '']
cmd.auto_arg[1]['flatten_obj'] = [ cmd.object_sc, 'selection', '']
|
339
|
|
|
|