DefaultChainSet.__init__()   A
last analyzed

Complexity

Conditions 1

Size

Total Lines 3
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 3
nop 1
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
"""
2
flatten_obj.py - Flatten multi-state pymol objects into a single state.
3
4
<https://pymolwiki.org/index.php/Flatten_obj>
5
6
This is particularly useful for dealing with biological assemblies, which are
7
loaded as multi-state objects when fetched using `fetch PDBID, type=pdb1`. It
8
can also be used as a quick way to combine multiple objects without causing
9
collisions between chain identifiers.
10
11
The command re-letters chains to avoid collisions. Older versions of PyMOL
12
restrict the chain id to a single character, so the script will fail for
13
assemblies with >62 chains. With more recent versions, this problem is solved
14
with multi-character chain IDs. Several options are available for how
15
re-lettering should occur.
16
17
Author: Spencer Bliven <[email protected]>
18
Date: October 30, 2015
19
Version: 1.0
20
License: Public Domain
21
"""
22
from pymol import cmd, stored
23
import re
24
try:
25
    from collections import OrderedDict
26
    _orderedDict = True
27
except ImportError:
28
    _orderedDict = False
29
30
# Multi-letter chain IDs are available from PyMOL 1.7.4 onward, so a 36-symbol
# alphabet (A-Z, 0-9) is enough there. Older versions are limited to a single
# character per chain and therefore also draw on lower-case letters, for 62
# symbols in total (this requires running `set ignore_case, 0`).
_long_chains = cmd.get_version()[1] >= 1.74
_default_base = 62 if not _long_chains else 36
35
36
class OutOfChainsError(Exception):
    """
    Signals that no further chain identifiers are available.

    The message passed to the constructor is kept in the `msg` attribute and
    is what str() returns for the exception.
    """

    def __init__(self, msg):
        self.msg = msg

    def __str__(self):
        text = str(self.msg)
        return text
41
42
class ChainSet(object):
    """
    Base class for various methods to rename chains

    Contains _chains, which maps from the renamed chain to a tuple with the
    original (object,state,chain). Read-only dict-like accessors work on
    ChainSets, e.g.
        chain_set["A"] -> ("obj",1,"A")

    The mutating dict methods pop, popitem, update and setdefault are
    deliberately not exposed; entries are added through map_chain(), which
    subclasses must implement.
    """
    def __init__(self):
        # Use an OrderedDict where available (Python >= 2.7) for better printing
        if _orderedDict:
            self._chains = OrderedDict()
        else:
            self._chains = dict()

    def map_chain(self, obj, state, origChain ):
        """
        map_chain(string obj, int state, string chain) -> string

        Maps a chain letter to a unique chainID. Results are unique within each
        instance, and can be used as keys on this chain set.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError("Base class")

    @staticmethod
    def _unwrap(other):
        """Return the underlying mapping if other is a ChainSet, else other."""
        if isinstance(other, ChainSet):
            return other._chains
        return other

    # delegate most methods to _chains
    def __getattr__(self,at):
        # __getattr__ only runs when normal lookup fails. If _chains itself is
        # missing (instance created without __init__, e.g. by copy or pickle),
        # delegating would look up _chains again and recurse without bound.
        if at == "_chains":
            raise AttributeError(at)
        if at in "pop popitem update setdefault".split():
            raise AttributeError("type object '%s' has no attribute '%s'"%(type(self),at))
        return getattr(self._chains,at)

    # Comparisons unwrap ChainSet operands so that two chain sets are compared
    # by content instead of falling back to identity.
    def __cmp__(self,other):    return self._chains.__cmp__(ChainSet._unwrap(other))
    def __eq__(self,other):     return self._chains.__eq__(ChainSet._unwrap(other))
    def __ge__(self,other):     return self._chains.__ge__(ChainSet._unwrap(other))
    def __gt__(self,other):     return self._chains.__gt__(ChainSet._unwrap(other))
    def __le__(self,other):     return self._chains.__le__(ChainSet._unwrap(other))
    def __lt__(self,other):     return self._chains.__lt__(ChainSet._unwrap(other))
    def __ne__(self,other):     return self._chains.__ne__(ChainSet._unwrap(other))
    def __len__(self):          return self._chains.__len__()
    def __contains__(self,key): return self._chains.__contains__(key)
    def __getitem__(self,key):  return self._chains.__getitem__(key)
    def __iter__(self):         return self._chains.__iter__()
    def __str__(self):          return str(self._chains)

    @staticmethod
    def _int_to_chain(i,base=_default_base):
        """
        _int_to_chain(int,int) -> str

        Converts a non-negative integer to a chain ID. Chain IDs include
        uppercase characters, numbers, and optionally lowercase letters:
        0-25 map to "A"-"Z", 26-35 to "0"-"9" and 36-61 to "a"-"z". Values
        of `base` or more yield multi-character IDs (with base 36,
        36 -> "AA", 37 -> "AB", ...).

        i = a non-negative integer to convert
        base = the alphabet size to include (1 to 62). Typically 36 or 62.

        Raises ValueError if i is negative or base is outside 1..62.
        """
        if i < 0:
            raise ValueError("positive integers only")
        # A base of 0 would otherwise surface as a ZeroDivisionError below
        if base < 1 or 62 < base:
            raise ValueError("Invalid base")

        # Coerce once so quotient and remainder are derived from the same value
        quot, rem = divmod(int(i), base)
        if rem < 26:
            letter = chr( ord("A") + rem)
        elif rem < 36:
            letter = str( rem-26)
        else:
            letter = chr( ord("a") + rem - 36)
        if quot == 0:
            return letter
        else:
            # quot-1 makes the numbering bijective: "Z"/"9"/"z" is followed
            # by "AA" rather than "BA"
            return ChainSet._int_to_chain(quot-1,base) + letter
114
115
116
class DefaultChainSet(ChainSet):
    """
    Keeps the original chain ID whenever it is still free. A chain whose ID
    is already taken receives the next unused generated ID instead. Be aware
    that this may cascade, e.g. when chains arrive sorted alphabetically
    rather than grouped by object.

    Used for rename = 0.
    """

    def __init__(self):
        super(DefaultChainSet, self).__init__()
        # Index of the next generated chain ID to try
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        # Skip over generated IDs that are already taken
        candidate = ChainSet._int_to_chain(self._next_chain)
        while candidate in self:
            self._next_chain += 1
            candidate = ChainSet._int_to_chain(self._next_chain)

        if origChain not in self:
            # Original ID is free: keep it
            assigned = origChain
        else:
            # Collision: hand out the next free generated ID
            assigned = candidate
            self._next_chain += 1

        self._chains[assigned] = (obj, state, origChain)
        return assigned
141
142
class SequentialChainSet(ChainSet):
    """
    Assigns every chain a fresh ID in sequence, starting at A and running
    through the capital letters and numbers, then on to longer IDs through
    9999 (the last valid chain for mmCIF) and beyond.

    Used for rename=1
    """

    def __init__(self):
        super(SequentialChainSet, self).__init__()
        # Index of the next chain ID to hand out
        self._next_chain = 0

    def map_chain(self, obj, state, origChain):
        # The original chain ID plays no part in choosing the new one
        label = ChainSet._int_to_chain(self._next_chain)
        self._next_chain += 1
        self._chains[label] = (obj, state, origChain)
        return label
159
160
class LongChainSet(ChainSet):
    """
    Builds descriptive, long chain names of the form
    "%s_%s_%04d"%(original_chainid,objectname,state).

    Used for rename=2
    """

    def map_chain(self, obj, state, origChain):
        long_id = "%s_%s_%04d"%(origChain,obj,state)
        if long_id not in self:
            self._chains[long_id] = (obj, state, origChain)
            return long_id
        # The same (chain, object, state) triple was mapped before
        raise ValueError("Duplicate chain %s"%(long_id))
173
174
175
176
177
def flatten_obj(name="",selection="",state=0,rename=0,quiet=1,chain_map=""):
    """
DESCRIPTION

    "flatten_obj" combines multiple objects or states into a single object,
    renaming chains where required

USAGE

    flatten_obj name, selection[, state[, rename[, quiet[, chain_map]]]]

ARGUMENTS

    name = a unique name for the flattened object {default: flat}

    selection = the set of objects to include in the flattening. The selection
        will be expanded to include all atoms of objects. {default: all}

    state = the source state to select. Use 0 or -1 to flatten all states {default: 0}

    rename = The scheme to use for renaming chains: {default: 0}
        (0) preserve chains IDs where possible, rename other chains
            alphabetically
        (1) rename all chains alphabetically
        (2) rename chains using the original chain letter, object name, and state

    quiet = If set to 0, print some additional information about progress and
        chain renaming {default: 1}

    chain_map = An attribute name for the 'stored' scratch object. If
        specified, `stored.<chain_map>` will be populated with a dictionary
        mapping the new chain names to a tuple giving the originated object,
        state, and chainID. {default: ""}

NOTES

    Like the select command, if name is omitted then the default object name
    ("flat") is used as the name argument.

    Chain renaming is tricky. PDB files originally limited chains to single
    letter identifiers containing [A-Za-z0-9]. When this was found to be
    limiting, multi-letter chains (ideally < 4 chars) were allowed. This is
    supported as of PyMOL 1.7.4. Earlier versions do not accept rename=2, and
    will raise an exception when flattening a structure with more than 62
    chains.

    Raises ValueError for an unrecognized rename option and OutOfChainsError
    when a multi-character chain ID would be needed on a PyMOL version
    without multi-letter chain support.

EXAMPLES

    flatten_obj flat, nmrObj
    flatten_obj ( obj1 or obj2 )

SEE ALSO

    split_states

    """

    # arguments

    # Single argument; treat as selection
    if name and not selection:
        selection = name
        name = ""
    # default name and selection
    if not name:
        name = "flat"
    if not selection:
        selection = "(all)"

    # Arguments may arrive as strings (e.g. when invoked as a PyMOL command)
    state = int(state)
    rename = int(rename)
    quiet = int(quiet)

    # Wrap in extra parentheses for get_object_list
    selection = "( %s )" % selection

    if rename == 0:
        chainSet = DefaultChainSet()
    elif rename == 1:
        chainSet = SequentialChainSet()
    elif rename == 2:
        chainSet = LongChainSet()
    else:
        raise ValueError("Unrecognized rename option (Valid: 0,1,2)")

    metaprefix = "temp" #TODO unique prefix

    # store original value of retain_order, which causes weird interleaving of
    # structures if enabled.
    retain_order = cmd.get("retain_order")
    try:
        cmd.set("retain_order",0)

        # create new object for each state
        for obj in cmd.get_object_list(selection):

            if state <= 0:
                # all states
                prefix = "%s_%s_"%(metaprefix,obj)
                cmd.split_states(obj,prefix=prefix)
            else:
                prefix = "%s_%s_%04d"%(metaprefix,obj,state)
                cmd.create(prefix, obj, state, 1)

        # renumber all states
        # Raw string avoids the invalid "\d" escape; the prefix is escaped so
        # it is always matched literally.
        statere = re.compile(r"^%s_(.*)_(\d+)$" % re.escape(metaprefix)) # matches split object names

        # Iterate over all objects with metaprefix
        try:
            for obj in cmd.get_object_list("(%s_*)"%(metaprefix) ):
                m = statere.match(obj)
                if m is None:
                    print(("Failed to match object %s" %obj))
                    continue
                origobj = m.group(1)
                statenum = int(m.group(2))

                chains = cmd.get_chains(obj)

                rev_chain_map = {} #old -> new, for this obj only
                # Shorter chain IDs first, then alphabetically
                for chain in sorted(chains,key=lambda x:(len(x),x)):
                    new_chain = chainSet.map_chain(origobj,statenum,chain)
                    rev_chain_map[chain] = new_chain
                    if not quiet:
                        print(("  %s state %d chain %s -> %s"%(origobj,statenum,chain, new_chain) ))
                    if not _long_chains:
                        if len(new_chain) > 1:
                            raise OutOfChainsError("No additional chains available (max 62).")

                # Apply the renaming to the atoms of this intermediate object
                space = {'rev_chain_map':rev_chain_map}
                cmd.alter(obj,"chain = rev_chain_map[chain]",space=space)

            if not quiet:
                print(("Creating object from %s_*"%metaprefix))
            # Recombine into a single object
            cmd.create(name,"%s_*"%metaprefix)

            # Set chain_map
            if chain_map:
                setattr(stored,chain_map,chainSet)

            # Warn if lowercase chains were generated
            if cmd.get("ignore_case") == "on" and any(c.upper() != c for c in chainSet):
                print("Warning: using lower-case chain IDs. Consider running the "
                        "following command:\n  set ignore_case, 0" )

        finally:
            # Clean up
            if not quiet:
                print("Cleaning up intermediates")
            cmd.delete("%s_*"%metaprefix)
    finally:
        # restore original parameters
        if not quiet:
            print("Resetting variables")
        cmd.set("retain_order",retain_order)
332
333
334
# Make flatten_obj available as a PyMOL command
cmd.extend('flatten_obj', flatten_obj)

# Tab-completion: both the first and the second argument complete on object
# names
cmd.auto_arg[0]['flatten_obj'] = [cmd.object_sc, 'name or selection', '']
cmd.auto_arg[1]['flatten_obj'] = [cmd.object_sc, 'selection', '']
339