1
|
|
|
#! /usr/bin/env python |
2
|
|
|
# |
3
|
|
|
# Copyright (C) 2015-2016 Rich Lewis <[email protected]> |
4
|
|
|
# License: 3-clause BSD |
5
|
|
|
|
6
|
1 |
|
""" |
7
|
|
|
## skchem.core.mol |
8
|
|
|
|
9
|
|
|
Defining molecules in scikit-chem. |
10
|
|
|
""" |
11
|
|
|
|
12
|
1 |
|
import copy |
13
|
|
|
|
14
|
1 |
|
import rdkit.Chem |
|
|
|
|
15
|
1 |
|
import rdkit.Chem.inchi |
|
|
|
|
16
|
1 |
|
from rdkit.Chem import AddHs, RemoveHs |
|
|
|
|
17
|
1 |
|
from rdkit.Chem.rdMolDescriptors import CalcMolFormula, CalcExactMolWt |
|
|
|
|
18
|
|
|
|
19
|
1 |
|
import json |
20
|
|
|
|
21
|
1 |
|
from .atom import AtomView |
22
|
1 |
|
from .bond import BondView |
23
|
1 |
|
from .conformer import ConformerView |
24
|
1 |
|
from .base import ChemicalObject, PropertyView |
25
|
1 |
|
from ..utils import Suppressor |
26
|
|
|
|
27
|
|
|
|
28
|
1 |
|
class Mol(rdkit.Chem.rdchem.Mol, ChemicalObject): |
29
|
|
|
|
30
|
|
|
"""Class representing a Molecule in scikit-chem. |
31
|
|
|
|
32
|
|
|
Mol objects inherit directly from rdkit Mol objects. Therefore, they |
33
|
|
|
contain atom and bond information, and may also include properties and |
34
|
|
|
atom bookmarks. |
35
|
|
|
|
36
|
|
|
Example: |
37
|
|
|
Constructors are implemented as class methods with the `from_` prefix. |
38
|
|
|
|
39
|
|
|
>>> import skchem |
40
|
|
|
>>> m = skchem.Mol.from_smiles('CC(=O)Cl'); m # doctest: +ELLIPSIS |
41
|
|
|
<Mol name="None" formula="C2H3ClO" at ...> |
42
|
|
|
|
43
|
|
|
This is an rdkit Mol: |
44
|
|
|
|
45
|
|
|
>>> from rdkit.Chem import Mol as RDKMol |
46
|
|
|
>>> isinstance(m, RDKMol) |
47
|
|
|
True |
48
|
|
|
|
49
|
|
|
A name can be given at initialization: |
50
|
|
|
>>> m = skchem.Mol.from_smiles('CC(=O)Cl', name='acetyl chloride'); m # doctest: +ELLIPSIS |
51
|
|
|
<Mol name="acetyl chloride" formula="C2H3ClO" at ...> |
52
|
|
|
|
53
|
|
|
>>> m.name |
54
|
|
|
'acetyl chloride' |
55
|
|
|
|
56
|
|
|
Serializers are implemented as instance methods with the `to_` prefix. |
57
|
|
|
|
58
|
|
|
>>> m.to_smiles() |
59
|
|
|
'CC(=O)Cl' |
60
|
|
|
|
61
|
|
|
>>> m.to_inchi() |
62
|
|
|
'InChI=1S/C2H3ClO/c1-2(3)4/h1H3' |
63
|
|
|
|
64
|
|
|
>>> m.to_inchi_key() |
65
|
|
|
'WETWJCDKMRHUPV-UHFFFAOYSA-N' |
66
|
|
|
|
67
|
|
|
RDKit properties are accessible through the `props` property: |
68
|
|
|
|
69
|
|
|
>>> m.SetProp('example_key', 'example_value') # set prop with rdkit directly |
70
|
|
|
>>> m.props['example_key'] |
71
|
|
|
'example_value' |
72
|
|
|
|
73
|
|
|
>>> m.SetIntProp('float_key', 42) # set int prop with rdkit directly |
74
|
|
|
>>> m.props['float_key'] |
75
|
|
|
42 |
76
|
|
|
|
77
|
|
|
They can be set too: |
78
|
|
|
|
79
|
|
|
>>> m.props['example_set'] = 'set_value' |
80
|
|
|
>>> m.GetProp('example_set') # getting with rdkit directly |
81
|
|
|
'set_value' |
82
|
|
|
|
83
|
|
|
We can export the properties into a dict or a pandas series: |
84
|
|
|
|
85
|
|
|
>>> m.props.to_series() |
86
|
|
|
example_key example_value |
87
|
|
|
example_set set_value |
88
|
|
|
float_key 42 |
89
|
|
|
dtype: object |
90
|
|
|
|
91
|
|
|
Atoms and bonds are provided in views: |
92
|
|
|
|
93
|
|
|
>>> m.atoms # doctest: +ELLIPSIS |
94
|
|
|
<AtomView values="['C', 'C', 'O', 'Cl']" at ...> |
95
|
|
|
|
96
|
|
|
>>> m.bonds # doctest: +ELLIPSIS |
97
|
|
|
<BondView values="['C-C', 'C=O', 'C-Cl']" at ...> |
98
|
|
|
|
99
|
|
|
These are iterable: |
100
|
|
|
>>> [a.symbol for a in m.atoms] |
101
|
|
|
['C', 'C', 'O', 'Cl'] |
102
|
|
|
|
103
|
|
|
The view provides shorthands for some attributes to get these: |
104
|
|
|
|
105
|
|
|
>>> m.atoms.symbol # doctest: +ELLIPSIS |
106
|
|
|
array(['C', 'C', 'O', 'Cl'], dtype=...) |
107
|
|
|
|
108
|
|
|
Atom and bond props can also be set: |
109
|
|
|
|
110
|
|
|
>>> m.atoms[0].props['atom_key'] = 'atom_value' |
111
|
|
|
>>> m.atoms[0].props['atom_key'] |
112
|
|
|
'atom_value' |
113
|
|
|
|
114
|
|
|
The properties for atoms on the whole molecule can be accessed like so: |
115
|
|
|
|
116
|
|
|
>>> m.atoms.props # doctest: +ELLIPSIS |
117
|
|
|
<MolPropertyView values="{'atom_key': ['atom_value', None, None, None]}" at ...> |
118
|
|
|
|
119
|
|
|
The properties can be exported as a pandas dataframe |
120
|
|
|
>>> m.atoms.props.to_frame() |
121
|
|
|
atom_key |
122
|
|
|
atom_idx |
123
|
|
|
0 atom_value |
124
|
|
|
1 None |
125
|
|
|
2 None |
126
|
|
|
3 None |
127
|
|
|
|
128
|
|
|
""" |
129
|
|
|
|
130
|
1 |
|
def __init__(self, *args, **kwargs):
    """Initialize the molecule by delegating to the parent initializers.

    Note:
        Direct construction is uncommon; the ``from_*`` class methods are
        the usual way to obtain a populated molecule.

    Args:
        *args: Positional arguments forwarded to the rdkit Mol constructor.
        **kwargs: Keyword arguments forwarded to the rdkit Mol constructor.
    """
    parent = super(Mol, self)
    parent.__init__(*args, **kwargs)
    # private placeholder; this method never assigns it a real value
    self.__two_d = None
144
|
|
|
|
145
|
1 |
|
@property
def name(self):
    """str: The name of the molecule, or ``None`` when no name is set.

    The name is read from the ``_Name`` property.  A missing property is
    reported as ``None``; the ``KeyError`` is handled here, not raised.
    """
    try:
        current = self.GetProp('_Name')
    except KeyError:
        current = None
    return current

@name.setter
def name(self, value):
    """Store `value` under ``_Name``; assigning ``None`` clears it."""
    if value is not None:
        self.SetProp('_Name', value)
    else:
        self.ClearProp('_Name')
|
|
|
|
165
|
|
|
|
166
|
1 |
|
@property
def atoms(self):
    """AtomView: view over the atoms of the molecule.

    The view is created on first access, stored on the instance as
    ``_atoms`` and reused afterwards.
    """
    try:
        view = self._atoms
    except AttributeError:
        view = self._atoms = AtomView(self)
    return view
174
|
|
|
|
175
|
1 |
|
@property
def bonds(self):
    """BondView: view over the bonds of the molecule.

    The view is created on first access, stored on the instance as
    ``_bonds`` and reused afterwards.
    """
    try:
        view = self._bonds
    except AttributeError:
        view = self._bonds = BondView(self)
    return view
183
|
|
|
|
184
|
1 |
|
@property
def mass(self):
    """float: the mass of the molecule, as given by ``CalcExactMolWt``."""
    exact_weight = CalcExactMolWt(self)
    return exact_weight
190
|
|
|
|
191
|
1 |
|
@property
def props(self):
    """PropertyView: dictionary-like view of the molecule's properties.

    The view is created on first access, stored on the instance as
    ``_props`` and reused afterwards.
    """
    try:
        view = self._props
    except AttributeError:
        view = self._props = PropertyView(self)
    return view
199
|
|
|
|
200
|
1 |
|
@property
def conformers(self):
    """ConformerView: view over the conformers of the molecule.

    The view is created on first access, stored on the instance as
    ``_conformers`` and reused afterwards.
    """
    try:
        view = self._conformers
    except AttributeError:
        view = self._conformers = ConformerView(self)
    return view
208
|
|
|
|
209
|
1 |
|
def to_formula(self):
    """Return the chemical formula of the molecule.

    Returns:
        str: the formula computed by ``CalcMolFormula``.

    Raises:
        ValueError: If ``CalcMolFormula`` fails with a ``RuntimeError``.
    """
    # The formula may be undefined if atoms are uncertainly typed,
    # e.g. if the molecule was initialized through SMARTS.
    try:
        with Suppressor():
            return CalcMolFormula(self)
    except RuntimeError:
        message = 'Formula is undefined for {}'.format(self)
        raise ValueError(message)
223
|
|
|
|
224
|
1 |
|
def add_hs(self, inplace=False, add_coords=True, explicit_only=False,
           only_on_atoms=False):
    """Return a new molecule with hydrogens added.

    Args:
        inplace (bool):
            Whether to add Hs to this `Mol` instead of returning a new
            one.  Not supported yet.
        add_coords (bool):
            Whether to set 3D coordinates for the added Hs.
        explicit_only (bool):
            Whether to add only explicit Hs, or also implicit ones.
        only_on_atoms (iterable):
            The atoms to add Hs to; forwarded to rdkit as ``onlyOnAtoms``.

    Returns:
        skchem.Mol:
            `Mol` with Hs added.

    Raises:
        NotImplementedError: If `inplace` is true.
    """
    if inplace:
        raise NotImplementedError(
            'Inplace addition of Hs is not yet supported.')

    options = {
        'explicitOnly': explicit_only,
        'addCoords': add_coords,
        'onlyOnAtoms': only_on_atoms,
    }
    hydrogenated = AddHs(self, **options)
    return self.__class__.from_super(hydrogenated)
247
|
|
|
|
248
|
1 |
|
def remove_hs(self, inplace=False, sanitize=True, update_explicit=False,
              implicit_only=False):
    """Return a new molecule with hydrogens removed.

    Args:
        inplace (bool):
            Whether to remove Hs from this `Mol` instead of returning a
            new one.  Not supported yet.
        sanitize (bool):
            Whether to sanitize after Hs are removed.
        update_explicit (bool):
            Whether to update the explicit H count after the removal.
        implicit_only (bool):
            Forwarded to rdkit as ``implicitOnly``; when set, only
            implicit Hs are removed rather than explicit and implicit.

    Returns:
        skchem.Mol:
            `Mol` with Hs removed.

    Raises:
        NotImplementedError: If `inplace` is true.
    """
    if inplace:
        msg = 'Inplace removal of Hs is not yet supported.'
        raise NotImplementedError(msg)
    raw = RemoveHs(self, implicitOnly=implicit_only,
                   updateExplicitCount=update_explicit, sanitize=sanitize)
    return self.__class__.from_super(raw)
272
|
|
|
|
273
|
1 |
|
def to_dict(self, kind="chemdoodle", conformer_id=-1):
    """Build a dictionary representation of the molecule.

    Args:
        kind (str):
            The type of representation to use.  Only `chemdoodle` is
            currently supported.
        conformer_id (int):
            Index of the conformer whose positions are exported; passed
            on to the chemdoodle exporter.

    Returns:
        dict:
            dictionary representation of the molecule.

    Raises:
        NotImplementedError: If `kind` is anything other than `chemdoodle`.
    """
    if kind != "chemdoodle":
        raise NotImplementedError
    return self._to_dict_chemdoodle(conformer_id=conformer_id)
291
|
|
|
|
292
|
1 |
|
def _to_dict_chemdoodle(self, conformer_id=-1): |
293
|
|
|
|
294
|
|
|
""" Chemdoodle dict representation of the molecule. |
295
|
|
|
|
296
|
|
|
Documentation of the format may be found on the `chemdoodle website \ |
297
|
|
|
<https://web.chemdoodle.com/docs/chemdoodle-json-format>`_""" |
298
|
|
|
|
299
|
1 |
|
try: |
300
|
1 |
|
pos = self.conformers[conformer_id].positions |
301
|
1 |
|
except IndexError as e: |
|
|
|
|
302
|
1 |
|
if conformer_id == -1: |
303
|
|
|
# no conformers available, so we generate one with 2d coords, |
304
|
|
|
# save the positions, then delete the conf |
305
|
|
|
|
306
|
1 |
|
self.conformers.append_2d() |
307
|
1 |
|
pos = self.conformers[0].positions |
308
|
|
|
|
309
|
1 |
|
del self.conformers[0] |
310
|
|
|
else: |
311
|
1 |
|
raise e |
312
|
|
|
|
313
|
1 |
|
atoms = [{'x': p[0], 'y': p[1], 'z': p[2], 'l': s} |
314
|
|
|
for s, p in zip(self.atoms.symbol, pos.round(4))] |
315
|
|
|
|
316
|
1 |
|
bonds = [b.to_dict() for b in self.bonds] |
317
|
|
|
|
318
|
1 |
|
return {"m": [{"a": atoms, "b": bonds}]} |
319
|
|
|
|
320
|
1 |
|
def to_json(self, kind='chemdoodle'):
    """Serialize the molecule to a JSON string.

    Args:
        kind (str):
            The type of serialization to use.  Only `chemdoodle` is
            currently supported.

    Returns:
        str: the JSON encoding of ``self.to_dict(kind=kind)``.
    """
    payload = self.to_dict(kind=kind)
    return json.dumps(payload)
333
|
|
|
|
334
|
1 |
|
def to_inchi_key(self):
    """Compute the InChI key of the molecule.

    Returns:
        str: the InChI key.

    Raises:
        ImportError: If rdkit reports that InChI support is unavailable.
        RuntimeError: If no key could be generated from the InChI string.
    """
    if not rdkit.Chem.inchi.INCHI_AVAILABLE:
        raise ImportError("InChI module not available.")

    key = rdkit.Chem.InchiToInchiKey(self.to_inchi())
    if key is not None:
        return key

    raise RuntimeError("An InChI key could not be generated.")
353
|
|
|
|
354
|
1 |
|
def to_binary(self):
    """Serialize the molecule to its binary encoding.

    Returns:
        bytes: the result of ``self.ToBinary()``.

    Notes:
        Due to limitations in RDKit, not all data is serialized.  Notably,
        properties are not, so e.g. compound names are not saved.
    """
    encoded = self.ToBinary()
    return encoded
|
|
|
|
366
|
|
|
|
367
|
1 |
|
@classmethod
def from_binary(cls, binary):
    """Decode a molecule from a binary serialization.

    Args:
        binary: The bytes string to decode; handed to the class
            constructor unchanged.

    Returns:
        skchem.Mol: The molecule encoded in the binary.
    """
    decoded = cls(binary)
    return decoded
379
|
|
|
|
380
|
1 |
|
def copy(self):
    """Return a deep copy of the molecule.

    The copy is converted with ``from_super`` of the instance's own class,
    so copying a subclass instance yields that subclass rather than a
    plain `Mol` (matching how `add_hs` and `remove_hs` build results).

    Returns:
        skchem.Mol: an independent copy of this molecule.
    """
    # imported locally so the name cannot be confused with this method
    from copy import deepcopy

    return self.__class__.from_super(deepcopy(self))
385
|
|
|
|
386
|
1 |
|
def __repr__(self): |
387
|
1 |
|
try: |
388
|
1 |
|
formula = self.to_formula() |
389
|
1 |
|
except ValueError: |
390
|
|
|
# if we can't generate the formula, just say it is unknown |
391
|
1 |
|
formula = 'unknown' |
392
|
|
|
|
393
|
1 |
|
return '<{klass} name="{name}" formula="{form}" at {address}>'.format( |
394
|
|
|
klass=self.__class__.__name__, |
395
|
|
|
name=self.name, |
396
|
|
|
form=formula, |
397
|
|
|
address=hex(id(self))) |
398
|
|
|
|
399
|
1 |
|
def __contains__(self, item):
    """Support ``item in mol`` as a substructure test.

    Args:
        item: The candidate substructure; must be a `Mol`.

    Returns:
        The result of ``self.HasSubstructMatch(item)``.

    Raises:
        NotImplementedError: If `item` is not a `Mol`.
    """
    if not isinstance(item, Mol):
        msg = 'No way to check if {} contains {}'.format(self, item)
        raise NotImplementedError(msg)
    return self.HasSubstructMatch(item)
405
|
|
|
|
406
|
1 |
|
def __eq__(self, item): |
407
|
1 |
|
if isinstance(item, self.__class__): |
408
|
1 |
|
return (self in item) and (item in self) |
409
|
|
|
else: |
410
|
|
|
return False |
411
|
|
|
|
412
|
1 |
|
def __str__(self): |
413
|
1 |
|
return '<Mol: {}>'.format(self.to_smiles()) |
|
|
|
|
414
|
|
|
|
415
|
|
|
|
416
|
1 |
|
def bind_constructor(constructor_name, name_to_bind=None):
    """Bind an (rdkit) constructor to the `Mol` class.

    The rdkit function ``MolFrom<constructor_name>`` is wrapped in a
    classmethod and attached to `Mol`.

    Args:
        constructor_name (str):
            Suffix of the rdkit parser, e.g. ``'Smiles'`` for
            ``rdkit.Chem.MolFromSmiles``.
        name_to_bind (str):
            Attribute name to bind on `Mol`.  Defaults to
            ``from_<constructor_name>`` in lower case.
    """

    @classmethod
    def constructor(cls, in_arg, name=None, *args, **kwargs):
        """Parse `in_arg` with the bound rdkit function.

        Args:
            in_arg: The input handed to the rdkit parser.
            name (str): Name to give the molecule.
            *args: Extra positional arguments for the rdkit parser.
            **kwargs: Extra keyword arguments for the rdkit parser.

        Returns:
            The parsed molecule, as an instance of the class the
            constructor was called on.

        Raises:
            ValueError: If the rdkit parser returns ``None``.
        """
        # resolved at call time, so a missing rdkit function only fails
        # when the corresponding constructor is actually used
        parser = getattr(rdkit.Chem, 'MolFrom' + constructor_name)
        m = parser(in_arg, *args, **kwargs)
        if m is None:
            raise ValueError('Failed to parse molecule, {}'.format(in_arg))
        # convert with the receiving class so subclasses of Mol get
        # instances of their own type instead of a plain Mol
        m = cls.from_super(m)
        # NOTE(review): with name=None the `name` setter clears `_Name`,
        # discarding any name the parser populated -- confirm intended.
        m.name = name
        return m

    if name_to_bind is None:
        name_to_bind = 'from_{}'.format(constructor_name).lower()
    setattr(Mol, name_to_bind, constructor)
|
|
|
|
435
|
|
|
|
436
|
|
|
|
437
|
1 |
|
def bind_serializer(serializer_name, name_to_bind=None):
    """Bind an (rdkit) serializer to the `Mol` class.

    The rdkit function ``MolTo<serializer_name>`` is wrapped in a method
    and attached to `Mol`.

    Args:
        serializer_name (str):
            Suffix of the rdkit writer, e.g. ``'Smiles'`` for
            ``rdkit.Chem.MolToSmiles``.
        name_to_bind (str):
            Attribute name to bind on `Mol`.  Defaults to
            ``to_<serializer_name>`` in lower case.
    """

    def serializer(self, *args, **kwargs):
        """Serialize the molecule with the bound rdkit function.

        Extra arguments are forwarded to rdkit; the call runs inside a
        ``Suppressor`` context.
        """
        with Suppressor():
            writer = getattr(rdkit.Chem, 'MolTo' + serializer_name)
            return writer(self, *args, **kwargs)

    if name_to_bind is None:
        name_to_bind = 'to_{}'.format(serializer_name).lower()
    setattr(Mol, name_to_bind, serializer)
|
|
|
|
450
|
|
|
|
451
|
1 |
|
# Suffixes of the rdkit ``MolFrom*`` functions exposed as ``Mol.from_*``.
CONSTRUCTORS = [
    'Inchi',
    'Smiles',
    'Mol2Block',
    'Mol2File',
    'MolBlock',
    'MolFile',
    'PDBBlock',
    'PDBFile',
    'Smarts',
    'TPLBlock',
    'TPLFile',
]

# Suffixes of the rdkit ``MolTo*`` functions exposed as ``Mol.to_*``.
SERIALIZERS = [
    'Inchi',
    'Smiles',
    'MolBlock',
    'MolFile',
    'PDBBlock',
    'Smarts',
    'TPLBlock',
    'TPLFile',
]

# Attach every listed constructor and serializer to Mol at import time.
for _suffix in CONSTRUCTORS:
    bind_constructor(_suffix)
for _suffix in SERIALIZERS:
    bind_serializer(_suffix)
del _suffix
|
|
|
|
459
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.