1
|
|
|
from __future__ import annotations |
|
|
|
|
2
|
|
|
from typing import Iterator, List, Sequence, Set |
|
|
|
|
3
|
|
|
|
4
|
|
|
import numpy as np |
|
|
|
|
5
|
|
|
|
6
|
|
|
from mandos import logger |
7
|
|
|
|
8
|
|
|
|
9
|
|
|
try: |
10
|
|
|
from rdkit import Chem |
11
|
|
|
from rdkit.Chem import SaltRemover |
|
|
|
|
12
|
|
|
from rdkit.Chem import Mol |
|
|
|
|
13
|
|
|
import rdkit.Chem.inchi as Inchi |
14
|
|
|
from rdkit.Chem import AllChem |
15
|
|
|
except ImportError: |
16
|
|
|
logger.debug("Failed importing rdkit.", exc_info=True) |
17
|
|
|
Chem = None |
18
|
|
|
Mol = None |
19
|
|
|
Inchi = None |
20
|
|
|
SaltRemover = None |
21
|
|
|
AllChem = None |
22
|
|
|
|
23
|
|
|
|
24
|
|
|
class MoleculeError(Exception):
    """Base exception for the molecule-related errors declared in this module."""
26
|
|
|
|
27
|
|
|
|
28
|
|
|
class MoleculeConversionError(MoleculeError):
    """A ``MoleculeError`` subtype; by its name, meant for failed molecule conversions."""
30
|
|
|
|
31
|
|
|
|
32
|
|
|
class NullMoleculeError(MoleculeConversionError):
    """A ``MoleculeConversionError`` subtype; by its name, meant for conversions that yield no molecule."""
34
|
|
|
|
35
|
|
|
|
36
|
|
|
class Fingerprint:
    """
    Just a simple wrapper for rdkit fingerprints.
    A bit unnecessary, but convenient when you're using them a lot.

    The wrapped object must be iterable bit-by-bit and, for the corresponding
    accessors, provide ``ToBinary``, ``ToBitString``, ``ToBase64``,
    ``GetNumBits``, ``GetNumOnBits`` and ``GetNumOffBits``.
    """

    def __init__(self, fp):
        """Wrap ``fp``, the underlying fingerprint object (stored as-is, not copied)."""
        self._fp = fp

    @property
    def bytes(self) -> bytes:
        """The fingerprint serialized with ``ToBinary()``."""
        return self._fp.ToBinary()

    @property
    def numpy(self) -> np.ndarray:
        """The bits as a 1-D boolean numpy array, in bit order."""
        # NOTE: frombuffer will NOT work correctly for bool arrays
        # also, fromiter is much slower than creating a list first
        # This appears to be the fastest way to create an array here
        return np.array(list(self._fp), dtype=bool)

    @property
    def list(self) -> List[bool]:
        """The bits as a list of booleans, in bit order."""
        return [bool(bit) for bit in self._fp]

    @property
    def list_on(self) -> Set[int]:
        """The zero-based indices of the bits that are set."""
        # Collect the positions of the truthy bits, not the bit values themselves:
        # a set of the values would only ever contain True and/or False.
        return {index for index, bit in enumerate(self._fp) if bit}

    @property
    def string(self) -> str:
        """The fingerprint as returned by ``ToBitString()``."""
        return self._fp.ToBitString()

    @property
    def base64(self) -> str:
        """The fingerprint as returned by ``ToBase64()``."""
        return self._fp.ToBase64()

    @property
    def n_bits(self) -> int:
        """Total number of bits (``GetNumBits()``)."""
        return self._fp.GetNumBits()

    @property
    def n_on(self) -> int:
        """Number of set bits (``GetNumOnBits()``)."""
        return self._fp.GetNumOnBits()

    @property
    def n_off(self) -> int:
        """Number of unset bits (``GetNumOffBits()``)."""
        return self._fp.GetNumOffBits()

    # TODO: Consider changing to hold bytes or numpy array, and implement | and &
    # def __ror__(self, other: Fingerprint) -> Fingerprint:
    # https://bugs.python.org/issue19251

    def __len__(self) -> int:
        """Same as ``n_bits``."""
        return self._fp.GetNumBits()

    def __str__(self) -> str:
        """Same as ``string``."""
        return self._fp.ToBitString()

    def __repr__(self) -> str:
        """Same as ``string``; the repr is the raw bit string."""
        return self._fp.ToBitString()

    def __bytes__(self) -> bytes:
        """Same as ``bytes``."""
        return self._fp.ToBinary()

    def __iter__(self) -> Iterator[bool]:
        """Iterate over the bits as booleans, in bit order."""
        return iter(map(bool, self._fp))
103
|
|
|
|
104
|
|
|
|
105
|
|
|
class RdkitUtils:
    """
    Helpers that convert InChI or SMILES strings using rdkit.

    In every method, a string starting with ``"InChI="`` is treated as InChI;
    any other string is parsed as SMILES.
    """

    @classmethod
    def inchikey(cls, inchi_or_smiles: str) -> str:
        """
        Return the InChIKey for an InChI or SMILES string.

        Raises:
            NullMoleculeError: If a SMILES string could not be parsed.
        """
        inchi = cls.inchi(inchi_or_smiles)
        return Inchi.InchiToInchiKey(inchi)

    @classmethod
    def inchi(cls, inchi_or_smiles: str) -> str:
        """
        Return the InChI for an InChI or SMILES string.

        An InChI input is returned unchanged, without parsing or validation.

        Raises:
            NullMoleculeError: If a SMILES string could not be parsed.
        """
        if inchi_or_smiles.startswith("InChI="):
            return inchi_or_smiles
        # Not an InChI, so _mol parses it as SMILES (and rejects unparsable input).
        return Inchi.MolToInchi(cls._mol(inchi_or_smiles))

    @classmethod
    def ecfp(cls, inchi_or_smiles: str, radius: int, n_bits: int) -> Fingerprint:
        """
        Compute a Morgan bit-vector fingerprint, wrapped in a ``Fingerprint``.

        Args:
            inchi_or_smiles: The structure, as InChI or SMILES.
            radius: Passed to rdkit as ``radius``.
            n_bits: Passed to rdkit as ``nBits``.

        Raises:
            NullMoleculeError: If the string could not be parsed.
        """
        mol = cls._mol(inchi_or_smiles)
        fp1 = AllChem.GetMorganFingerprintAsBitVect(
            mol, radius=radius, nBits=n_bits, useFeatures=False
        )
        return Fingerprint(fp1)

    @classmethod
    def _mol(cls, inchi_or_smiles: str) -> Mol:
        """
        Parse an InChI or SMILES string into an rdkit molecule.

        Raises:
            NullMoleculeError: If rdkit returned ``None`` for the input.
        """
        if inchi_or_smiles.startswith("InChI="):
            mol = Chem.MolFromInchi(inchi_or_smiles)
        else:
            mol = Chem.MolFromSmiles(inchi_or_smiles)
        if mol is None:
            # rdkit signals a parse failure by returning None rather than raising;
            # fail here with a clear message instead of deep inside a later rdkit call.
            raise NullMoleculeError(f"Could not parse a molecule from {inchi_or_smiles!r}")
        return mol
132
|
|
|
|
133
|
|
|
|
134
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "Fingerprint",
    "RdkitUtils",
]
135
|
|
|
|