|
1
|
|
|
#! /usr/bin/env python |
|
2
|
|
|
# |
|
3
|
|
|
# Copyright (C) 2016 Rich Lewis <[email protected]> |
|
4
|
|
|
# License: 3-clause BSD |
|
5
|
|
|
|
|
6
|
1 |
|
""" |
|
7
|
|
|
## skchem.standardizers.chemaxon |
|
8
|
|
|
|
|
9
|
|
|
Module wrapping ChemAxon Standardizer. Must have standardizer installed and |
|
10
|
|
|
license activated. |
|
11
|
|
|
""" |
|
12
|
|
|
|
|
13
|
1 |
|
import os |
|
14
|
1 |
|
import sys |
|
15
|
1 |
|
import re |
|
16
|
1 |
|
import subprocess |
|
17
|
1 |
|
import logging |
|
18
|
1 |
|
import warnings |
|
19
|
|
|
|
|
20
|
1 |
|
import pandas as pd |
|
|
|
|
|
|
21
|
|
|
|
|
22
|
1 |
|
from .. import io |
|
23
|
1 |
|
from ..utils import sdf_count |
|
24
|
1 |
|
from ..base import CLIWrapper, Transformer, BatchTransformer |
|
25
|
1 |
|
from ..filters.base import TransformFilter |
|
26
|
|
|
|
|
27
|
1 |
|
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)

# Python 2/3 compatibility shims used by the wrapper class below.
if sys.version_info[0] != 2:
    # Any interpreter whose major version is not 2: use the built-in
    # exception type directly.
    NoFoundError = FileNotFoundError
else:
    # Python 2: alias the "executable not found" error to OSError and
    # install a writable handle on os.devnull as subprocess.DEVNULL so
    # the rest of the module can refer to it unconditionally.
    NoFoundError = OSError
    subprocess.DEVNULL = open(os.devnull, 'w')
|
|
|
|
|
|
34
|
|
|
|
|
35
|
|
|
|
|
36
|
1 |
|
class ChemAxonStandardizer(CLIWrapper, BatchTransformer, Transformer,
                           TransformFilter):

    """ ChemAxon Standardizer Wrapper.

    Args:
        config_path (str):
            The path of the config_file. If None, use the default one.
        keep_failed (bool):
            If True, molecules for which standardization produced a null
            result are returned unchanged instead of as nulls.
        **kwargs:
            Forwarded unchanged to the parent class initializers.

    Notes:
        ChemAxon Standardizer must be installed and accessible as `standardize`
        from the shell launching the program.

    Warnings:
        Must use a unique index (see #31).

    Examples:

        >>> import skchem
        >>> std = skchem.standardizers.ChemAxonStandardizer() # doctest:+SKIP
        >>> m = skchem.Mol.from_smiles('CC.CCC')
        >>> print(std.transform(m)) # doctest:+SKIP
        <Mol: CCC>

        >>> data = [m, skchem.Mol.from_smiles('C=CO'), skchem.Mol.from_smiles('C[O-]')]
        >>> std.transform(data) # doctest:+SKIP
        0     <Mol: CCC>
        1    <Mol: CC=O>
        2      <Mol: CO>
        Name: structure, dtype: object

        >>> will_fail = mol = '''932-97-8
        ...      RDKit          3D
        ...
        ...   9  9  0  0  0  0  0  0  0  0999 V2000
        ...    -0.9646    0.0000    0.0032 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894   -1.2163    0.0020 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894    1.2163    0.0025 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -2.2146    0.0000   -0.0004 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710   -1.2610    0.0002 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710    1.2610    0.0007 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -3.3386    0.0000   -0.0037 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.8248    0.0000   -0.0005 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     3.0435    0.0000   -0.0026 O   0  0  0  0  0  0  0  0  0  0  0  0
        ...   1  2  1  0
        ...   1  3  1  0
        ...   1  4  2  3
        ...   2  5  2  0
        ...   3  6  2  0
        ...   4  7  2  0
        ...   5  8  1  0
        ...   8  9  2  0
        ...   6  8  1  0
        ... M  CHG  2   4   1   7  -1
        ... M  END
        ... '''

        >>> will_fail = skchem.Mol.from_molblock(will_fail)
        >>> std.transform(will_fail) # doctest:+SKIP
        nan

        >>> data = [will_fail] + data

        >>> std.transform(data) # doctest:+SKIP
        0           None
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.transform_filter(data) # doctest:+SKIP
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.keep_failed = True # doctest:+SKIP
        >>> std.transform(data) # doctest:+SKIP
        0    <Mol: [N-]=[N+]=C1C=CC(=O)C=C1>
        1                         <Mol: CCC>
        2                        <Mol: CC=O>
        3                          <Mol: CO>
        Name: structure, dtype: object

    """
    install_hint = """ Install ChemAxon from https://www.chemaxon.com. It requires a license,
    which can be freely obtained for academics. """

    # Configuration file shipped next to this module; used when the caller
    # does not supply ``config_path``.
    DEFAULT_CONFIG = os.path.join(os.path.dirname(__file__),
                                  'default_config.xml')

    def __init__(self, config_path=None, keep_failed=False, **kwargs):
        """ Store the configuration path and the failure policy.

        Args:
            config_path (str):
                Path of the standardizer config file.  Any falsy value
                selects ``DEFAULT_CONFIG``.
            keep_failed (bool):
                Whether null results are replaced by the input molecule.
            **kwargs:
                Passed through to the parent class initializers.
        """

        super(ChemAxonStandardizer, self).__init__(**kwargs)

        if not config_path:
            config_path = self.DEFAULT_CONFIG
        self.config_path = config_path
        self.keep_failed = keep_failed

    @property
    def columns(self):
        """ The single output column name, ``['structure']``. """
        return ['structure']

    def _transform_series(self, ser):
        """ Transform a series, then apply the ``keep_failed`` policy.

        The parent implementation does the actual transformation.  Each
        non-null result then takes the ``name`` of its input molecule, and
        if ``keep_failed`` is set the null results are replaced by the
        corresponding inputs.

        Args:
            ser: The input molecules.  Indexed with a mask derived from the
                result, so it must share its index with that result.

        Returns:
            The result of the parent ``_transform_series`` after the
            adjustments described above.
        """

        # implement keep_failed functionality here
        res = super(ChemAxonStandardizer, self)._transform_series(ser)
        mask = pd.isnull(res)

        # Carry each input molecule's name over to its standardized output.
        for m_in, m_out in zip(ser[~mask], res[~mask]):
            m_out.name = m_in.name

        if self.keep_failed:
            # Plain boolean indexing (not ``.iloc``): pandas rejects a
            # boolean Series as an ``iloc`` mask, and ``ser[~mask]`` above
            # already relies on the same label-aligned indexing.
            res[mask] = ser[mask]
        return res

    def _parse_outfile(self, outfile):
        """ Read the output file with ``io.read_sdf`` (properties skipped). """
        return io.read_sdf(outfile, read_props=False)

    def _parse_errors(self, errs):
        """ Reads stderr and parses out failures as a list of indices.

        Args:
            errs (str):
                The captured stderr text.  A falsy value (empty string or
                None) yields an empty list.

        Returns:
            list[int]: For every line containing ``No. <number>:``, the
            first such number minus one.
        """
        LOGGER.debug('stderr: %s', errs if errs else None)
        if not errs:
            # Nothing was reported; also avoids calling ``.strip()`` on None.
            return []
        found = (re.search(r'No. ([0-9]+):', line)
                 for line in errs.strip().split('\n'))
        # The reported numbers are shifted down by one to give the indices.
        return [int(match.group(1)) - 1 for match in found if match]

    def _cli_args(self, infile, outfile):
        """ The command line arguments to use for the subprocess.

        Args:
            infile (str): Path passed as the input of ``standardize``.
            outfile (str): Path passed via ``-o``.

        Returns:
            list[str]: The argument vector, starting with ``standardize``.
        """

        return ['standardize', infile, '-c', self.config_path,
                '-f', 'sdf', '-o', outfile, '--ignore-error']

    @staticmethod
    def validate_install():
        """ Check if we can call standardize.

        Returns:
            bool: True if ``standardize -h`` exits with status 0; False if
            it exits non-zero or the executable cannot be found.
        """
        try:
            return subprocess.call(['standardize', '-h'],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL) == 0
        except NoFoundError:
            return False

    def monitor_progress(self, filename):
        """ Return ``sdf_count(filename)``.

        NOTE(review): presumably the number of molecules written to the
        output so far -- confirm against ``utils.sdf_count``.
        """
        return sdf_count(filename)

    def filter(self, *args, **kwargs):
        """ Delegate to the parent ``filter``, warning about its semantics.

        A warning is emitted first because this returns the unstandardized
        molecules; ``transform_filter`` is the likely intended call.

        Returns:
            Whatever the parent ``filter`` returns for the same arguments.
        """
        warnings.warn('Filter returns the unstandardized Mols. Did you mean to '
                      'use `transform_filter`?')
        # Propagate the parent's result; it was previously discarded.
        return super(ChemAxonStandardizer, self).filter(*args, **kwargs)
|
187
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.