1
|
|
|
#! /usr/bin/env python |
2
|
|
|
# |
3
|
|
|
# Copyright (C) 2016 Rich Lewis <[email protected]> |
4
|
|
|
# License: 3-clause BSD |
5
|
|
|
|
6
|
1 |
|
""" |
7
|
|
|
## skchem.standardizers.chemaxon |
8
|
|
|
|
9
|
|
|
Module wrapping ChemAxon Standardizer. Must have standardizer installed and |
10
|
|
|
license activated. |
11
|
|
|
""" |
12
|
|
|
|
13
|
1 |
|
import os |
14
|
1 |
|
import sys |
15
|
1 |
|
import re |
16
|
1 |
|
import subprocess |
17
|
1 |
|
import logging |
18
|
1 |
|
import warnings |
19
|
|
|
|
20
|
1 |
|
import pandas as pd |
|
|
|
|
21
|
|
|
|
22
|
1 |
|
from .. import io |
23
|
1 |
|
from ..utils import sdf_count |
24
|
1 |
|
from ..base import CLIWrapper, Transformer, BatchTransformer |
25
|
1 |
|
from ..filters.base import TransformFilter |
26
|
|
|
|
27
|
1 |
|
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)

# Compatibility shim: Python 2 has neither `FileNotFoundError` nor
# `subprocess.DEVNULL`, so `OSError` and a handle on os.devnull stand in.
if sys.version_info[0] != 2:
    NoFoundError = FileNotFoundError
else:
    NoFoundError = OSError
    subprocess.DEVNULL = open(os.devnull, 'w')
|
|
|
|
34
|
|
|
|
35
|
|
|
|
36
|
1 |
|
class ChemAxonStandardizer(CLIWrapper, BatchTransformer, Transformer,
                           TransformFilter):

    """ ChemAxon Standardizer Wrapper.

    Args:
        config_path (str):
            The path of the config_file. If None, use the default one.
        keep_failed (bool):
            If True, molecules for which standardization produced a null
            result are returned as they were passed in, instead of null.

    Notes:
        ChemAxon Standardizer must be installed and accessible as `standardize`
        from the shell launching the program.

    Warnings:
        Must use a unique index (see #31).

    Examples:

        >>> import skchem
        >>> std = skchem.standardizers.ChemAxonStandardizer() # doctest:+SKIP
        >>> m = skchem.Mol.from_smiles('CC.CCC')
        >>> print(std.transform(m)) # doctest:+SKIP
        <Mol: CCC>

        >>> data = [m, skchem.Mol.from_smiles('C=CO'), skchem.Mol.from_smiles('C[O-]')]
        >>> std.transform(data) # doctest:+SKIP
        0     <Mol: CCC>
        1    <Mol: CC=O>
        2      <Mol: CO>
        Name: structure, dtype: object

        >>> will_fail = mol = '''932-97-8
        ...      RDKit          3D
        ...
        ...   9  9  0  0  0  0  0  0  0  0999 V2000
        ...    -0.9646    0.0000    0.0032 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894   -1.2163    0.0020 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894    1.2163    0.0025 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -2.2146    0.0000   -0.0004 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710   -1.2610    0.0002 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710    1.2610    0.0007 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -3.3386    0.0000   -0.0037 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.8248    0.0000   -0.0005 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     3.0435    0.0000   -0.0026 O   0  0  0  0  0  0  0  0  0  0  0  0
        ...   1  2  1  0
        ...   1  3  1  0
        ...   1  4  2  3
        ...   2  5  2  0
        ...   3  6  2  0
        ...   4  7  2  0
        ...   5  8  1  0
        ...   8  9  2  0
        ...   6  8  1  0
        ... M  CHG  2   4   1   7  -1
        ... M  END
        ... '''

        >>> will_fail = skchem.Mol.from_molblock(will_fail)
        >>> std.transform(will_fail) # doctest:+SKIP
        nan

        >>> data = [will_fail] + data

        >>> std.transform(data) # doctest:+SKIP
        0           None
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.transform_filter(data) # doctest:+SKIP
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.keep_failed = True # doctest:+SKIP
        >>> std.transform(data) # doctest:+SKIP
        0    <Mol: [N-]=[N+]=C1C=CC(=O)C=C1>
        1                         <Mol: CCC>
        2                        <Mol: CC=O>
        3                          <Mol: CO>
        Name: structure, dtype: object

    """
    install_hint = """ Install ChemAxon from https://www.chemaxon.com. It requires a license,
    which can be freely obtained for academics. """

    # Configuration used when no `config_path` is given; it is looked up
    # next to this module.
    DEFAULT_CONFIG = os.path.join(os.path.dirname(__file__),
                                  'default_config.xml')

    def __init__(self, config_path=None, keep_failed=False, **kwargs):
        """ Set up the standardizer.

        Args:
            config_path (str):
                Path of the standardizer config file.  Any falsy value
                selects `DEFAULT_CONFIG`.
            keep_failed (bool):
                Whether to return the input molecule where standardization
                gave a null result.
            **kwargs:
                Forwarded unchanged to the parent initializers.
        """

        super(ChemAxonStandardizer, self).__init__(**kwargs)

        if not config_path:
            config_path = self.DEFAULT_CONFIG
        self.config_path = config_path
        self.keep_failed = keep_failed

    @property
    def columns(self):
        """ list: the single output column name, `'structure'`. """
        return ['structure']

    def _transform_series(self, ser):
        """ Transform a series, then restore names and handle failures.

        Args:
            ser: the molecules to standardize (indexable with a boolean
                mask; presumably a pandas Series - confirm against the
                parent classes).

        Returns:
            The parent's result, with each non-null output given the name
            of its input and, if `keep_failed` is set, each null output
            replaced by the corresponding input.
        """

        # implement keep_failed functionality here
        res = super(ChemAxonStandardizer, self)._transform_series(ser)
        mask = pd.isnull(res)

        # Copy each input's name onto its standardized counterpart.
        for m_in, m_out in zip(ser[~mask], res[~mask]):
            m_out.name = m_in.name

        if self.keep_failed:
            # Plain boolean selection, as used for `ser[~mask]` above.
            # `.iloc` refuses a boolean Series as a mask, so the previous
            # `ser.iloc[mask]` raised whenever `mask` was a Series.
            res[mask] = ser[mask]
        return res

    def _parse_outfile(self, outfile):
        """ Read the output file with `io.read_sdf`, skipping properties. """
        return io.read_sdf(outfile, read_props=False)

    def _parse_errors(self, errs):
        """ Reads stderr and parses out failures as a list of indices.

        Args:
            errs (str): the captured stderr text; may be empty or None.

        Returns:
            list of int: for every line containing `No. <n>:`, the value
            `n - 1` (the reported number shifted to start at zero).
        """
        LOGGER.debug('stderr: %s', errs if errs else None)
        if not errs:
            # Nothing was reported, so nothing failed.
            return []

        pattern = re.compile('No. ([0-9]+):')
        failures = []
        for line in errs.strip().split('\n'):
            found = pattern.findall(line)
            if found:
                failures.append(int(found[0]) - 1)
        return failures

    def _cli_args(self, infile, outfile):
        """ The command line arguments to use for the subprocess.

        Args:
            infile (str): path of the file to standardize.
            outfile (str): path the standardized SDF is written to.

        Returns:
            list of str: the `standardize` invocation.
        """

        return ['standardize', infile, '-c', self.config_path,
                '-f', 'sdf', '-o', outfile, '--ignore-error']

    @staticmethod
    def validate_install():
        """ Check if we can call standardize.

        Returns:
            bool: True if `standardize -h` exits with status 0, False if
            it exits non-zero or the executable cannot be found.
        """
        try:
            return subprocess.call(['standardize', '-h'],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL) == 0
        except NoFoundError:
            return False

    def monitor_progress(self, filename):
        """ Return `sdf_count(filename)`.

        Presumably the number of molecules written to `filename` so far -
        confirm against `skchem.utils.sdf_count`.
        """
        return sdf_count(filename)

    def filter(self, *args, **kwargs):
        """ Filter with the parent implementation, after warning the caller.

        A warning is raised because filtering yields the unstandardized
        molecules, where `transform_filter` is probably what was intended.

        Returns:
            Whatever the parent `filter` returns.
        """
        warnings.warn('Filter returns the unstandardized Mols. Did you mean '
                      'to use `transform_filter`?')
        # The result must be handed back; it was previously discarded, so
        # this method always returned None.
        return super(ChemAxonStandardizer, self).filter(*args, **kwargs)
187
|
|
|
|
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.