1 | #! /usr/bin/env python |
||
2 | # |
||
3 | # Copyright (C) 2016 Rich Lewis <[email protected]> |
||
4 | # License: 3-clause BSD |
||
5 | |||
6 | 1 | """ |
|
7 | ## skchem.standardizers.chemaxon |
||
8 | |||
9 | Module wrapping ChemAxon Standardizer. Must have standardizer installed and |
||
10 | license activated. |
||
11 | """ |
||
12 | |||
13 | 1 | import os |
|
14 | 1 | import sys |
|
15 | 1 | import re |
|
16 | 1 | import subprocess |
|
17 | 1 | import logging |
|
18 | 1 | import warnings |
|
19 | |||
20 | 1 | import pandas as pd |
|
0 ignored issues
–
show
|
|||
21 | |||
22 | 1 | from .. import io |
|
23 | 1 | from ..utils import sdf_count |
|
24 | 1 | from ..base import CLIWrapper, Transformer, BatchTransformer |
|
25 | 1 | from ..filters.base import TransformFilter |
|
26 | |||
27 | 1 | LOGGER = logging.getLogger(__name__) |
|
28 | |||
# Compatibility shim: pick the exception raised when an executable is missing
# and make sure ``subprocess.DEVNULL`` exists on every supported interpreter.
if sys.version_info[0] != 2:
    # Python 3: a missing executable raises FileNotFoundError and
    # ``subprocess.DEVNULL`` is already provided by the standard library.
    NoFoundError = FileNotFoundError
else:
    # Python 2: a missing executable surfaces as a plain OSError, and
    # ``subprocess`` has no DEVNULL, so a handle on the null device is
    # attached to the module under that name.
    NoFoundError = OSError
    subprocess.DEVNULL = open(os.devnull, 'w')
||
0 ignored issues
–
show
The name `NoFoundError` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`).
This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Comprehensibility
Best Practice
introduced
by
|
|||
34 | |||
35 | |||
class ChemAxonStandardizer(CLIWrapper, BatchTransformer, Transformer,
                           TransformFilter):

    """ ChemAxon Standardizer Wrapper.

    Args:
        config_path (str):
            The path of the config_file. If None, use the default one.
        keep_failed (bool):
            If True, molecules for which standardization produced a null
            result are passed through unchanged instead of being left null.

    Notes:
        ChemAxon Standardizer must be installed and accessible as `standardize`
        from the shell launching the program.

    Warnings:
        Must use a unique index (see #31).

    Examples:

        >>> import skchem
        >>> std = skchem.standardizers.ChemAxonStandardizer() # doctest:+SKIP
        >>> m = skchem.Mol.from_smiles('CC.CCC')
        >>> print(std.transform(m)) # doctest:+SKIP
        <Mol: CCC>

        >>> data = [m, skchem.Mol.from_smiles('C=CO'), skchem.Mol.from_smiles('C[O-]')]
        >>> std.transform(data) # doctest:+SKIP
        0     <Mol: CCC>
        1    <Mol: CC=O>
        2      <Mol: CO>
        Name: structure, dtype: object

        >>> will_fail = mol = '''932-97-8
        ...      RDKit          3D
        ...
        ...   9  9  0  0  0  0  0  0  0  0999 V2000
        ...    -0.9646    0.0000    0.0032 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894   -1.2163    0.0020 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -0.2894    1.2163    0.0025 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -2.2146    0.0000   -0.0004 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710   -1.2610    0.0002 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.0710    1.2610    0.0007 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...    -3.3386    0.0000   -0.0037 N   0  0  0  0  0  0  0  0  0  0  0  0
        ...     1.8248    0.0000   -0.0005 C   0  0  0  0  0  0  0  0  0  0  0  0
        ...     3.0435    0.0000   -0.0026 O   0  0  0  0  0  0  0  0  0  0  0  0
        ...   1  2  1  0
        ...   1  3  1  0
        ...   1  4  2  3
        ...   2  5  2  0
        ...   3  6  2  0
        ...   4  7  2  0
        ...   5  8  1  0
        ...   8  9  2  0
        ...   6  8  1  0
        ... M  CHG  2   4   1   7  -1
        ... M  END
        ... '''

        >>> will_fail = skchem.Mol.from_molblock(will_fail)
        >>> std.transform(will_fail) # doctest:+SKIP
        nan

        >>> data = [will_fail] + data

        >>> std.transform(data) # doctest:+SKIP
        0           None
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.transform_filter(data) # doctest:+SKIP
        1     <Mol: CCC>
        2    <Mol: CC=O>
        3      <Mol: CO>
        Name: structure, dtype: object

        >>> std.keep_failed = True # doctest:+SKIP
        >>> std.transform(data) # doctest:+SKIP
        0    <Mol: [N-]=[N+]=C1C=CC(=O)C=C1>
        1                         <Mol: CCC>
        2                        <Mol: CC=O>
        3                          <Mol: CO>
        Name: structure, dtype: object

    """
    install_hint = """ Install ChemAxon from https://www.chemaxon.com. It requires a license,
    which can be freely obtained for academics. """

    # Configuration shipped next to this module, used when no path is given.
    DEFAULT_CONFIG = os.path.join(os.path.dirname(__file__),
                                  'default_config.xml')

    def __init__(self, config_path=None, keep_failed=False, **kwargs):
        """ Store the configuration path and the failure policy.

        Args:
            config_path (str):
                Path to the standardizer config file. Any falsy value selects
                `DEFAULT_CONFIG`.
            keep_failed (bool):
                Whether null results are replaced by the input molecules.
            **kwargs:
                Forwarded unchanged to the parent initializers.
        """

        super(ChemAxonStandardizer, self).__init__(**kwargs)

        if not config_path:
            config_path = self.DEFAULT_CONFIG
        self.config_path = config_path
        self.keep_failed = keep_failed

    @property
    def columns(self):
        """ list[str]: the output column names; always ``['structure']``. """
        return ['structure']

    def _transform_series(self, ser):
        """ Transform a series, restoring names and applying `keep_failed`.

        Args:
            ser (pd.Series):
                The input molecules.

        Returns:
            The parent's result, with each non-null output given the `name`
            of its input and, when `keep_failed` is set, each null output
            replaced by the corresponding input.
        """

        # implement keep_failed functionality here
        res = super(ChemAxonStandardizer, self)._transform_series(ser)
        mask = pd.isnull(res)

        # Outputs come back without the names of their inputs; copy them over.
        for m_in, m_out in zip(ser[~mask], res[~mask]):
            m_out.name = m_in.name

        if self.keep_failed:
            # `.iloc` rejects a boolean Series as a mask, so index with the
            # underlying boolean array and assign by position.
            failed = getattr(mask, 'values', mask)
            res[failed] = ser.iloc[failed].values
        return res

    def _parse_outfile(self, outfile):
        """ Reads output file and returns a list"""
        return io.read_sdf(outfile, read_props=False)

    def _parse_errors(self, errs):
        """ Reads stderr and parses out failures as a list of indices.

        Args:
            errs (str):
                The captured stderr text. May be empty or None.

        Returns:
            list[int]: zero-based indices, one per line containing a
            ``No. <n>:`` marker (the reported number minus one).
        """
        LOGGER.debug('stderr: %s', errs if errs else None)
        if not errs:
            # Nothing was reported; also guards against `None`, which has no
            # `strip` method.
            return []

        failures = []
        for line in errs.strip().split('\n'):
            found = re.findall('No. ([0-9]+):', line)
            if found:
                # Reported numbers are one-based; callers expect zero-based.
                failures.append(int(found[0]) - 1)
        return failures

    def _cli_args(self, infile, outfile):
        """ The command line arguments to use for the subprocess. """

        return ['standardize', infile, '-c', self.config_path,
                '-f', 'sdf', '-o', outfile, '--ignore-error']

    @staticmethod
    def validate_install():
        """ Check if we can call standardize.

        Returns:
            bool: True if ``standardize -h`` exits with status 0, False if it
            exits with another status or the executable cannot be found.
        """
        try:
            return subprocess.call(['standardize', '-h'],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL) == 0
        except NoFoundError:
            return False

    def monitor_progress(self, filename):
        """ Report progress as the `sdf_count` of the given file. """
        return sdf_count(filename)

    def filter(self, *args, **kwargs):
        """ Filter with the parent implementation, warning about the result.

        A warning is emitted because the result holds the unstandardized
        molecules; `transform_filter` is usually what is wanted.

        Returns:
            Whatever the parent `filter` returns for the same arguments.
        """
        warnings.warn('Filter returns the unstandardized Mols. Did you mean to '
                      'use `transform_filter`?')
        return super(ChemAxonStandardizer, self).filter(*args, **kwargs)
||
187 |
This can be caused by one of the following:
1. Missing Dependencies
This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.
2. Missing __init__.py files
This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder.