Issues (942)

skchem/standardizers/chemaxon.py (4 issues)

1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2016 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6 1
"""
7
## skchem.standardizers.chemaxon
8
9
Module wrapping ChemAxon Standardizer.  Must have standardizer installed and
10
license activated.
11
"""
12
13 1
import os
14 1
import sys
15 1
import re
16 1
import subprocess
17 1
import logging
18 1
import warnings
19
20 1
import pandas as pd
0 ignored issues
show
The import pandas could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
21
22 1
from .. import io
23 1
from ..utils import sdf_count
24 1
from ..base import CLIWrapper, Transformer, BatchTransformer
25 1
from ..filters.base import TransformFilter
26
27 1
LOGGER = logging.getLogger(__name__)
28
29 1
# FileNotFoundError is only a builtin on Python 3.  Where it is missing
# (Python 2), fall back to OSError and supply the subprocess.DEVNULL handle
# that Python 2's subprocess module does not define.
try:
    NoFoundError = FileNotFoundError
except NameError:
    NoFoundError = OSError
    subprocess.DEVNULL = open(os.devnull, 'w')
0 ignored issues
show
Coding Style Naming introduced by
The name NoFoundError does not conform to the constant naming conventions ((([A-Z_][A-Z0-9_]*)|(__.*__))$).

This check looks for invalid names for a range of different identifiers.

You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements.

If your project includes a Pylint configuration file, the settings contained in that file take precedence.

To find out more about Pylint, please refer to their site.

Loading history...
Comprehensibility Best Practice introduced by
Undefined variable 'FileNotFoundError'
Loading history...
34
35
36 1
class ChemAxonStandardizer(CLIWrapper, BatchTransformer, Transformer,
0 ignored issues
show
Too many ancestors (8/7)
Loading history...
37
                           TransformFilter):
38
39
    """ ChemAxon Standardizer Wrapper.
40
41
    Args:
42
        config_path (str):
43
            The path of the config_file. If None, use the default one.
44
45
    Notes:
46
        ChemAxon Standardizer must be installed and accessible as `standardize`
47
        from the shell launching the program.
48
49
    Warnings:
50
        Must use a unique index (see #31).
51
52
    Examples:
53
54
        >>> import skchem
55
        >>> std = skchem.standardizers.ChemAxonStandardizer() # doctest:+SKIP
56
        >>> m = skchem.Mol.from_smiles('CC.CCC')
57
        >>> print(std.transform(m)) # doctest:+SKIP
58
        <Mol: CCC>
59
60
        >>> data = [m, skchem.Mol.from_smiles('C=CO'), skchem.Mol.from_smiles('C[O-]')]
61
        >>> std.transform(data) # doctest:+SKIP
62
        0     <Mol: CCC>
63
        1    <Mol: CC=O>
64
        2      <Mol: CO>
65
        Name: structure, dtype: object
66
67
        >>> will_fail = mol = '''932-97-8
68
        ...      RDKit          3D
69
        ...
70
        ...   9  9  0  0  0  0  0  0  0  0999 V2000
71
        ...    -0.9646    0.0000    0.0032 C   0  0  0  0  0  0  0  0  0  0  0  0
72
        ...    -0.2894   -1.2163    0.0020 C   0  0  0  0  0  0  0  0  0  0  0  0
73
        ...    -0.2894    1.2163    0.0025 C   0  0  0  0  0  0  0  0  0  0  0  0
74
        ...    -2.2146    0.0000   -0.0004 N   0  0  0  0  0  0  0  0  0  0  0  0
75
        ...     1.0710   -1.2610    0.0002 C   0  0  0  0  0  0  0  0  0  0  0  0
76
        ...     1.0710    1.2610    0.0007 C   0  0  0  0  0  0  0  0  0  0  0  0
77
        ...    -3.3386    0.0000   -0.0037 N   0  0  0  0  0  0  0  0  0  0  0  0
78
        ...     1.8248    0.0000   -0.0005 C   0  0  0  0  0  0  0  0  0  0  0  0
79
        ...     3.0435    0.0000   -0.0026 O   0  0  0  0  0  0  0  0  0  0  0  0
80
        ...   1  2  1  0
81
        ...   1  3  1  0
82
        ...   1  4  2  3
83
        ...   2  5  2  0
84
        ...   3  6  2  0
85
        ...   4  7  2  0
86
        ...   5  8  1  0
87
        ...   8  9  2  0
88
        ...   6  8  1  0
89
        ... M  CHG  2   4   1   7  -1
90
        ... M  END
91
        ... '''
92
93
        >>> will_fail = skchem.Mol.from_molblock(will_fail)
94
        >>> std.transform(will_fail) # doctest:+SKIP
95
        nan
96
97
        >>> data = [will_fail] + data
98
99
        >>> std.transform(data) # doctest:+SKIP
100
        0           None
101
        1     <Mol: CCC>
102
        2    <Mol: CC=O>
103
        3      <Mol: CO>
104
        Name: structure, dtype: object
105
106
        >>> std.transform_filter(data) # doctest:+SKIP
107
        1     <Mol: CCC>
108
        2    <Mol: CC=O>
109
        3      <Mol: CO>
110
        Name: structure, dtype: object
111
112
        >>> std.keep_failed = True # doctest:+SKIP
113
        >>> std.transform(data) # doctest:+SKIP
114
        0    <Mol: [N-]=[N+]=C1C=CC(=O)C=C1>
115
        1                         <Mol: CCC>
116
        2                        <Mol: CC=O>
117
        3                          <Mol: CO>
118
        Name: structure, dtype: object
119
120
    """
121 1
    install_hint = """ Install ChemAxon from https://www.chemaxon.com.  It requires a license,
122
    which can be freely obtained for academics. """
123
124 1
    DEFAULT_CONFIG = os.path.join(os.path.dirname(__file__),
125
                                  'default_config.xml')
126
127 1
    def __init__(self, config_path=None, keep_failed=False, **kwargs):
        """ Initialize the wrapper, passing extra keyword arguments upwards.

        Args:
            config_path (str):
                The path of the config file.  Any falsy value selects
                `DEFAULT_CONFIG`.
            keep_failed (bool):
                Stored on the instance and consulted by `_transform_series`.
            **kwargs:
                Forwarded unchanged to the parent initializer.
        """
        super(ChemAxonStandardizer, self).__init__(**kwargs)

        self.keep_failed = keep_failed
        self.config_path = config_path or self.DEFAULT_CONFIG
135
136 1
    @property
137
    def columns(self):
138
        return ['structure']
139
140 1
    def _transform_series(self, ser):
        """ Transform a series, then restore names and (optionally) failures.

        The work is delegated to the parent `_transform_series`.  Entries of
        the result that are null are treated as failed.

        Args:
            ser:
                The input passed to the parent implementation.  It must
                support boolean-mask selection; presumably a pd.Series of
                Mols (confirm against the caller).

        Returns:
            The parent's result, with the `name` of every non-null output set
            to the `name` of its input and, if `keep_failed` is true, every
            null entry replaced by the corresponding input.
        """
        res = super(ChemAxonStandardizer, self)._transform_series(ser)
        mask = pd.isnull(res)

        # carry each input's name over to its standardized counterpart
        for m_in, m_out in zip(ser[~mask], res[~mask]):
            m_out.name = m_in.name

        if self.keep_failed:
            # Use the same plain boolean selection as above: `.iloc` does
            # not accept a boolean Series as a mask.
            res[mask] = ser[mask]
        return res
152
153 1
    def _parse_outfile(self, outfile):
        """ Read `outfile` as SDF and return what `io.read_sdf` produces.

        Property parsing is switched off (`read_props=False`).
        """
        parsed = io.read_sdf(outfile, read_props=False)
        return parsed
156
157 1
    def _parse_errors(self, errs):
        """ Parse stderr text into a list of zero-based failure indices.

        Every line is searched for ``No. <digits>:``.  The first such number
        on a line, minus one, is collected; lines without a match are
        ignored.
        """
        LOGGER.debug('stderr: %s', errs if errs else None)
        failed = []
        for line in errs.strip().split('\n'):
            found = re.search('No. ([0-9]+):', line)
            if found is not None:
                failed.append(int(found.group(1)) - 1)
        return failed
163
164 1
    def _cli_args(self, infile, outfile):
165
        """ The command line arguments to use for the subprocess. """
166
167
        return ['standardize', infile, '-c', self.config_path,
168
                '-f', 'sdf', '-o', outfile, '--ignore-error']
169
170 1
    @staticmethod
    def validate_install():
        """ Check if we can call standardize.

        Runs ``standardize -h`` with stdout and stderr discarded.

        Returns:
            bool:
                True if the command exited with status 0.  False if it
                exited with another status, or if launching it raised
                `NoFoundError`.
        """
        # keep only the call that can raise inside the try block
        try:
            exit_code = subprocess.call(['standardize', '-h'],
                                        stdout=subprocess.DEVNULL,
                                        stderr=subprocess.DEVNULL)
        except NoFoundError:
            return False
        return exit_code == 0
179
180 1
    def monitor_progress(self, filename):
        """ Return the result of `sdf_count` applied to `filename`. """
        progress = sdf_count(filename)
        return progress
182
183 1
    def filter(self, *args, **kwargs):
        """ Run the parent `filter`, warning that it is probably not wanted.

        A warning is always issued, because the result holds the
        unstandardized Mols; `transform_filter` is the suggested
        alternative.

        Args:
            *args, **kwargs:
                Forwarded unchanged to the parent `filter`.

        Returns:
            Whatever the parent `filter` returns.
        """
        # Trailing space keeps the two literals from running together.
        warnings.warn('Filter returns the unstandardized Mols. Did you mean to '
                      'use `transform_filter`?')
        # Return the parent's result rather than dropping it.
        return super(ChemAxonStandardizer, self).filter(*args, **kwargs)
187