Completed
Push — master ( e06060...97f98d )
by Rich
01:41
created

OrganicFilter   A

Complexity

Total Complexity 1

Size/Duplication

Total Lines 51
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
wmc 1
c 1
b 0
f 1
dl 0
loc 51
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
A __init__() 0 2 1
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6
"""
7
8
# skchem.filters.simple
9
10
Simple filters for compounds.
11
12
"""
13
14
from .base import Filter
15
import pandas as pd
0 ignored issues
show
Configuration introduced by
The import pandas could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
Unused Code introduced by
Unused pandas imported as pd
Loading history...
16
17
18
class ElementFilter(Filter):

    """ Filter molecules by the elements they contain.

    A molecule passes when every one of its atoms has an element found in
    ``elements``; a single atom whose element is not in that collection makes
    the molecule fail.

    Args:
        elements:
            The allowed elements.  Membership is tested with ``in`` against
            ``atom.element`` for each atom of the molecule.
        **kwargs:
            Accepted, but not forwarded by this initializer.
    """

    def __init__(self, elements, **kwargs):

        # The allowed elements must be stored first: ``func`` reads them.
        self.elements = elements

        # NOTE(review): ``kwargs`` is not passed on to the base initializer
        # -- confirm that this is intentional.
        super(ElementFilter, self).__init__(self.func)

    def func(self, mol):

        """ Whether every atom of a molecule has an allowed element.

        Args:
            mol:
                The molecule to be tested.  Its ``atoms`` are iterated and
                each atom's ``element`` is looked up in ``self.elements``.

        Returns:
            bool:
                False as soon as an atom with a disallowed element is found,
                True otherwise (including for a molecule with no atoms).
        """

        # NOTE(review): a linter reports that skchem.filters.base defines an
        # instance attribute with this name, which would shadow this method
        # on instances -- confirm against the base class.
        for atom in mol.atoms:
            if atom.element not in self.elements:
                return False

        return True
35
36
37
class OrganicFilter(ElementFilter):

    """ Filter for whether a molecule is organic.

    For the purpose of this filter, an organic molecule is defined as having
    atoms with elements only in the set H, B, C, N, O, F, P, S, Cl, Br, I.

    The initializer takes no arguments: the allowed elements are fixed by the
    ``elements`` class attribute.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> is_organic = skchem.filters.OrganicFilter()
        >>> is_organic(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]',
        ...                             name='ferrocene')
        >>> is_organic(m2)
        False

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> is_organic.apply(data).value_counts()
        True     4253
        False      84
        Name: structure, dtype: int64

        >>> len(is_organic.filter(data))
        4253
        >>> len(is_organic.filter(data, neg=True))
        84
    """

    # Elements a molecule may contain and still count as organic.
    elements = [
        'H', 'B', 'C', 'N', 'O', 'F',
        'P', 'S', 'Cl', 'Br', 'I',
    ]

    def __init__(self):
        # Hand the class-level element list to the generic element filter.
        allowed = self.elements
        super(OrganicFilter, self).__init__(allowed)
88
89
90
def n_atoms(mol, above=2, below=75, include_hydrogens=False):

    """ Whether the number of atoms in a molecule falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 2.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 75.
        include_hydrogens (bool):
            Whether to add the implicit hydrogen count of every atom to the
            number of atoms.
            Defaults to False.

    Returns:
        bool:
            Whether the number of atoms lies in the interval.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.

        Lower threshold:

        >>> skchem.filters.n_atoms(m, above=3)
        True
        >>> skchem.filters.n_atoms(m, above=8)
        False

        Higher threshold:

        >>> skchem.filters.n_atoms(m, below=8)
        True
        >>> skchem.filters.n_atoms(m, below=3)
        False

        Bounds work like Python slices - inclusive lower, exclusive upper:

        >>> skchem.filters.n_atoms(m, above=6)
        True
        >>> skchem.filters.n_atoms(m, below=6)
        False

        Both can be used at once:

        >>> skchem.filters.n_atoms(m, above=3, below=8)
        True

        Can include hydrogens:

        >>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
        False
        >>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
        True

    """

    # Raised explicitly rather than through an ``assert`` statement so the
    # check is not stripped when Python runs with -O.  The exception type and
    # message are kept as they were for existing callers.
    if not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))

    n_a = len(mol.atoms)
    if include_hydrogens:
        # Only implicit hydrogens are added here; any hydrogen present as an
        # atom of ``mol.atoms`` is already part of the count above.
        n_a += sum(atom.GetNumImplicitHs() for atom in mol.atoms)

    return above <= n_a < below
159
160
class AtomNumberFilter(Filter):

    """ Filter molecules by whether their atom count falls in an interval.

    ``above <= n_atoms < below``

    Args:
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 3.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 60.
        include_hydrogens (bool):
            Passed on as the ``include_hydrogens`` argument of ``n_atoms``.
            Defaults to False.
        **kwargs:
            Any further keyword arguments are forwarded to the base
            initializer.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:

        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_natom = skchem.filters.AtomNumberFilter(above=3, below=60)
        >>> f_natom.apply(data).value_counts()
        True     4306
        False      31
        Name: structure, dtype: int64

        >>> len(f_natom.filter(data))
        4306
        >>> len(f_natom.filter(data, neg=True))
        31
    """

    def __init__(self, above=3, below=60, include_hydrogens=False, **kwargs):

        # Raised explicitly rather than through an ``assert`` statement so
        # the check is not stripped when Python runs with -O.  The exception
        # type and message are kept as they were for existing callers.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))

        self.above = above
        self.below = below
        self.include_hydrogens = include_hydrogens

        super(AtomNumberFilter, self).__init__(
            n_atoms,
            above=self.above,
            below=self.below,
            include_hydrogens=self.include_hydrogens,
            **kwargs)
0 ignored issues
show
Coding Style introduced by
Wrong continued indentation.
**kwargs)
^ |
Loading history...
202
203
204
def mass(mol, above=10, below=900):

    """ Whether the molecular weight of a molecule falls in a defined interval.

    ``above <= mass < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 10.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.

    Returns:
        bool:
            Whether the mass of the molecule lies in the interval.

    Examples:
        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
        >>> skchem.filters.mass(m, above=70)
        True
        >>> skchem.filters.mass(m, above=80)
        False
        >>> skchem.filters.mass(m, below=80)
        True
        >>> skchem.filters.mass(m, below=70)
        False
        >>> skchem.filters.mass(m, above=70, below=80)
        True
    """

    # Read the mass once, then test the two bounds in turn.
    weight = mol.mass
    return above <= weight and weight < below
242
243
244
class MassFilter(Filter):

    """ Filter molecules by whether their molecular weight falls in an interval.

    ``above <= mass < below``

    Args:
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 3.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.
        **kwargs:
            Any further keyword arguments are forwarded to the base
            initializer.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:

        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_mass = skchem.filters.MassFilter(above=10, below=900)
        >>> f_mass.apply(data).value_counts()
        True     4312
        False      25
        Name: structure, dtype: int64

        >>> len(f_mass.filter(data))
        4312
        >>> len(f_mass.filter(data, neg=True))
        25
    """

    # NOTE(review): the default lower bound here is 3, whereas the ``mass``
    # function defaults to 10 -- confirm which value is intended.
    def __init__(self, above=3, below=900, **kwargs):

        # Raised explicitly rather than through an ``assert`` statement so
        # the check is not stripped when Python runs with -O.  The exception
        # type and message are kept as they were for existing callers.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))

        self.above = above
        self.below = below

        super(MassFilter, self).__init__(
            mass,
            above=self.above,
            below=self.below,
            **kwargs)
0 ignored issues
show
Coding Style introduced by
Wrong continued indentation.
below=self.below, **kwargs)
^ |
Loading history...
285