Completed
Push — master ( 97f98d...9ca1ad )
by Rich
01:51
created

ElementFilter.elements()   A

Complexity

Conditions 2

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 3
rs 10
cc 2
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6
"""
7
8
# skchem.filters.simple
9
10
Simple filters for compounds.
11
12
"""
13
14
from collections import Counter
15
import pandas as pd
0 ignored issues
show
Configuration introduced by
The import pandas could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
16
import numpy as np
0 ignored issues
show
Configuration introduced by
The import numpy could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
17
18
from .base import Filter
19
from ..data import PERIODIC_TABLE
20
21
# Index of every element symbol in PERIODIC_TABLE, named 'element'.
# ElementFilter uses it as the universe of symbols when `not_in` is set.
ELEMENTS = pd.Index(PERIODIC_TABLE.symbol, name='element')
22
23
class ElementFilter(Filter):

    """ Filter by elements.

    Args:
        elements (list[str]):
            A list of element symbols to count in each molecule.  If `None`,
            every element in `ELEMENTS` is used.
        as_bits (bool):
            Whether to return presence bits (0 or 1) rather than integer
            counts for each element.
        not_in (bool):
            Whether to invert the element list, i.e. track every element in
            `ELEMENTS` except those given in `elements`.
        **kwargs:
            Forwarded unchanged to `Filter.__init__` (for example `agg`, as
            used in the examples below).

    Examples:

        Basic usage on molecules:

        >>> import skchem
        >>> has_halogen = skchem.filters.ElementFilter(['F', 'Cl', 'Br', 'I'])

        Molecules containing any of the elements transform to `True`.

        >>> m1 = skchem.Mol.from_smiles('ClC(Cl)Cl', name='chloroform')
        >>> has_halogen.transform(m1)
        True

        Molecules with none of the elements transform to `False`.

        >>> m2 = skchem.Mol.from_smiles('CC', name='ethane')
        >>> has_halogen.transform(m2)
        False

        Can see the atom breakdown by passing `agg=False`:

        >>> has_halogen.transform(m1, agg=False)
        element
        F     0
        Cl    3
        Br    0
        I     0
        Name: chloroform, dtype: int64

        Or setting it as a property on the filter:

        >>> has_halogen.agg = False
        >>> has_halogen.transform(m1)
        element
        F     0
        Cl    3
        Br    0
        I     0
        Name: chloroform, dtype: int64

        Or even at instantiation:

        >>> has_halogen = skchem.filters.ElementFilter(['F', 'Cl', 'Br', 'I'], agg=False)
        >>> has_halogen.transform(m1)
        element
        F     0
        Cl    3
        Br    0
        I     0
        Name: chloroform, dtype: int64

        Can transform series.

        >>> has_halogen.agg = any
        >>> ms = pd.Series({m.name: m for m in (m1, m2)}, name='structure')
        >>> has_halogen.transform(ms)
        chloroform     True
        ethane        False
        dtype: bool

        >>> has_halogen.transform(ms, agg=False)
        element     F  Cl  Br  I
        chloroform  0   3   0  0
        ethane      0   0   0  0

        Can also filter series.

        >>> has_halogen.filter(ms)
        chloroform    <Mol: ClC(Cl)Cl>
        Name: structure, dtype: object

        >>> has_halogen.filter(ms, neg=True)
        ethane    <Mol: CC>
        Name: structure, dtype: object

    """

    _DEFAULT_AGG = any

    def __init__(self, elements=None, as_bits=False, not_in=False, **kwargs):
        self._elements = None
        # `not_in` must be assigned before `elements`: the setter reads it.
        self.not_in = not_in
        self.elements = elements
        self.as_bits = as_bits
        # NOTE(review): a static-analysis report on this file states that the
        # base class defines an attribute named `func` that may hide the
        # method below - confirm against skchem.filters.base.
        super(ElementFilter, self).__init__(self.func, **kwargs)

    @property
    def elements(self):
        """ pd.Index: The element symbols tracked by this filter. """
        return self._elements

    @elements.setter
    def elements(self, value):
        """ Store the tracked symbols as a `pd.Index` named 'element'. """
        if value is None:
            # No restriction requested: track the whole periodic table
            # instead of failing on `pd.Index(None)` / `ELEMENTS.drop(None)`.
            self._elements = ELEMENTS
        elif self.not_in:
            self._elements = ELEMENTS.drop(value)
        else:
            self._elements = pd.Index(value, name='element')

    @property
    def index(self):
        """ pd.Index: Alias of `elements`. """
        return self.elements

    def func(self, mol):
        """ Count the atoms of each tracked element in a molecule.

        Args:
            mol:
                An object whose `atoms` attribute iterates over atoms that
                each expose an `element` attribute.

        Returns:
            pd.Series:
                Per-element atom counts (or 0/1 `uint8` bits if `as_bits`
                is set), indexed by the tracked elements.
        """
        cntr = Counter(atom.element for atom in mol.atoms)
        res = pd.Series(cntr)
        if self.elements is not None:
            # `reindex` tolerates tracked elements that are absent from the
            # molecule; plain label indexing raises KeyError for them in
            # current pandas.  Absent elements have a count of zero.
            res = res.reindex(self.elements, fill_value=0)
        if self.as_bits:
            res = (res > 0).astype(np.uint8)
        return res

    def _transform(self, ser, **kwargs):
        """ Apply `func` to every molecule in `ser`, filling gaps with 0. """
        res = ser.apply(self.func, **kwargs).fillna(0)
        if not self.as_bits:
            # `np.int` was only an alias of the builtin and has been removed
            # from NumPy, so use the builtin directly.
            res = res.astype(int)
        return res
150
151
152
class OrganicFilter(ElementFilter):

    """ Filter for whether a molecule is organic.

    For the purpose of this filter, an organic molecule is defined as having
    atoms with elements only in the set H, B, C, N, O, F, P, S, Cl, Br, I.

    The constructor takes no arguments.  It configures the parent
    `ElementFilter` with the `organic` symbols, `not_in=True`, `as_bits=True`
    and `agg=any`, so the tracked elements are the ones outside the organic
    set.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> is_organic = skchem.filters.OrganicFilter()
        >>> is_organic(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]', \
                                        name='ferrocene')
        >>> is_organic(m2)
        False

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> is_organic.transform(data).value_counts()
        True     4253
        False      84
        dtype: int64

        >>> len(is_organic.filter(data))
        4253
        >>> len(is_organic.filter(data, neg=True))
        84
    """

    # NOTE(review): presumably makes the base Filter negate the aggregated
    # result - the flag is interpreted in the base class, not shown here.
    _DEFAULT_IS_NEG = True

    # Element symbols regarded as organic.  Kept as a list on purpose: it is
    # handed to `Index.drop` by the parent class.
    organic = ['H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']

    def __init__(self):
        parent_options = dict(not_in=True, as_bits=True, agg=any)
        super(OrganicFilter, self).__init__(self.organic, **parent_options)
203
204
def n_atoms(mol, above=2, below=75, include_hydrogens=False):

    """ Whether the number of atoms in a molecule falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 2.  Pass `None` for no lower bound.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 75.  Pass `None` for no upper bound.
        include_hydrogens (bool):
            Whether to add each atom's implicit hydrogen count to the total.
            Defaults to False.

    Returns:
        bool:
            Whether the atom count lies inside the interval.

    Raises:
        AssertionError:
            If both bounds are given and `above` is not less than `below`.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.

        Lower threshold:

        >>> skchem.filters.n_atoms(m, above=3)
        True
        >>> skchem.filters.n_atoms(m, above=8)
        False

        Higher threshold:

        >>> skchem.filters.n_atoms(m, below=8)
        True
        >>> skchem.filters.n_atoms(m, below=3)
        False

        Bounds work like Python slices - inclusive lower, exclusive upper:

        >>> skchem.filters.n_atoms(m, above=6)
        True
        >>> skchem.filters.n_atoms(m, below=6)
        False

        Both can be used at once:

        >>> skchem.filters.n_atoms(m, above=3, below=8)
        True

        Either bound can be switched off with `None`:

        >>> skchem.filters.n_atoms(m, above=None, below=8)
        True

        Can include hydrogens:

        >>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
        False
        >>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
        True

    """

    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped under `python -O`; the exception type is kept for callers.
    if above is not None and below is not None and not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))

    atoms = mol.atoms
    n_a = len(atoms)
    if include_hydrogens:
        n_a += sum(atom.GetNumImplicitHs() for atom in atoms)

    # A bound of `None` is treated as unbounded on that side.
    return ((above is None or above <= n_a)
            and (below is None or n_a < below))
273
274
class AtomNumberFilter(Filter):

    """ Filter for whether the number of atoms in a molecule falls in a
    defined interval.

    ``above <= n_atoms < below``

    Args:
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 3.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 60.
        include_hydrogens (bool):
            Passed on to `n_atoms`, which adds implicit hydrogens to the
            count when set.  Defaults to False.
        **kwargs:
            Forwarded unchanged to `Filter.__init__`.

    Raises:
        AssertionError:
            If `above` is not less than `below`.

    Examples:
        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_natom = skchem.filters.AtomNumberFilter(above=3, below=60)
        >>> f_natom.transform(data).value_counts()
        True     4306
        False      31
        Name: structure, dtype: int64

        >>> len(f_natom.filter(data))
        4306
        >>> len(f_natom.filter(data, neg=True))
        31
    """

    def __init__(self, above=3, below=60, include_hydrogens=False, **kwargs):

        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; the exception type is kept for callers.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))
        self.above = above
        self.below = below
        self.include_hydrogens = include_hydrogens

        super(AtomNumberFilter, self).__init__(
            n_atoms,
            above=self.above,
            below=self.below,
            include_hydrogens=self.include_hydrogens,
            **kwargs)
0 ignored issues
show
Coding Style introduced by
Wrong continued indentation.
**kwargs)
^ |
Loading history...
316
317
318
def mass(mol, above=10, below=900):

    """ Whether the molecular weight of a molecule falls in a defined
    interval.

    ``above <= mass < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 10.  Pass `None` for no lower bound.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.  Pass `None` for no upper bound.

    Returns:
        bool:
            Whether the mass of the molecule lies inside the interval.

    Examples:
        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
        >>> skchem.filters.mass(m, above=70)
        True
        >>> skchem.filters.mass(m, above=80)
        False
        >>> skchem.filters.mass(m, below=80)
        True
        >>> skchem.filters.mass(m, below=70)
        False
        >>> skchem.filters.mass(m, above=70, below=80)
        True

        Either bound can be switched off with `None`:

        >>> skchem.filters.mass(m, above=None, below=80)
        True
    """

    m_r = mol.mass
    # A bound of `None` is treated as unbounded on that side.
    return ((above is None or above <= m_r)
            and (below is None or m_r < below))
356
357
358
class MassFilter(Filter):

    """ Filter for whether the molecular weight of a molecule falls in a
    defined interval.

    ``above <= mass < below``

    Args:
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 3.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.
        **kwargs:
            Forwarded unchanged to `Filter.__init__`.

    Raises:
        AssertionError:
            If `above` is not less than `below`.

    Examples:

        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_mass = skchem.filters.MassFilter(above=10, below=900)
        >>> f_mass.transform(data).value_counts()
        True     4312
        False      25
        Name: structure, dtype: int64

        >>> len(f_mass.filter(data))
        4312
        >>> len(f_mass.filter(data, neg=True))
        25
    """

    # NOTE(review): the default lower bound here (3) differs from the default
    # of the `mass` function (10) - confirm which is intended.
    def __init__(self, above=3, below=900, **kwargs):

        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped under `python -O`; the exception type is kept for callers.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))
        self.above = above
        self.below = below

        super(MassFilter, self).__init__(
            mass, above=self.above, below=self.below, **kwargs)
0 ignored issues
show
Coding Style introduced by
Wrong continued indentation.
below=self.below, **kwargs)
^ |
Loading history...
399