no_pains() - Code Metrics - Inspection of "added filters" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 3371cb...5cb87e )

by Rich

created 2016-05-15 17:11 UTC

no_pains() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
cc	2
c	1
b	0
f	1
dl	0
loc	41
rs	8.8571

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.filters

Chemical filters are defined.

"""

import os

from rdkit import RDConfig
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import pandas as pd
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from .core import Mol


def _strip_reg_id(name):

    """ Remove the ``<regId=`` prefix and ``>`` suffix from a PAINS label.

    Only the exact prefix and a single trailing ``>`` are removed.  Values
    that are not strings (e.g. NaN for a missing label) are returned as is.
    """

    if not isinstance(name, str):
        return name
    prefix, suffix = '<regId=', '>'
    if name.startswith(prefix):
        name = name[len(prefix):]
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name


def _load_pains():

    """ Load PAINS included in rdkit into a pandas series.

    Reads ``Pains/wehi_pains.csv`` from the RDKit data directory, which holds
    one SMARTS pattern and one ``<regId=...>`` label per row.

    Returns:
        pandas.Series:
            The result of ``Mol.from_smarts(smarts, mergeHs=True)`` for each
            pattern, indexed by the cleaned label.
    """

    path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
    pains = pd.read_csv(path, names=['pains', 'names'])
    # ``str.lstrip('<regId=')`` strips a *set of characters*, not a prefix, so
    # it also removed the leading letters of labels such as ``ene_...`` or
    # ``dyes...``.  Strip the exact prefix and suffix instead.
    pains['names'] = pains['names'].map(_strip_reg_id)
    return pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)


# PAINS query patterns, loaded once when the module is imported.
PAINS = _load_pains()

# Element symbols a molecule may contain and still count as organic.
ORGANIC = [
    'H', 'B', 'C', 'N', 'O',
    'F', 'P', 'S', 'Cl', 'Br', 'I',
]

def is_organic(mol):

    """ Check that a molecule is built only from 'organic' elements.

    A molecule counts as organic here when every one of its atoms has an
    element in the set H, B, C, N, O, F, P, S, Cl, Br, I.

    Args:
        mol (skchem.Mol):
            The molecule to be tested.

    Returns:
        bool:
            True if all atoms are in the organic set, False otherwise.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> skchem.filters.is_organic(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]',
        ...                             name='ferrocene')
        >>> skchem.filters.is_organic(m2)
        False

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.is_organic).value_counts()
        True     4252
        False      84
        Name: structure, dtype: int64
    """

    # Bail out on the first atom whose element is outside the allowed set.
    for atom in mol.atoms:
        if atom.element not in ORGANIC:
            return False
    return True


def no_pains(mol):

    """ Whether a molecule passes the Pan Assay INterference (PAINS) filters.

    These are supplied with RDKit, and were originally proposed by Baell et al.

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.

    Returns:
        bool:
            Whether the molecule passes all the pains filters, i.e. none of
            the patterns in ``PAINS`` is contained in the molecule.

    References:
        [The original paper](http://dx.doi.org/10.1021/jm901137j)

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> skchem.filters.no_pains(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
        >>> skchem.filters.no_pains(m2)
        False

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.no_pains).value_counts()
        True     3854
        False     482
        Name: structure, dtype: int64
    """

    # A generator lets ``all`` stop at the first matching pattern instead of
    # testing every pattern and building an intermediate Series first.
    return all(pattern not in mol for pattern in PAINS)


def n_atoms(mol, above=None, below=None, include_hydrogens=False):

    """ Whether the number of atoms in a molecule falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to None, which means a lower bound of zero.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to None, which means no upper bound.
        include_hydrogens (bool):
            Whether atoms in ``mol.atoms`` with element ``'H'`` are counted.
            Defaults to False.

    Returns:
        bool:
            Whether the number of atoms lies in the interval.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.


        Lower threshold:

        >>> skchem.filters.n_atoms(m, above=3)
        True
        >>> skchem.filters.n_atoms(m, above=8)
        False

        Higher threshold:

        >>> skchem.filters.n_atoms(m, below=8)
        True
        >>> skchem.filters.n_atoms(m, below=3)
        False

        Bounds work like Python slices - inclusive lower, exclusive upper:

        >>> skchem.filters.n_atoms(m, above=6)
        True
        >>> skchem.filters.n_atoms(m, below=6)
        False

        Both can be used at once:

        >>> skchem.filters.n_atoms(m, above=3, below=8)
        True

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.n_atoms, above=5, below=50).value_counts()
        True     4113
        False     223
        Name: structure, dtype: int64

    """
    # Test against None explicitly so that an explicit bound of 0 is honoured
    # rather than being treated as "not given".
    if above is None:
        above = 0
    if below is None:
        below = float('inf')  # no upper bound

    # Raised explicitly instead of via ``assert`` so the check is not stripped
    # under ``python -O``; the exception type is kept for existing callers.
    if not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))

    if include_hydrogens:
        n_a = len(mol.atoms)
    else:
        # Compare by value: ``is not 'H'`` tests object identity, which is
        # not guaranteed to hold for equal strings.
        n_a = sum(1 for a in mol.atoms if a.element != 'H')

    return above <= n_a < below


def mass(mol, above=None, below=None):

    """ Whether the molecular weight of a molecule falls in a defined interval.

    ``above <= mass < below``

    Args:
        mol: (skchem.Mol):
            The molecule to be tested.
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to None, which means a lower bound of zero.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to None, which means no upper bound.

    Returns:
        bool:
            Whether the mass of the molecule lies in the interval.

    Examples:
        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
        >>> skchem.filters.mass(m, above=70)
        True
        >>> skchem.filters.mass(m, above=80)
        False
        >>> skchem.filters.mass(m, below=80)
        True
        >>> skchem.filters.mass(m, below=70)
        False
        >>> skchem.filters.mass(m, above=70, below=80)
        True

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.mass, below=900).value_counts()
        True     4311
        False      25
        Name: structure, dtype: int64
    """

    # Test against None explicitly so that an explicit bound of 0 is honoured
    # rather than being treated as "not given".
    if above is None:
        above = 0
    if below is None:
        below = float('inf')  # no upper bound

    return above <= mol.mass < below


Push — master ( 3371cb...5cb87e )

no_pains() B

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			# skchem.filters
8
9			Chemical filters are defined.
10
11			"""
12
13			import os
14
15			from rdkit import RDConfig
			0 ignored issues – show Configuration introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The import `rdkit` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
17
18			from .core import Mol
19
20
21			def _load_pains():
22
23			""" Load PAINS included in rdkit into a pandas dataframe """
24
25			path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
26			pains = pd.read_csv(path, names=['pains', 'names'])
27			pains['names'] = pains.names.str.lstrip('<regId=').str.rstrip('>')
28			return pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)
			0 ignored issues – show Bug introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
29
30			PAINS = _load_pains()
31			ORGANIC = ['H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']
32
33			def is_organic(mol):
34
35			""" Whether a molecule is organic.
36
37			For the purpose of this function, an organic molecule is defined as having
38			atoms with elements only in the set H, B, C, N, O, F, P, S, Cl, Br, I.
39
40			Args:
41			mol (skchem.Mol):
42			The molecule to be tested.
43
44			Returns:
45			bool:
46			Whether the molecule is organic.
47
48			Examples:
49
50			Basic usage as a function on molecules:
51
52			>>> import skchem
53			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
54			>>> skchem.filters.is_organic(m1)
55			True
56			>>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]',
57			name='ferrocene')
58			>>> skchem.filters.is_organic(m2)
59			False
60
61			More useful in combination with pandas data frames:
62
63			>>> import gzip
64			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
65			>>> data = skchem.read_sdf(sdf)
66			>>> data.structure.apply(skchem.filters.is_organic).value_counts()
67			True 4252
68			False 84
69			Name: structure, dtype: int64
70			"""
71
72			return all(atom.element in ORGANIC for atom in mol.atoms)
73
74
75			def no_pains(mol):
76
77			""" Whether a molecule passes the Pan Assay INterference (PAINS) filters.
78
79			These are supplied with RDKit, and were originally proposed by Baell et al.
80
81			Args:
82			mol: (skchem.Mol):
83			The molecule to be tested.
84
85			Returns:
86			bool:
87			Whether the molecule passes all the pains filters.
88
89			References:
90			[The original paper](http://dx.doi.org/10.1021/jm901137j)
91
92			Examples:
93
94			Basic usage as a function on molecules:
95
96			>>> import skchem
97			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
98			>>> skchem.filters.no_pains(m1)
99			True
100			>>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
101			>>> skchem.filters.no_pains(m2)
102			False
103
104			More useful in combination with pandas data frames:
105
106			>>> import gzip
107			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
108			>>> data = skchem.read_sdf(sdf)
109			>>> data.structure.apply(skchem.filters.no_pains).value_counts()
110			True 3854
111			False 482
112			Name: structure, dtype: int64
113			"""
114
115			return all(PAINS.apply(lambda pains: pains not in mol))
116
117
118			def n_atoms(mol, above=None, below=None, include_hydrogens=False):
119
120			""" Whether the number of atoms in a molecule falls in a defined interval.
121
122			``above <= n_atoms < below``
123
124			Args:
125			mol: (skchem.Mol):
126			The molecule to be tested.
127			above (int):
128			The lower threshold number of atoms (exclusive).
129			Defaults to None.
130			below (int):
131			The higher threshold number of atoms (inclusive).
132			Defaults to None.
133
134			Returns:
135			bool:
136			Whether the molecule has more atoms than the threshold.
137
138			Examples:
139
140			Basic usage as a function on molecules:
141
142			>>> import skchem
143			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.
144
145
146			Lower threshold:
147
148			>>> skchem.filters.n_atoms(m, above=3)
149			True
150			>>> skchem.filters.n_atoms(m, above=8)
151			False
152
153			Higher threshold:
154
155			>>> skchem.filters.n_atoms(m, below=8)
156			True
157			>>> skchem.filters.n_atoms(m, below=3)
158			False
159
160			Bounds work like Python slices - inclusive lower, exclusive upper:
161
162			>>> skchem.filters.n_atoms(m, above=6)
163			True
164			>>> skchem.filters.n_atoms(m, below=6)
165			False
166
167			Both can be used at once:
168
169			>>> skchem.filters.n_atoms(m, above=3, below=8)
170			True
171
172			More useful in combination with pandas data frames:
173
174			>>> import gzip
175			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
176			>>> data = skchem.read_sdf(sdf)
177			>>> data.structure.apply(skchem.filters.n_atoms, above=5, below=50).value_counts()
178			True 4113
179			False 223
180			Name: structure, dtype: int64
181
182			"""
183			if not above:
184			above = 0
185			if not below:
186			below = 1000000 # arbitrarily large number
187
188			if not include_hydrogens:
189			n_a = len([a for a in mol.atoms if a.element is not 'H'])
190			else:
191			n_a = len(mol.atoms)
192
193			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
194			return above <= n_a < below
195
196
197			def mass(mol, above=None, below=None):
198
199			""" Whether a the molecular weight of a molecule is lower than a threshold.
200
201			``above <= mass < below``
202
203			Args:
204			mol: (skchem.Mol):
205			The molecule to be tested.
206			above (float):
207			The lower threshold on the mass.
208			Defaults to None.
209			below (float):
210			The higher threshold on the mass.
211			Defaults to None.
212
213			Returns:
214			bool:
215			Whether the mass of the molecule is lower than the threshold.
216
217			Examples:
218			Basic usage as a function on molecules:
219
220			>>> import skchem
221			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
222			>>> skchem.filters.n_atoms(m, above=70)
223			True
224			>>> skchem.filters.n_atoms(m, above=80)
225			False
226			>>> skchem.filters.n_atoms(m, below=80)
227			True
228			>>> skchem.filters.n_atoms(m, below=70)
229			False
230			>>> skchem.filters.n_atoms(m, above=70, below=80)
231			True
232
233			More useful in combination with pandas data frames:
234
235			>>> import gzip
236			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
237			>>> data = skchem.read_sdf(sdf)
238			>>> data.structure.apply(skchem.filters.mass, below=900).value_counts()
239			True 4311
240			False 25
241			Name: structure, dtype: int64
242			"""
243
244			if not above:
245			above = 0
246			if not below:
247			below = 1000000
248
249			return above <= mol.mass < below
250

richlewis42 / scikit-chem

Push — master ( 3371cb...5cb87e )

no_pains() B

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files