n_atoms() - Code Metrics - Inspection of "fixed doctests" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 5cb87e...e8ffd6 )

by Rich

created 2016-05-16 10:30 UTC

n_atoms() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	2
Bugs	1	Features	1

Metric	Value
cc	6
c	2
b	1
f	1
dl	0
loc	82
rs	7.3557

How to fix Long Method

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.filters

Chemical filters are defined.

"""

import os

from rdkit import RDConfig
# [Scrutinizer hint attached to the import above; not part of the module]
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import pandas as pd
# [Scrutinizer hint attached to the import above; not part of the module]
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from .core import Mol


def _load_pains():

    """ Load the PAINS patterns included in rdkit into a pandas series.

    The file ``Pains/wehi_pains.csv`` below ``RDConfig.RDDataDir`` is read as
    two columns: the pattern and its name.  Names are expected to be wrapped
    as ``<regId=NAME>``; the wrapper is removed so that the result is indexed
    by the bare name.

    Returns:
        pandas.Series:
            The result of applying ``Mol.from_smarts`` (with ``mergeHs=True``)
            to each pattern, indexed by the pattern name.
    """

    prefix = '<regId='

    def _bare_name(name):
        """ Remove the ``<regId=`` prefix and trailing ``>`` from one name. """
        if not hasattr(name, 'startswith'):
            # Missing values (NaN) are passed through untouched.
            return name
        # `str.lstrip(prefix)` would treat the argument as a *set* of
        # characters and also eat leading letters of the name itself
        # (e.g. the 'e' of 'ene_...'), so the prefix is sliced off instead.
        if name.startswith(prefix):
            name = name[len(prefix):]
        return name.rstrip('>')

    path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
    pains = pd.read_csv(path, names=['pains', 'names'])
    pains['names'] = pains.names.map(_bare_name)
    return pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)


# Built once, when the module is imported; `no_pains` checks molecules against
# every entry of this series.
PAINS = _load_pains()
# Element symbols accepted by `is_organic`.
ORGANIC = ['H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']

def is_organic(mol):

    """ Test whether a molecule is built from organic elements only.

    A molecule counts as organic here when the element of every one of its
    atoms is listed in `ORGANIC`, i.e. is one of H, B, C, N, O, F, P, S, Cl,
    Br or I.

    Args:
        mol (skchem.Mol):
            The molecule to be tested.

    Returns:
        bool:
            True if no atom has an element outside the organic set.

    Examples:

            Basic usage as a function on molecules:

            >>> import skchem
            >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
            >>> skchem.filters.is_organic(m1)
            True
            >>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]', \
                                            name='ferrocene')
            >>> skchem.filters.is_organic(m2)
            False

            More useful in combination with pandas data frames:

            >>> import gzip
            >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
            >>> data = skchem.read_sdf(sdf)
            >>> data.structure.apply(skchem.filters.is_organic).value_counts()
            True     4253
            False      84
            Name: structure, dtype: int64
    """

    for atom in mol.atoms:
        # A single atom outside the allowed set settles the answer.
        if atom.element not in ORGANIC:
            return False
    return True


def no_pains(mol):

    """ Whether a molecule passes the Pan Assay INterference (PAINS) filters.

    These are supplied with RDKit, and were originally proposed by Baell et al.
    The molecule passes when none of the patterns in `PAINS` is contained in
    it (as decided by ``pattern in mol``).

    Args:
        mol (skchem.Mol):
            The molecule to be tested.

    Returns:
        bool:
            Whether the molecule passes all the pains filters.

    References:
        [The original paper](http://dx.doi.org/10.1021/jm901137j)

    Examples:

            Basic usage as a function on molecules:

            >>> import skchem
            >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
            >>> skchem.filters.no_pains(m1)
            True
            >>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
            >>> skchem.filters.no_pains(m2)
            False

            More useful in combination with pandas data frames:

            >>> import gzip
            >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
            >>> data = skchem.read_sdf(sdf)
            >>> data.structure.apply(skchem.filters.no_pains).value_counts()
            True     3855
            False     482
            Name: structure, dtype: int64
    """

    # Feeding `all` a generator lets it stop at the first pattern found in the
    # molecule, instead of testing every pattern and building a temporary
    # series of results first.
    return all(pattern not in mol for pattern in PAINS)


def n_atoms(mol, above=None, below=None, include_hydrogens=False):

    """ Whether the number of atoms in a molecule falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        mol (skchem.Mol):
            The molecule to be tested.
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to None, meaning a lower bound of 0.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to None, meaning no upper bound.
        include_hydrogens (bool):
            Whether to add the implicit hydrogen count of every atom (as
            reported by ``GetNumImplicitHs``) to the number of atoms.
            Defaults to False.

    Returns:
        bool:
            Whether the atom count lies in the interval ``[above, below)``.

    Raises:
        AssertionError:
            If ``above < below`` does not hold, i.e. the interval is empty.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.

        Lower threshold:

        >>> skchem.filters.n_atoms(m, above=3)
        True
        >>> skchem.filters.n_atoms(m, above=8)
        False

        Higher threshold:

        >>> skchem.filters.n_atoms(m, below=8)
        True
        >>> skchem.filters.n_atoms(m, below=3)
        False

        Bounds work like Python slices - inclusive lower, exclusive upper:

        >>> skchem.filters.n_atoms(m, above=6)
        True
        >>> skchem.filters.n_atoms(m, below=6)
        False

        Both can be used at once:

        >>> skchem.filters.n_atoms(m, above=3, below=8)
        True

        Can include hydrogens:

        >>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
        False
        >>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
        True

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.n_atoms, above=5, below=50).value_counts()
        True     4211
        False     126
        Name: structure, dtype: int64

    """
    # Only a missing bound (None) means "unbounded".  An explicit 0 is a real
    # bound and must not be swapped for the default, which a plain truthiness
    # test would do.
    if above is None:
        above = 0
    if below is None:
        below = float('inf')  # genuinely unbounded, not just "large"

    n_a = len(mol.atoms)
    if include_hydrogens:
        # NOTE(review): only implicit hydrogens are added here; whether
        # hydrogens recorded in other ways should count too is not visible
        # from this function - confirm against the atom API.
        n_a += sum(a.GetNumImplicitHs() for a in mol.atoms)

    # Raised explicitly rather than with an `assert` statement so that the
    # check is not stripped when Python runs with -O; the exception type is
    # kept for callers that already rely on it.
    if not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))
    return above <= n_a < below


def mass(mol, above=None, below=None):

    """ Whether the molecular weight of a molecule falls in a defined interval.

    ``above <= mass < below``

    Args:
        mol (skchem.Mol):
            The molecule to be tested.
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to None, meaning a lower bound of 0.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to None, meaning no upper bound.

    Returns:
        bool:
            Whether the mass of the molecule lies in the interval
            ``[above, below)``.

    Raises:
        AssertionError:
            If ``above < below`` does not hold, i.e. the interval is empty.

    Examples:
        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
        >>> skchem.filters.mass(m, above=70)
        True
        >>> skchem.filters.mass(m, above=80)
        False
        >>> skchem.filters.mass(m, below=80)
        True
        >>> skchem.filters.mass(m, below=70)
        False
        >>> skchem.filters.mass(m, above=70, below=80)
        True

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> data.structure.apply(skchem.filters.mass, below=900).value_counts()
        True     4312
        False      25
        Name: structure, dtype: int64
    """

    # Only a missing bound (None) means "unbounded".  An explicit 0 is a real
    # bound and must not be swapped for the default, which a plain truthiness
    # test would do.
    if above is None:
        above = 0
    if below is None:
        # A finite sentinel would wrongly reject molecules heavier than it.
        below = float('inf')

    # Raised explicitly rather than with an `assert` statement so that the
    # check is not stripped when Python runs with -O; the exception type is
    # kept for callers that already rely on it.
    if not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))
    return above <= mol.mass < below


Push — master ( 5cb87e...e8ffd6 )

n_atoms() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			# skchem.filters
8
9			Chemical filters are defined.
10
11			"""
12
13			import os
14
15			from rdkit import RDConfig
			0 ignored issues – show Configuration introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The import `rdkit` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
17
18			from .core import Mol
19
20
21			def _load_pains():
22
23			""" Load PAINS included in rdkit into a pandas dataframe """
24
25			path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
26			pains = pd.read_csv(path, names=['pains', 'names'])
27			pains['names'] = pains.names.str.lstrip('<regId=').str.rstrip('>')
28			return pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)
			0 ignored issues – show Bug introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
29
30			PAINS = _load_pains()
31			ORGANIC = ['H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']
32
33			def is_organic(mol):
34
35			""" Whether a molecule is organic.
36
37			For the purpose of this function, an organic molecule is defined as having
38			atoms with elements only in the set H, B, C, N, O, F, P, S, Cl, Br, I.
39
40			Args:
41			mol (skchem.Mol):
42			The molecule to be tested.
43
44			Returns:
45			bool:
46			Whether the molecule is organic.
47
48			Examples:
49
50			Basic usage as a function on molecules:
51
52			>>> import skchem
53			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
54			>>> skchem.filters.is_organic(m1)
55			True
56			>>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]', \
57			name='ferrocene')
58			>>> skchem.filters.is_organic(m2)
59			False
60
61			More useful in combination with pandas data frames:
62
63			>>> import gzip
64			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
65			>>> data = skchem.read_sdf(sdf)
66			>>> data.structure.apply(skchem.filters.is_organic).value_counts()
67			True 4253
68			False 84
69			Name: structure, dtype: int64
70			"""
71
72			return all(atom.element in ORGANIC for atom in mol.atoms)
73
74
75			def no_pains(mol):
76
77			""" Whether a molecule passes the Pan Assay INterference (PAINS) filters.
78
79			These are supplied with RDKit, and were originally proposed by Baell et al.
80
81			Args:
82			mol: (skchem.Mol):
83			The molecule to be tested.
84
85			Returns:
86			bool:
87			Whether the molecule passes all the pains filters.
88
89			References:
90			[The original paper](http://dx.doi.org/10.1021/jm901137j)
91
92			Examples:
93
94			Basic usage as a function on molecules:
95
96			>>> import skchem
97			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
98			>>> skchem.filters.no_pains(m1)
99			True
100			>>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
101			>>> skchem.filters.no_pains(m2)
102			False
103
104			More useful in combination with pandas data frames:
105
106			>>> import gzip
107			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
108			>>> data = skchem.read_sdf(sdf)
109			>>> data.structure.apply(skchem.filters.no_pains).value_counts()
110			True 3855
111			False 482
112			Name: structure, dtype: int64
113			"""
114
115			return all(PAINS.apply(lambda pains: pains not in mol))
116
117
118			def n_atoms(mol, above=None, below=None, include_hydrogens=False):
119
120			""" Whether the number of atoms in a molecule falls in a defined interval.
121
122			``above <= n_atoms < below``
123
124			Args:
125			mol: (skchem.Mol):
126			The molecule to be tested.
127			above (int):
128			The lower threshold number of atoms (exclusive).
129			Defaults to None.
130			below (int):
131			The higher threshold number of atoms (inclusive).
132			Defaults to None.
133
134			Returns:
135			bool:
136			Whether the molecule has more atoms than the threshold.
137
138			Examples:
139
140			Basic usage as a function on molecules:
141
142			>>> import skchem
143			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.
144
145			Lower threshold:
146
147			>>> skchem.filters.n_atoms(m, above=3)
148			True
149			>>> skchem.filters.n_atoms(m, above=8)
150			False
151
152			Higher threshold:
153
154			>>> skchem.filters.n_atoms(m, below=8)
155			True
156			>>> skchem.filters.n_atoms(m, below=3)
157			False
158
159			Bounds work like Python slices - inclusive lower, exclusive upper:
160
161			>>> skchem.filters.n_atoms(m, above=6)
162			True
163			>>> skchem.filters.n_atoms(m, below=6)
164			False
165
166			Both can be used at once:
167
168			>>> skchem.filters.n_atoms(m, above=3, below=8)
169			True
170
171			Can include hydrogens:
172
173			>>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
174			False
175			>>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
176			True
177
178			More useful in combination with pandas data frames:
179
180			>>> import gzip
181			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
182			>>> data = skchem.read_sdf(sdf)
183			>>> data.structure.apply(skchem.filters.n_atoms, above=5, below=50).value_counts()
184			True 4211
185			False 126
186			Name: structure, dtype: int64
187
188			"""
189			if not above:
190			above = 0
191			if not below:
192			below = 1000000 # arbitrarily large number
193
194			n_a = len(mol.atoms)
195			if include_hydrogens:
196			n_a += sum(a.GetNumImplicitHs() for a in mol.atoms)
197
198			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
199			return above <= n_a < below
200
201
202			def mass(mol, above=None, below=None):
203
204			""" Whether a the molecular weight of a molecule is lower than a threshold.
205
206			``above <= mass < below``
207
208			Args:
209			mol: (skchem.Mol):
210			The molecule to be tested.
211			above (float):
212			The lower threshold on the mass.
213			Defaults to None.
214			below (float):
215			The higher threshold on the mass.
216			Defaults to None.
217
218			Returns:
219			bool:
220			Whether the mass of the molecule is lower than the threshold.
221
222			Examples:
223			Basic usage as a function on molecules:
224
225			>>> import skchem
226			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
227			>>> skchem.filters.mass(m, above=70)
228			True
229			>>> skchem.filters.mass(m, above=80)
230			False
231			>>> skchem.filters.mass(m, below=80)
232			True
233			>>> skchem.filters.mass(m, below=70)
234			False
235			>>> skchem.filters.mass(m, above=70, below=80)
236			True
237
238			More useful in combination with pandas data frames:
239
240			>>> import gzip
241			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
242			>>> data = skchem.read_sdf(sdf)
243			>>> data.structure.apply(skchem.filters.mass, below=900).value_counts()
244			True 4312
245			False 25
246			Name: structure, dtype: int64
247			"""
248
249			if not above:
250			above = 0
251			if not below:
252			below = 1000000
253
254			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
255			return above <= mol.mass < below
256

richlewis42 / scikit-chem

Push — master ( 5cb87e...e8ffd6 )

n_atoms() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files