ElementFilter - Code Metrics - Inspection of "fixed atom descriptor, added error checking to phy..." - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( e06060...97f98d )

by Rich

created 2016-06-28 09:54 UTC

ElementFilter A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	17
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
wmc	3
c	1
b	0
f	1
dl	0
loc	17
rs	10

2 Methods

Rating	Name	Duplication	Size	Complexity
A	func()	0	3	2
A	__init__()	0	5	1

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""

# skchem.filters.simple

Simple filters for compounds.

"""

from .base import Filter
import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3


class ElementFilter(Filter):

    """ Filter by elements.

    Args:
        elements: A collection of allowed elements.  If an element not in
            the collection is found in a molecule, the filter returns False,
            else it returns True.
        **kwargs: Accepted but not used by this constructor.
            NOTE(review): these are not forwarded to ``Filter.__init__`` -
            confirm whether that is intentional.
    """

    def __init__(self, elements, **kwargs):

        self.elements = elements

        super(ElementFilter, self).__init__(self.func)

    def func(self, mol):

        """ Whether every atom of a molecule has an allowed element.

        Args:
            mol: The molecule to be tested.  Its ``atoms`` are iterated and
                the ``element`` of each atom is looked up in
                ``self.elements``.

        Returns:
            bool:
                False if any atom has an element outside ``self.elements``,
                True otherwise (including when there are no atoms).
        """

        return all(atom.element in self.elements for atom in mol.atoms)


class OrganicFilter(ElementFilter):

    """ Filter for organic molecules.

    A molecule is treated as organic when every one of its atoms has an
    element in the set H, B, C, N, O, F, P, S, Cl, Br, I.  The constructor
    takes no arguments; the allowed elements are fixed by the ``elements``
    class attribute.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> is_organic = skchem.filters.OrganicFilter()
        >>> is_organic(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]',
        ...                             name='ferrocene')
        >>> is_organic(m2)
        False

        More useful in combination with pandas data frames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> is_organic.apply(data).value_counts()
        True     4253
        False      84
        Name: structure, dtype: int64

        >>> len(is_organic.filter(data))
        4253
        >>> len(is_organic.filter(data, neg=True))
        84
    """

    # Element symbols a molecule may contain and still count as organic.
    elements = [
        'H', 'B', 'C', 'N', 'O', 'F',
        'P', 'S', 'Cl', 'Br', 'I',
    ]

    def __init__(self):
        # Hand the class-level element list to the generic element filter.
        allowed = self.elements
        super(OrganicFilter, self).__init__(allowed)


def n_atoms(mol, above=2, below=75, include_hydrogens=False):

    """ Whether the number of atoms in a molecule falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        mol (skchem.Mol):
            The molecule to be tested.
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 2.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 75.
        include_hydrogens (bool):
            Whether to add the implicit hydrogen count of every atom
            (``atom.GetNumImplicitHs()``) to the number of atoms.
            Defaults to False.

    Returns:
        bool:
            Whether the number of atoms lies in the interval.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.  Note that this
            is checked against the defaults too, so passing only ``above``
            with a value of 75 or more raises.

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.

        Lower threshold:

        >>> skchem.filters.n_atoms(m, above=3)
        True
        >>> skchem.filters.n_atoms(m, above=8)
        False

        Higher threshold:

        >>> skchem.filters.n_atoms(m, below=8)
        True
        >>> skchem.filters.n_atoms(m, below=3)
        False

        Bounds work like Python slices - inclusive lower, exclusive upper:

        >>> skchem.filters.n_atoms(m, above=6)
        True
        >>> skchem.filters.n_atoms(m, below=6)
        False

        Both can be used at once:

        >>> skchem.filters.n_atoms(m, above=3, below=8)
        True

        Can include hydrogens:

        >>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
        False
        >>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
        True

    """

    # Raised explicitly instead of using ``assert`` so that the validation is
    # not stripped when Python runs with ``-O``; the exception type is kept so
    # existing callers that catch AssertionError continue to work.
    if not above < below:
        raise AssertionError(
            'Interval {} < a < {} undefined.'.format(above, below))

    n_a = len(mol.atoms)
    if include_hydrogens:
        n_a += sum(atom.GetNumImplicitHs() for atom in mol.atoms)

    return above <= n_a < below

class AtomNumberFilter(Filter):

    """ Filter for whether the number of atoms falls in a defined interval.

    ``above <= n_atoms < below``

    Args:
        above (int):
            The lower threshold number of atoms (inclusive).
            Defaults to 3.
        below (int):
            The higher threshold number of atoms (exclusive).
            Defaults to 60.
        include_hydrogens (bool):
            Passed on to ``n_atoms``; whether implicit hydrogens are added
            to the atom count.  Defaults to False.
        **kwargs:
            Forwarded unchanged to ``Filter.__init__``.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:
        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_natom = skchem.filters.AtomNumberFilter(above=3, below=60)
        >>> f_natom.apply(data).value_counts()
        True     4306
        False      31
        Name: structure, dtype: int64

        >>> len(f_natom.filter(data))
        4306
        >>> len(f_natom.filter(data, neg=True))
        31
    """

    def __init__(self, above=3, below=60, include_hydrogens=False, **kwargs):

        # Raised explicitly instead of using ``assert`` so that the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))

        self.above = above
        self.below = below
        self.include_hydrogens = include_hydrogens

        super(AtomNumberFilter, self).__init__(
            n_atoms,
            above=self.above,
            below=self.below,
            include_hydrogens=self.include_hydrogens,
            **kwargs)



def mass(mol, above=10, below=900):

    """ Whether the molecular weight of a molecule falls in a defined interval.

    ``above <= mass < below``

    Args:
        mol (skchem.Mol):
            The molecule to be tested; its ``mass`` attribute is compared
            against the bounds.
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 10.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.

    Returns:
        bool:
            Whether the mass of the molecule lies in the interval.

    Examples:
        Basic usage as a function on molecules:

        >>> import skchem
        >>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
        >>> skchem.filters.mass(m, above=70)
        True
        >>> skchem.filters.mass(m, above=80)
        False
        >>> skchem.filters.mass(m, below=80)
        True
        >>> skchem.filters.mass(m, below=70)
        False
        >>> skchem.filters.mass(m, above=70, below=80)
        True
    """

    # Read the mass once, then test the lower and upper bounds in turn.
    weight = mol.mass
    return above <= weight and weight < below


class MassFilter(Filter):
    """ Filter for whether the molecular weight falls in a defined interval.

    ``above <= mass < below``

    Args:
        above (float):
            The lower threshold on the mass (inclusive).
            Defaults to 3.
            NOTE(review): the ``mass`` function defaults to 10 - confirm
            which default is intended.
        below (float):
            The higher threshold on the mass (exclusive).
            Defaults to 900.
        **kwargs:
            Forwarded unchanged to ``Filter.__init__``.

    Raises:
        AssertionError:
            If ``above`` is not strictly less than ``below``.

    Examples:

        >>> import skchem
        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> f_mass = skchem.filters.MassFilter(above=10, below=900)
        >>> f_mass.apply(data).value_counts()
        True     4312
        False      25
        Name: structure, dtype: int64

        >>> len(f_mass.filter(data))
        4312
        >>> len(f_mass.filter(data, neg=True))
        25
    """

    def __init__(self, above=3, below=900, **kwargs):

        # Raised explicitly instead of using ``assert`` so that the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        if not above < below:
            raise AssertionError(
                'Interval {} < a < {} undefined.'.format(above, below))

        self.above = above
        self.below = below

        super(MassFilter, self).__init__(
            mass,
            above=self.above,
            below=self.below,
            **kwargs)



Push — master ( e06060...97f98d )

ElementFilter A

Complexity

Size/Duplication

Importance

2 Methods

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7
8			# skchem.filters.simple
9
10			Simple filters for compounds.
11
12			"""
13
14			from .base import Filter
15			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history... Unused Code introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Unused pandas imported as pd Loading history...
16
17
18			class ElementFilter(Filter):
19
20			""" Filter by elements.
21
22			Args:
23			elements: A list of elements to filter with. If an element not in
24			the list is found in a molecule, return False, else return True.
25			"""
26			def __init__(self, elements, **kwargs):
27
28			self.elements = elements
29
30			super(ElementFilter, self).__init__(self.func)
31
32			def func(self, mol):
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This method should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history... Bug introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This method seems to be hidden by an attribute defined in `skchem.filters.base` on line 69. Loading history...
33
34			return all(atom.element in self.elements for atom in mol.atoms)
35
36
37			class OrganicFilter(ElementFilter):
38
39			# TODO: rewrite the docs
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report `TODO` and `FIXME` comments should generally be avoided. Loading history...
40
41			""" Whether a molecule is organic.
42
43			For the purpose of this function, an organic molecule is defined as having
44			atoms with elements only in the set H, B, C, N, O, F, P, S, Cl, Br, I.
45
46			Args:
47			mol (skchem.Mol):
48			The molecule to be tested.
49
50			Returns:
51			bool:
52			Whether the molecule is organic.
53
54			Examples:
55
56			Basic usage as a function on molecules:
57
58			>>> import skchem
59			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
60			>>> is_organic = skchem.filters.OrganicFilter()
61			>>> is_organic(m1)
62			True
63			>>> m2 = skchem.Mol.from_smiles('[cH-]1cccc1.[cH-]1cccc1.[Fe+2]', \
64			name='ferrocene')
65			>>> is_organic(m2)
66			False
67
68			More useful in combination with pandas data frames:
69
70			>>> import gzip
71			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
72			>>> data = skchem.read_sdf(sdf)
73			>>> is_organic.apply(data).value_counts()
74			True 4253
75			False 84
76			Name: structure, dtype: int64
77
78			>>> len(is_organic.filter(data))
79			4253
80			>>> len(is_organic.filter(data, neg=True))
81			84
82			"""
83
84			elements = ['H', 'B', 'C', 'N', 'O', 'F', 'P', 'S', 'Cl', 'Br', 'I']
85
86			def __init__(self):
87			super(OrganicFilter, self).__init__(self.elements)
88
89
90			def n_atoms(mol, above=2, below=75, include_hydrogens=False):
91
92			""" Whether the number of atoms in a molecule falls in a defined interval.
93
94			``above <= n_atoms < below``
95
96			Args:
97			mol: (skchem.Mol):
98			The molecule to be tested.
99			above (int):
100			The lower threshold number of atoms (exclusive).
101			Defaults to None.
102			below (int):
103			The higher threshold number of atoms (inclusive).
104			Defaults to None.
105
106			Returns:
107			bool:
108			Whether the molecule has more atoms than the threshold.
109
110			Examples:
111
112			Basic usage as a function on molecules:
113
114			>>> import skchem
115			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has 6 atoms.
116
117			Lower threshold:
118
119			>>> skchem.filters.n_atoms(m, above=3)
120			True
121			>>> skchem.filters.n_atoms(m, above=8)
122			False
123
124			Higher threshold:
125
126			>>> skchem.filters.n_atoms(m, below=8)
127			True
128			>>> skchem.filters.n_atoms(m, below=3)
129			False
130
131			Bounds work like Python slices - inclusive lower, exclusive upper:
132
133			>>> skchem.filters.n_atoms(m, above=6)
134			True
135			>>> skchem.filters.n_atoms(m, below=6)
136			False
137
138			Both can be used at once:
139
140			>>> skchem.filters.n_atoms(m, above=3, below=8)
141			True
142
143			Can include hydrogens:
144
145			>>> skchem.filters.n_atoms(m, above=3, below=8, include_hydrogens=True)
146			False
147			>>> skchem.filters.n_atoms(m, above=9, below=14, include_hydrogens=True)
148			True
149
150			"""
151
152			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
153
154			n_a = len(mol.atoms)
155			if include_hydrogens:
156			n_a += sum(atom.GetNumImplicitHs() for atom in mol.atoms)
157
158			return above <= n_a < below
159
160			class AtomNumberFilter(Filter):
161
162			"""Filter for whether the number of atoms in a molecule falls in a defined interval.
163
164			``above <= n_atoms < below``
165
166			Args:
167			above (int):
168			The lower threshold number of atoms (exclusive).
169			Defaults to None.
170			below (int):
171			The higher threshold number of atoms (inclusive).
172			Defaults to None.
173
174			Args:
175			>>> import skchem
176			>>> import gzip
177			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
178			>>> data = skchem.read_sdf(sdf)
179			>>> f_natom = skchem.filters.AtomNumberFilter(above=3, below=60)
180			>>> f_natom.apply(data).value_counts()
181			True 4306
182			False 31
183			Name: structure, dtype: int64
184
185			>>> len(f_natom.filter(data))
186			4306
187			>>> len(f_natom.filter(data, neg=True))
188			31
189			"""
190
191			def __init__(self, above=3, below=60, include_hydrogens=False, **kwargs):
192
193			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
194			self.above = above
195			self.below = below
196			self.include_hydrogens = include_hydrogens
197
198			super(AtomNumberFilter, self).__init__(n_atoms, above=self.above,
199			below=self.below,
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Wrong continued indentation. below=self.below, ^ \| Loading history...
200			include_hydrogens=self.include_hydrogens,
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Wrong continued indentation. include_hydrogens=self.include_hydrogens, ^ \| Loading history...
201			**kwargs)
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Wrong continued indentation. **kwargs) ^ \| Loading history...
202
203
204			def mass(mol, above=10, below=900):
205
206			""" Whether a the molecular weight of a molecule is lower than a threshold.
207
208			``above <= mass < below``
209
210			Args:
211			mol: (skchem.Mol):
212			The molecule to be tested.
213			above (float):
214			The lower threshold on the mass.
215			Defaults to None.
216			below (float):
217			The higher threshold on the mass.
218			Defaults to None.
219
220			Returns:
221			bool:
222			Whether the mass of the molecule is lower than the threshold.
223
224			Examples:
225			Basic usage as a function on molecules:
226
227			>>> import skchem
228			>>> m = skchem.Mol.from_smiles('c1ccccc1') # benzene has M_r = 78.
229			>>> skchem.filters.mass(m, above=70)
230			True
231			>>> skchem.filters.mass(m, above=80)
232			False
233			>>> skchem.filters.mass(m, below=80)
234			True
235			>>> skchem.filters.mass(m, below=70)
236			False
237			>>> skchem.filters.mass(m, above=70, below=80)
238			True
239			"""
240
241			return above <= mol.mass < below
242
243
244			class MassFilter(Filter):
245			""" Filter whether a the molecular weight of a molecule is lower than a threshold.
246
247			``above <= mass < below``
248
249			Args:
250			mol: (skchem.Mol):
251			The molecule to be tested.
252			above (float):
253			The lower threshold on the mass.
254			Defaults to None.
255			below (float):
256			The higher threshold on the mass.
257			Defaults to None.
258
259			Examples:
260
261			>>> import skchem
262			>>> import gzip
263			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
264			>>> data = skchem.read_sdf(sdf)
265			>>> f_mass = skchem.filters.MassFilter(above=10, below=900)
266			>>> f_mass.apply(data).value_counts()
267			True 4312
268			False 25
269			Name: structure, dtype: int64
270
271			>>> len(f_mass.filter(data))
272			4312
273			>>> len(f_mass.filter(data, neg=True))
274			25
275			"""
276
277			def __init__(self, above=3, below=900, **kwargs):
278
279			assert above < below, 'Interval {} < a < {} undefined.'.format(above, below)
280			self.above = above
281			self.below = below
282
283			super(MassFilter, self).__init__(mass, above=self.above,
284			below=self.below, **kwargs)
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Wrong continued indentation. below=self.below, **kwargs) ^ \| Loading history...
285

richlewis42 / scikit-chem

Push — master ( e06060...97f98d )

ElementFilter A

Complexity

Size/Duplication

Importance

2 Methods

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files