SMARTSFilter - Code Metrics - Inspection of "fixed atom descriptor, added error checking to phy..." - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( e06060...97f98d )

by Rich

created 2016-06-28 09:54 UTC

SMARTSFilter A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	54
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
wmc	5
c	1
b	0
f	1
dl	0
loc	54
rs	10

3 Methods

Rating	Name	Size	Complexity
A	__init__()	12	3
A	read_smarts()	5	2
A	func()	3	2

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.filters.smarts

Module defines SMARTS filters.
"""

from rdkit import RDConfig
import pandas as pd
import os
import pandas as pd

from .base import Filter
from ..core import Mol

class SMARTSFilter(Filter):

    """ Filter a molecule based on smarts.

    Args:
        smarts (pd.Series):
            A series of SMARTS to use in the filter.  Anything accepted by
            `pd.Series` works (e.g. a dict); string entries are parsed with
            `Mol.from_smarts`, other entries are used as given.
        agg (function):
            Option specifying the mode of the filter.

            - None : No filtering takes place
            - any: If any of the substructures are in molecule return True.
            - all: If all of the substructures are in molecule.

    Examples:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('CC')
        >>> m2 = skchem.Mol.from_smiles('c1ccccc1')
        >>> m3 = skchem.Mol.from_smiles('c1ccccc1-c2c(C=O)ccnc2')
        >>> ms = pd.Series({'ethane': m1, 'benzene': m2, 'big': m3})
        >>> f = skchem.filters.SMARTSFilter({'benzene': 'c1ccccc1',
        ...                                  'pyridine': 'c1ccccn1',
        ...                                  'acetyl': 'C=O'})

        >>> f.apply(ms)
                acetyl benzene pyridine
        benzene  False    True    False
        big       True    True     True
        ethane   False   False    False

        >>> f.filter(ms, agg=any)
        benzene                <Mol: c1ccccc1>
        big        <Mol: O=Cc1ccncc1-c1ccccc1>
        dtype: object

        >>> f.filter(ms, agg=all)
        big    <Mol: O=Cc1ccncc1-c1ccccc1>
        dtype: object
    """

    def __init__(self, smarts, **kwargs):
        """ Parse the patterns and register `func` with the base filter.

        Args:
            smarts:
                Patterns to match; wrapped in a `pd.Series` before parsing.
            **kwargs:
                Forwarded unchanged to the parent class initializer.
        """

        def read_smarts(pattern):
            """ Parse `pattern` if it is a SMARTS string, else pass it through. """
            if isinstance(pattern, str):
                return Mol.from_smarts(pattern, mergeHs=True)
            # Not a string: used as given (presumably an already-built Mol).
            return pattern

        self.smarts = pd.Series(smarts).apply(read_smarts)

        # Labels of the patterns, in the same order as `self.smarts`.
        self.index = self.smarts.index
        super(SMARTSFilter, self).__init__(self.func, **kwargs)

    def func(self, mol):
        """ Test every stored pattern against `mol`.

        Args:
            mol:
                The molecule to search for the substructures.

        Returns:
            pd.Series:
                Indexed like `self.smarts`; each value is the result of
                `pattern in mol` for the corresponding pattern.
        """

        return self.smarts.apply(lambda smarts: smarts in mol)



class PAINSFilter(SMARTSFilter):

    """ Whether a molecule passes the Pan Assay INterference (PAINS) filters.

    These are supplied with RDKit, and were originally proposed by Baell et al.

    References:
        [The original paper](http://dx.doi.org/10.1021/jm901137j)

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> no_pains = PAINSFilter()
        >>> no_pains(m1)
        True
        >>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
        >>> no_pains(m2)
        False

        More useful in combination with pandas DataFrames:

        >>> import gzip
        >>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
        >>> data = skchem.read_sdf(sdf)
        >>> no_pains.apply(data).value_counts()
        True     3855
        False     482
        dtype: int64

        >>> len(no_pains.filter(data))
        3855
    """

    def __init__(self):
        """ Build the filter from the PAINS patterns shipped with RDKit. """

        super(PAINSFilter, self).__init__(self._load_pains(), agg=any, neg=True)

    @staticmethod
    def _strip_reg_id(name, prefix='<regId='):
        """ Remove the literal `prefix` from the start of `name`, if present.

        `str.lstrip(prefix)` must not be used here: it strips any leading run
        of the *characters* in `prefix`, so a name such as `ene_...` or
        `dyes...` would lose its first letters.  Non-string values (e.g. NaN
        from a blank csv field) are returned unchanged.
        """

        if isinstance(name, str) and name.startswith(prefix):
            return name[len(prefix):]
        return name

    @classmethod
    def _load_pains(cls):
        """ Load the PAINS patterns included in rdkit and cache them on the class.

        Returns:
            pd.Series:
                Patterns parsed with `Mol.from_smarts`, indexed by the pattern
                name with the surrounding `<regId=` / `>` removed.
        """

        # A classmethod, so the cache really lives on the class and the csv is
        # parsed once rather than once per instance.
        if not hasattr(cls, '_pains'):
            path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
            pains = pd.read_csv(path, names=['pains', 'names'])
            pains['names'] = pains.names.apply(cls._strip_reg_id).str.rstrip('>')
            pains = pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)

            cls._pains = pains
        return cls._pains


Push — master ( e06060...97f98d )

SMARTSFilter A

Complexity

Size/Duplication

Importance

3 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			# skchem.filters.smarts
8
9			Module defines SMARTS filters.
10			"""
11
12			from rdkit import RDConfig
			0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `rdkit` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
13			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
14			import os
15			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16
17			from .base import Filter
18			from ..core import Mol
19
20			class SMARTSFilter(Filter):
21
22			""" Filter a molecule based on smarts.
23
24			Args:
25			smarts (pd.Series):
26			A series of SMARTS to use in the filter.
27			agg (function):
28			Option specifying the mode of the filter.
29
30			- None : No filtering takes place
31			- any: If any of the substructures are in molecule return True.
32			- all: If all of the substructures are in molecule.
33
34			Examples:
35
36			>>> import skchem
37			>>> m1 = skchem.Mol.from_smiles('CC')
38			>>> m2 = skchem.Mol.from_smiles('c1ccccc1')
39			>>> m3 = skchem.Mol.from_smiles('c1ccccc1-c2c(C=O)ccnc2')
40			>>> ms = pd.Series({'ethane': m1, 'benzene': m2, 'big': m3})
41			>>> f = skchem.filters.SMARTSFilter({'benzene': 'c1ccccc1', 'pyridine': 'c1ccccn1', 'acetyl': 'C=O'})
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (109/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
42			>>> f.apply(ms)
43			acetyl benzene pyridine
44			benzene False True False
45			big True True True
46			ethane False False False
47
48			>>> f.filter(ms, agg=any)
49			benzene <Mol: c1ccccc1>
50			big <Mol: O=Cc1ccncc1-c1ccccc1>
51			dtype: object
52
53			>>> f.filter(ms, agg=all)
54			big <Mol: O=Cc1ccncc1-c1ccccc1>
55			dtype: object
56			"""
57
58			def __init__(self, smarts, **kwargs):
59
60			def read_smarts(s):
			0 ignored issues – show Coding Style Naming introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The name `s` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
61			if isinstance(s, str):
62			return Mol.from_smarts(s, mergeHs=True)
			0 ignored issues – show Bug introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
63			else:
64			return s
65
66			self.smarts = pd.Series(smarts).apply(read_smarts)
67
68			self.index = self.smarts.index
69			super(SMARTSFilter, self).__init__(self.func, **kwargs)
70
71			def func(self, mol):
			0 ignored issues – show Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This method should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history... Bug introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This method seems to be hidden by an attribute defined in `skchem.filters.base` on line 69. Loading history...
72
73			return self.smarts.apply(lambda smarts: smarts in mol)
74
75
76
77			class PAINSFilter(SMARTSFilter):
78
79			""" Whether a molecule passes the Pan Assay INterference (PAINS) filters.
80
81			These are supplied with RDKit, and were originally proposed by Baell et al.
82
83			References:
84			[The original paper](http://dx.doi.org/10.1021/jm901137j)
85
86			Examples:
87
88			Basic usage as a function on molecules:
89
90			>>> import skchem
91			>>> m1 = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
92			>>> no_pains = PAINSFilter()
93			>>> no_pains(m1)
94			True
95			>>> m2 = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
96			>>> no_pains(m2)
97			False
98
99			More useful in combination with pandas DataFrames:
100
101			>>> import gzip
102			>>> sdf = gzip.open(skchem.data.resource('ames_mutagenicity.sdf.gz'))
103			>>> data = skchem.read_sdf(sdf)
104			>>> no_pains.apply(data).value_counts()
105			True 3855
106			False 482
107			dtype: int64
108
109			>>> len(no_pains.filter(data))
110			3855
111			"""
112
113			def __init__(self):
114
115			super(PAINSFilter, self).__init__(self._load_pains(), agg=any, neg=True)
116
117			def _load_pains(cls):
			0 ignored issues – show Coding Style Best Practice introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report Methods should have `self` as first argument. It is a widespread convention and generally a good practice to name the first argument of methods `self`. class SomeClass: def some_method(self): # ... do something Loading history...
118
119			""" Load PAINS included in rdkit into a pandas dataframe and cache as class attribute. """
120
121			if not hasattr(cls, '_pains'):
122			path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
123			pains = pd.read_csv(path, names=['pains', 'names'])
124			pains['names'] = pains.names.str.lstrip('<regId=').str.rstrip('>')
125			pains = pains.set_index('names').pains.apply(Mol.from_smarts, mergeHs=True)
			0 ignored issues – show Bug introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
126			cls._pains = pains
127			return cls._pains
128

richlewis42 / scikit-chem

Push — master ( e06060...97f98d )

SMARTSFilter A

Complexity

Size/Duplication

Importance

3 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files