SMARTSFilter - Code Metrics - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

SMARTSFilter A
last analyzed 2016-09-01 14:43 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	77
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
wmc	6
c	1
b	0
f	1
dl	0
loc	77
ccs	13
cts	13
cp	1
rs	10

4 Methods

Rating	Name	Size	Complexity
B	__init__()	30	3
A	read_smarts()	5	2
A	columns()	3	1
A	_transform_mol()	2	2

#! /usr/bin/env python
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.filters.smarts

Module defines SMARTS filters.
"""

from rdkit import RDConfig
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import os
import pandas as pd
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from .base import Filter
from ..core import Mol


class SMARTSFilter(Filter):

    """ Filter a molecule based on smarts.

    Examples:

        >>> import skchem

        >>> data = [
        ...         skchem.Mol.from_smiles('CC', name='ethane'),
        ...         skchem.Mol.from_smiles('c1ccccc1', name='benzene'),
        ...         skchem.Mol.from_smiles('c1ccccc1-c2c(C=O)ccnc2', name='bg')
        ... ]

        >>> f = skchem.filters.SMARTSFilter({'benzene': 'c1ccccc1',
        ...                                  'pyridine': 'c1ccccn1',
        ...                                  'acetyl': 'C=O'})
        >>> f.transform(data, agg=False)
                acetyl benzene pyridine
        ethane   False   False    False
        benzene  False    True    False
        bg       True    True     True

        >>> f.transform(data)
        ethane     False
        benzene     True
        bg         True
        dtype: bool

        >>> f.filter(data)
        benzene                <Mol: c1ccccc1>
        bg        <Mol: O=Cc1ccncc1-c1ccccc1>
        Name: structure, dtype: object

        >>> f.agg = all
        >>> f.filter(data)
        bg    <Mol: O=Cc1ccncc1-c1ccccc1>
        Name: structure, dtype: object
    """

    def __init__(self, smarts, agg='any', merge_hs=True, n_jobs=1,
                 verbose=True):

        """ Initialize a `SMARTSFilter` object.

        Args:
            smarts (pd.Series or dict):
                A series of SMARTS to use in the filter.  String entries are
                parsed with `Mol.from_smarts`; any other entry is stored
                unchanged.
            agg (str or callable):
                Option specifying the mode of the filter:
                - 'any': If any of the substructures are in molecule.
                - 'all': If all of the substructures are in molecule.
            merge_hs (bool):
                Forwarded as `mergeHs` to `Mol.from_smarts` when a string
                entry is parsed.
            n_jobs (int):
                The number of processes to run the filter in.
            verbose (bool):
                Whether to output a progress bar.

        """

        self.merge_hs = merge_hs

        def read_smarts(pattern):
            """ Parse a SMARTS string; pass any other object through. """
            if isinstance(pattern, str):
                return Mol.from_smarts(pattern, mergeHs=self.merge_hs)
            return pattern

        # Wrapping in a Series keeps the keys/index of `smarts` as labels.
        self.smarts = pd.Series(smarts).apply(read_smarts)
        super(SMARTSFilter, self).__init__(agg=agg, n_jobs=n_jobs,
                                           verbose=verbose)

    def _transform_mol(self, mol):

        """ Evaluate `pattern in mol` for every stored SMARTS pattern.

        Returns:
            The results as an array, in the order of `self.smarts`.
        """

        return self.smarts.apply(lambda smarts: smarts in mol).values

    @property
    def columns(self):
        """ The index of the stored SMARTS series. """
        return self.smarts.index


class PAINSFilter(SMARTSFilter):

    """ Whether a molecule passes the Pan Assay INterference (PAINS) filters.

    These are supplied with RDKit, and were originally proposed by Baell et al.

    Attributes:
        _pains (pd.Series): a series of smarts template molecules.

    References:
        [The original paper](http://dx.doi.org/10.1021/jm901137j)

    Examples:

        Basic usage as a function on molecules:

        >>> import skchem
        >>> benzene = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
        >>> pf = skchem.filters.PAINSFilter()
        >>> pf.transform(benzene)
        True
        >>> catechol = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
        >>> pf.transform(catechol)
        False

        >>> res = pf.transform(catechol, agg=False)
        >>> res[res]
        names
        catechol_A(92)    True
        Name: PAINSFilter, dtype: bool

        More useful in combination with pandas DataFrames:

        >>> data = [benzene, catechol]
        >>> pf.transform(data)
        benzene      True
        catechol    False
        dtype: bool

        >>> pf.filter(data)
        benzene    <Mol: c1ccccc1>
        Name: structure, dtype: object
    """

    # Class-level cache filled on the first call to `_load_pains`.
    _pains = None

    def __init__(self, n_jobs=1, verbose=True):

        """ Initialize a `PAINSFilter` object.

        Args:
            n_jobs (int):
                The number of processes to run the filter in.
            verbose (bool):
                Whether to output a progress bar.
        """

        super(PAINSFilter, self).__init__(self._load_pains(), agg='not any',
                                          n_jobs=n_jobs, verbose=verbose)

    @staticmethod
    def _strip_reg_id(name):

        """ Remove the `<regId=` prefix and trailing `>` from a name.

        `str.lstrip('<regId=')` treats its argument as a set of characters,
        so it would also eat leading `r`, `e`, `g`, `I`, `d` characters of the
        name itself (e.g. `ene_...` -> `ne_...`).  Only the literal prefix is
        removed here.  Non-string values are returned unchanged.
        """

        if not isinstance(name, str):
            return name
        prefix = '<regId='
        if name.startswith(prefix):
            name = name[len(prefix):]
        return name.rstrip('>')

    @classmethod
    def _load_pains(cls):

        """ Load PAINS into a `pd.Series` and cache as class attribute.

        Returns:
            pd.Series: the parsed SMARTS templates indexed by their names.
        """

        if cls._pains is None:
            path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
            pains = pd.read_csv(path, names=['pains', 'names'])
            pains['names'] = pains.names.map(cls._strip_reg_id)
            pains = pains.set_index('names').pains.apply(Mol.from_smarts,
                                                         mergeHs=True)
            cls._pains = pains
        return cls._pains


1		#! /usr/bin/env python
2		#
3		# Copyright (C) 2016 Rich Lewis <[email protected]>
4		# License: 3-clause BSD
5
6	1	"""
7		# skchem.filters.smarts
8
9		Module defines SMARTS filters.
10		"""
11
12	1	from rdkit import RDConfig
		0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `rdkit` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
13	1	import os
14	1	import pandas as pd
		0 ignored issues – show Configuration introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
15
16	1	from .base import Filter
17	1	from ..core import Mol
18
19
20	1	class SMARTSFilter(Filter):
21
22		""" Filter a molecule based on smarts.
23
24		Examples:
25
26		>>> import skchem
27
28		>>> data = [
29		... skchem.Mol.from_smiles('CC', name='ethane'),
30		... skchem.Mol.from_smiles('c1ccccc1', name='benzene'),
31		... skchem.Mol.from_smiles('c1ccccc1-c2c(C=O)ccnc2', name='bg')
32		... ]
33
34		>>> f = skchem.filters.SMARTSFilter({'benzene': 'c1ccccc1',
35		... 'pyridine': 'c1ccccn1',
36		... 'acetyl': 'C=O'})
37		>>> f.transform(data, agg=False)
38		acetyl benzene pyridine
39		ethane False False False
40		benzene False True False
41		bg True True True
42
43		>>> f.transform(data)
44		ethane False
45		benzene True
46		bg True
47		dtype: bool
48
49		>>> f.filter(data)
50		benzene <Mol: c1ccccc1>
51		bg <Mol: O=Cc1ccncc1-c1ccccc1>
52		Name: structure, dtype: object
53
54		>>> f.agg = all
55		>>> f.filter(data)
56		bg <Mol: O=Cc1ccncc1-c1ccccc1>
57		Name: structure, dtype: object
58		"""
59
60	1	def __init__(self, smarts, agg='any', merge_hs=True, n_jobs=1,
		0 ignored issues – show best-practice introduced 2016-08-18 20:05 UTC by Report Bug Copy Issue Report Too many arguments (6/5) Loading history...
61		verbose=True):
62
63		""" Initialize a `SMARTSFilter` object.
64
65		Args:
66		smarts (pd.Series or dict):
67		A series of SMARTS to use in the filter.
68		agg (str or callable):
69		Option specifying the mode of the filter:
70		- 'any': If any of the substructures are in molecule.
71		- 'all': If all of the substructures are in molecule.
72		n_jobs (int):
73		The number of processes to run the filter in.
74		verbose (bool):
75		Whether to output a progress bar.
76
77		"""
78
79	1	self.merge_hs = merge_hs
80
81	1	def read_smarts(s):
		0 ignored issues – show Coding Style Naming introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The name `s` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report This function should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
82	1	if isinstance(s, str):
83	1	return Mol.from_smarts(s, mergeHs=self.merge_hs)
		0 ignored issues – show Bug introduced 2016-08-18 20:05 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
84		else:
85	1	return s
86
87	1	self.smarts = pd.Series(smarts).apply(read_smarts)
88	1	super(SMARTSFilter, self).__init__(agg=agg, n_jobs=n_jobs,
89		verbose=verbose)
90
91	1	def _transform_mol(self, mol):
		0 ignored issues – show Bug introduced 2016-08-05 16:20 UTC by Report Bug Copy Issue Report This method seems to be hidden by an attribute defined in `skchem.filters.base` on line 174. Loading history...
92	1	return self.smarts.apply(lambda smarts: smarts in mol).values
93
94	1	@property
95		def columns(self):
96	1	return self.smarts.index
97
98
99	1	class PAINSFilter(SMARTSFilter):
100
101		""" Whether a molecule passes the Pan Assay INterference (PAINS) filters.
102
103		These are supplied with RDKit, and were originally proposed by Baell et al.
104
105		Attributes:
106		_pains (pd.Series): a series of smarts template molecules.
107
108		References:
109		[The original paper](http://dx.doi.org/10.1021/jm901137j)
110
111		Examples:
112
113		Basic usage as a function on molecules:
114
115		>>> import skchem
116		>>> benzene = skchem.Mol.from_smiles('c1ccccc1', name='benzene')
117		>>> pf = skchem.filters.PAINSFilter()
118		>>> pf.transform(benzene)
119		True
120		>>> catechol = skchem.Mol.from_smiles('Oc1c(O)cccc1', name='catechol')
121		>>> pf.transform(catechol)
122		False
123
124		>>> res = pf.transform(catechol, agg=False)
125		>>> res[res]
126		names
127		catechol_A(92) True
128		Name: PAINSFilter, dtype: bool
129
130		More useful in combination with pandas DataFrames:
131
132		>>> data = [benzene, catechol]
133		>>> pf.transform(data)
134		benzene True
135		catechol False
136		dtype: bool
137
138		>>> pf.filter(data)
139		benzene <Mol: c1ccccc1>
140		Name: structure, dtype: object
141		"""
142
143	1	_pains = None
144
145	1	def __init__(self, n_jobs=1, verbose=True):
146
147		""" Initialize a `PAINSFilter` object.
148
149		Args:
150		n_jobs (int):
150		The number of processes to run the filter in.
152		verbose (bool):
153		Whether to output a progress bar.
154		"""
155
156	1	super(PAINSFilter, self).__init__(self._load_pains(), agg='not any',
157		n_jobs=n_jobs, verbose=verbose)
158
159	1	@classmethod
160		def _load_pains(cls):
161
162		""" Load PAINS into a `pd.Series` and cache as class attribute. """
163
164	1	if cls._pains is None:
165	1	path = os.path.join(RDConfig.RDDataDir, 'Pains', 'wehi_pains.csv')
166	1	pains = pd.read_csv(path, names=['pains', 'names'])
167	1	pains['names'] = pains.names.str.lstrip('<regId=').str.rstrip('>')
168	1	pains = pains.set_index('names').pains.apply(Mol.from_smarts,
		0 ignored issues – show Bug introduced 2016-06-28 09:56 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smarts`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
169		mergeHs=True)
170	1	cls._pains = pains
171		return cls._pains
172

SMARTSFilter A
last analyzed 2016-09-01 14:43 UTC

Complexity

Size/Duplication

Test Coverage

Importance

4 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

richlewis42 / scikit-chem

SMARTSFilter A last analyzed 2016-09-01 14:43 UTC

Complexity

Size/Duplication

Test Coverage

Importance

4 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

SMARTSFilter A
last analyzed 2016-09-01 14:43 UTC

2. Missing __init__.py files

2. Missing __init__.py files