ord_neigh() - Code Metrics - Inspection of "fixed integer division errors for py2" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 5b00a3...5fddd3 )

by Rich

created 2016-08-25 15:11 UTC

ord_neigh() B

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	1	Features	0

Metric	Value
c	1
b	1
f	0
dl	0
loc	41
rs	8.8571
cc	3

#! /usr/bin/env python
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.features.descriptors.information

Information content indices.
"""

from functools import partial

import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

from .decorators import requires_dmat, requires_h_filled
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3


def entropy(l):

    """ Shannon entropy (in bits) of the value distribution of a sequence.

    Each distinct value in `l` is treated as one class; the entropy is
    computed from the relative class frequencies.

    Args:
        l (sequence):
            One-dimensional collection of hashable values.

    Returns:
        float:
            The entropy in bits; `0.0` for an empty sequence.

    Examples:
        >>> entropy([1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3])  # doctest: +ELLIPSIS
        1.505...

        >>> entropy([1, 1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 8])   # doctest: +ELLIPSIS
        2.75...
    """
    n_items = len(l)
    if n_items == 0:
        # No observations: define the entropy as zero instead of dividing by 0.
        return 0.0

    # The top-level ``pd.value_counts`` is deprecated; the Series method is
    # the supported equivalent.
    n_envs = pd.Series(l).value_counts().values

    # Force true division so that Python 2 does not floor the probabilities.
    probs = n_envs / float(n_items)
    return float(np.sum(-probs * np.log2(probs)))


@requires_h_filled
@requires_dmat
def _ic(mol, m):

    """ Neighbourhood information content of orders 0 to m.

    Atom environments are grown one distance shell at a time: at order `i`
    every atom is described by its environment from the previous order plus
    the atomic numbers of all atoms at distance `i`.  The entropy of the
    resulting environment classes is the information content of that order.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The highest order to calculate.

    Returns:
        np.ndarray:
            Array of length `m + 1`; element `i` is the information content
            of order `i`.  All zeros for a molecule without atoms.
    """

    res = np.zeros((m + 1,))

    n_atoms = len(mol.atoms)
    if n_atoms == 0:
        return res

    atomic = mol.atoms.atomic_number
    env = atomic.copy()
    # NOTE(review): presumably the distance matrix made available by
    # `requires_dmat` - confirm against the decorator.
    d_mat = mol._dMat

    for i in range(m + 1):
        shell_mat = d_mat == i
        arr = np.zeros((n_atoms, n_atoms))
        # Row r carries atom r's previous environment on the diagonal ...
        np.fill_diagonal(arr, env)
        # ... and the atomic number of every atom at distance i elsewhere.
        arr[shell_mat] = np.tile(atomic, n_atoms)[shell_mat.flatten()]
        # Hash the sorted rows so equal environments collapse to one value.
        # `tobytes` replaces `tostring`, which was removed in NumPy 2.0.
        env = np.array([hash(np.sort(x).tobytes()) for x in arr])
        res[i] = entropy(env)

    return np.array(res)


def ic(mol, m):

    r""" Neighborhood Information Content of order m.


    The $m$th order neighbourhood Information Content $IC_m$ is calculated as

    $$ IC_m = - \sum_{g=1}^{G} \frac{A_g}{A} \log_2{\frac{A_g}{A}} = - \sum_{g=1}^{G} p_g \cdot \log_2{p_g} $$

    where $G$ is the number of equivalence classes, $A_g$ is the cardinality
    of the $g$th equivalence class, $A$ is the number of vertices and
    $p_g = A_g / A$ is the probability of randomly selecting a vertex of the
    $g$th class.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of the environments to use.

    Returns:
        float

    Note:
        Orders 0 to 6 are calculated together on first use and memoized on
        the molecule as `mol._ic`.  Orders above 6 are recalculated on every
        call.

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> ic(mol, 0)  # doctest: +ELLIPSIS
        0.918...

        >>> ic(mol, 1)  # doctest: +ELLIPSIS
        1.375...

        >>> ic(mol, 2)  # doctest: +ELLIPSIS
        1.871...

        >>> ic(mol, 3)  # doctest: +ELLIPSIS
        2.422...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766
    """

    # Orders beyond the memoized range are computed on demand.
    if m > 6:
        return _ic(mol, m)[m]

    # Lazily populate the per-molecule cache of orders 0 to 6.
    if not hasattr(mol, '_ic'):
        mol._ic = _ic(mol, 6)

    return mol._ic[m]
class MyParent:
    def __init__(self):
        self._x = 1;
        self.y = 2;

class MyChild(MyParent):
    def some_method(self):
        return self._x    # Ok, since accessed from a child class

class AnotherClass:
    def some_method(self, instance_of_my_child):
        return instance_of_my_child._x   # Would be flagged as AnotherClass is not
                                         # a child class of MyParent


@requires_h_filled
def tic(mol, m):

    r""" Neighborhood Total Information Content of order m.


    The $m$th order total neighbourhood Information Content $TIC_m$
    is defined as:

    $$ TIC_m = A \cdot IC_m $$

    where $A$ is the number of graph vertices.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of the environments to use.

    Returns:
        float

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> tic(mol, 0)  # doctest: +ELLIPSIS
        13.774...

        >>> tic(mol, 1)  # doctest: +ELLIPSIS
        20.629...

        >>> tic(mol, 2)  # doctest: +ELLIPSIS
        28.074...

        >>> tic(mol, 3)  # doctest: +ELLIPSIS
        36.338...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766
    """

    return len(mol.atoms) * ic(mol, m)

@requires_h_filled
def sic(mol, m):

    r""" Structural Information Content of order $m$.


    The $m$th order $SIC_m$ is the information content normalized by its
    maximum possible value, which removes the influence of graph size:

    $$ SIC_m = \frac{IC_m}{\log_2{A}} $$

    where $A$ is the number of graph vertices.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of subgraphs to use.

    Returns:
        float:
            The descriptor value; `0.0` for molecules with fewer than two
            atoms, where the normalization is undefined.

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> sic(mol, 0)  # doctest: +ELLIPSIS
        0.235...

        >>> sic(mol, 1)  # doctest: +ELLIPSIS
        0.352...

        >>> sic(mol, 2)  # doctest: +ELLIPSIS
        0.479...

        >>> sic(mol, 3)  # doctest: +ELLIPSIS
        0.620...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766

    """

    n_atoms = len(mol.atoms)
    # log2(1) == 0 and log2(0) == -inf: avoid dividing by either.
    if n_atoms <= 1:
        return 0.0
    return ic(mol, m) / np.log2(n_atoms)


@requires_h_filled
def bic(mol, m):

    r""" Bonding Information Content of order $m$.


    The $m$th order $BIC_m$ is defined in a normalized form as the $SIC_m$, but
    taking into account the number of edges and their multiplicity,

    $$ BIC_m = \frac{IC_m}{\log_2{\left(\sum_{b=1}^{B} \pi_b^{*}\right)}} $$

    where $B$ is the number of edges and $\pi_b^{*}$ is the conventional bond
    order of the edge $b$. In the original definition, the denominator was
    simply considered to be the edge number $B$.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of the subgraphs to use.

    Returns:
        float:
            The descriptor value; `0.0` when the total bond order is at most
            one, where the normalization is undefined.

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> bic(mol, 0)  # doctest: +ELLIPSIS
        0.235...

        >>> bic(mol, 1)  # doctest: +ELLIPSIS
        0.352...

        >>> bic(mol, 2)  # doctest: +ELLIPSIS
        0.479...

        >>> bic(mol, 3)  # doctest: +ELLIPSIS
        0.620...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766

    """

    total_order = sum(mol.bonds.order)
    # log2(1) == 0 and log2(0) == -inf would give inf/nan/-0.0; mirror the
    # guard used by `sic` and `cic` for degenerate molecules.
    if total_order <= 1:
        return 0.0
    return ic(mol, m) / np.log2(total_order)


@requires_h_filled
def cic(mol, m):

    r""" Complementary Information Content of order $m$.


    The $m$th order $CIC_m$ measures the deviation of $IC_m$ from its maximum
    value, that corresponds to the vertex partition into equivalence classes
    containing one element each:

    $$ CIC_m = \log_2{A} - IC_m $$

    where $A$ is the number of graph vertices.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of subgraphs to use.

    Returns:
        float:
            The descriptor value; `0.0` for molecules with fewer than two
            atoms.

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> cic(mol, 0)  # doctest: +ELLIPSIS
        2.988...

        >>> cic(mol, 1)  # doctest: +ELLIPSIS
        2.531...

        >>> cic(mol, 2)  # doctest: +ELLIPSIS
        2.035...

        >>> cic(mol, 3)  # doctest: +ELLIPSIS
        1.484...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766
    """

    n_atoms = len(mol.atoms)
    # Avoid log2(0) for an empty molecule; a single atom trivially gives 0.
    if n_atoms <= 1:
        return 0.0
    return np.log2(n_atoms) - ic(mol, m)


def ric(mol, m):

    r""" Redundant Information Content of order $m$.


    A measure of relative redundancy of a graph obtained by normalizing the
    complementary information content, defined as:

    $$ R_m = \frac{CIC_m}{\log_2{A}} = 1 - SIC_m $$

    where $A$ is the number of graph vertices.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

        m (int):
            The order of subgraphs to use.

    Returns:
        float

    Examples:

        Extrapolated from MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> ric(mol, 0)  # doctest: +ELLIPSIS
        0.764...

        >>> ric(mol, 1)  # doctest: +ELLIPSIS
        0.647...

        >>> ric(mol, 2)  # doctest: +ELLIPSIS
        0.520...

        >>> ric(mol, 3)  # doctest: +ELLIPSIS
        0.379...

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766

    """

    # The redundancy is the complement of the *normalized* (structural)
    # information content, not of the unnormalized CIC.
    return 1 - sic(mol, m)


def ord_neigh(mol):

    r""" The order of neighbourhood.


    The order of neighbourhood is defined as the order $m$ of the $IC_m$ index
    when it reaches the maximum value:

    \DeclareMathOperator*{\argmax}{arg\,max}

    $$ O = \min \Big( \argmax_m IC_m \Big) $$

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int

    Note:
        Orders 0 to 6 are examined first (and memoized on the molecule as
        `mol._ic`).  If the maximum is found at order 6, the search is
        repeated over orders 0 to 20.

    Examples:

        From MDC:

        >>> import skchem
        >>> mol = skchem.Mol.from_smiles('CC(C)=CC')
        >>> ord_neigh(mol)
        3

    References:
        Molecular Descriptors for Chemoinformatics, pp 408-411
        doi:10.1002/9783527628766

    """

    # Lazily populate the per-molecule cache of orders 0 to 6.
    if not hasattr(mol, '_ic'):
        mol._ic = _ic(mol, 6)

    # argmax returns the first index of the maximum, i.e. the lowest order.
    res = np.argmax(mol._ic)

    # A maximum at the last cached order may still be growing: look further.
    if res == 6:
        return np.argmax(_ic(mol, 20))
    return res


# Registry of descriptor name -> callable taking a molecule.  Each
# order-dependent index is registered for orders 0 to 6.
DESCRIPTORS = {'ic_{}'.format(i): partial(ic, m=i) for i in range(7)}
# 'tic_*' must be bound to `tic`; binding it to `ic` duplicated the IC values.
DESCRIPTORS.update({'tic_{}'.format(i): partial(tic, m=i) for i in range(7)})
DESCRIPTORS.update({'sic_{}'.format(i): partial(sic, m=i) for i in range(7)})
DESCRIPTORS.update({'cic_{}'.format(i): partial(cic, m=i) for i in range(7)})
DESCRIPTORS.update({'bic_{}'.format(i): partial(bic, m=i) for i in range(7)})
DESCRIPTORS.update({'ric_{}'.format(i): partial(ric, m=i) for i in range(7)})
DESCRIPTORS['ord_neigh'] = ord_neigh

__all__ = ['ic', 'tic', 'sic', 'cic', 'bic', 'ric', 'ord_neigh', 'DESCRIPTORS']


Push — master ( 5b00a3...5fddd3 )

ord_neigh() B

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2016 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			# skchem.features.descriptors.information
8
9			Information content indices.
10			"""
11
12			from functools import partial
13
14			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-08-25 15:25 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
15			import numpy as np
			0 ignored issues – show Configuration introduced 2016-08-25 15:25 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16
17			from .decorators import requires_dmat, requires_h_filled
			0 ignored issues – show Configuration introduced 2016-08-25 15:25 UTC by Report Bug Copy Issue Report Unable to import 'decorators' (invalid syntax (<string>, line 107)) This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
18
19
20			def entropy(l):
			0 ignored issues – show Coding Style Naming introduced 2016-08-25 15:25 UTC by Report Bug Copy Issue Report The name `l` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
21			""" Entropy for a list.
22
23			Examples:
24			>>> entropy([1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3]) # doctest: +ELLIPSIS
25			1.505...
26
27			>>> entropy([1, 1, 1, 1, 2, 3, 4, 5, 5, 6, 7, 7, 8, 8, 8, 8]) # doctest: +ELLIPSIS
28			2.75...
29			"""
30			n_envs = pd.value_counts(l).values
31			l = n_envs / len(l)
32			return sum(- l * np.log2(l))
33
34
35			@requires_h_filled
36			@requires_dmat

richlewis42 / scikit-chem

Push — master ( 5b00a3...5fddd3 )

ord_neigh() B

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files