Split - Code Metrics - Inspection of "improved PY2 compat" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 6e14ce...512eb0 )

by Rich

created 2016-08-05 16:19 UTC

Split A

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	37
Duplicated Lines	0 %

Importance

Changes

Metric	Value
wmc	12
c	0
b	0
f	0
dl	0
loc	37
rs	10

6 Methods

Rating	Name	Size	Complexity
A	ref()	3	1
A	indices()	3	1
A	__init__()	4	1
A	to_dict()	7	4
A	contiguous()	8	3
A	save()	5	2

#! /usr/bin/env python
class SomeClass:
    def some_method(self):
        """Do x and return foo."""
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

import warnings
import logging
import os
from collections import namedtuple

import numpy as np
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import pandas as pd
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
import h5py
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3
from fuel.datasets import H5PYDataset
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from ... import forcefields
from ... import filters
from ... import descriptors
from ... import standardizers
from ... import pipeline

logger = logging.getLogger(__name__)



def default_pipeline():
    """ Build the default pipeline to be used for general datasets.

    Returns:
        pipeline.Pipeline: A pipeline whose stages are, in order, a ChemAxon
            standardizer, a UFF forcefield step, an organic filter, an atom
            number filter and a mass filter, configured as below.
    """
    stages = [
        standardizers.ChemAxonStandardizer(keep_failed=True, warn_on_fail=False),
        forcefields.UFF(add_hs=True, warn_on_fail=False),
        filters.OrganicFilter(),
        filters.AtomNumberFilter(above=5, below=100, include_hydrogens=True),
        filters.MassFilter(below=1000),
    ]
    return pipeline.Pipeline(stages)

# Default keyword arguments for `Converter.run`'s `pytables_kws` parameter.
# They are forwarded to the pandas `to_hdf` calls that write molecules and
# frames to the data file.
DEFAULT_PYTABLES_KW = {
    'complib': 'bzip2',
    'complevel': 9
}

def contiguous_order(to_order, splits):
    """ Determine a contiguous order from non-overlapping splits, and put data in that order.

    Rows are grouped by the position of the split they belong to (first split
    first).  Within a group the original relative order is preserved.  Rows
    that are in no split keep membership 0, so they are grouped with the
    first split.

    Args:
        to_order (iterable<pd.Series, pd.DataFrame, pd.Panel>):
            The pandas objects to put in contiguous order.
        splits (iterable<pd.Series>):
            The non-overlapping splits, as boolean masks.  All masks are
            expected to share the index of the first one.

    Returns:
        iterable<pd.Series, pd.DataFrame, pd.Panel>: The data in contiguous
            order, as a lazy generator over `to_order`.

    Raises:
        IndexError: If `splits` is empty.
    """

    # Materialise once so that any iterable (not only sequences) is accepted.
    splits = list(splits)

    member = pd.Series(0, index=splits[0].index)
    for i, split in enumerate(splits):
        member[split] = i

    # A stable sort is required: the default quicksort may shuffle rows that
    # share the same split number, scrambling the order inside each split.
    idx = member.sort_values(kind='mergesort').index
    return (order.reindex(idx) for order in to_order)

# Description of one feature to compute and store:
#   fper       -- the featurizer object; its `transform` method is called on the molecules.
#   key        -- the name under which the computed feature is saved.
#   axis_names -- list of axis labels for the resulting data (e.g. ['batch', 'features']).
Feature = namedtuple('Feature', ['fper', 'key', 'axis_names'])


def default_features():
    """ Return the default features to calculate for a dataset.

    Returns:
        tuple<Feature>: One `Feature` per default featurizer, each giving the
            featurizer instance, the key it is stored under and the names of
            the axes of the data it produces.
    """
    return (
        Feature(fper=descriptors.MorganFeaturizer(),
                key='X_morg',
                axis_names=['batch', 'features']),
        Feature(fper=descriptors.PhysicochemicalFeaturizer(),
                key='X_pc',
                axis_names=['batch', 'features']),
        Feature(fper=descriptors.AtomFeaturizer(max_atoms=100),
                key='A',
                axis_names=['batch', 'atom_idx', 'features']),
        Feature(fper=descriptors.GraphDistanceTransformer(max_atoms=100),
                key='G',
                axis_names=['batch', 'atom_idx', 'atom_idx']),
        Feature(fper=descriptors.SpacialDistanceTransformer(max_atoms=100),
                key='G_d',
                axis_names=['batch', 'atom_idx', 'atom_idx']),
        Feature(fper=descriptors.ChemAxonFeaturizer(features='all'),
                key='X_cx',
                axis_names=['batch', 'features']),
        Feature(fper=descriptors.ChemAxonAtomFeaturizer(features='all', max_atoms=100),
                key='A_cx',
                axis_names=['batch', 'atom_idx', 'features'])
    )


class Split(object):
    """ A named split of a dataset, described by a mask over all examples.

    Args:
        mask (pd.Series):
            Mask over the whole dataset; non-zero (or `True`) entries mark
            the examples that belong to the split.
        name (str):
            The name of the split.
        converter (Converter):
            The owning converter.  Its `data_file` and `source_names`
            attributes are read by the methods below.
    """

    def __init__(self, mask, name, converter):
        self.mask = mask
        self.name = name
        self.converter = converter

    @property
    def contiguous(self):
        """ bool: Whether the members of the split form one unbroken run.

        An empty split (no members) is not considered contiguous.
        """
        # Normalise to 0/1 integers: boolean arrays cannot be subtracted in
        # current numpy, and any non-zero entry counts as a member.
        flags = np.asarray(self.mask).astype(bool).astype(np.int8)
        # Padding with zeros turns a run touching either end into a regular
        # rising/falling edge, so the number of runs is the number of rising
        # edges, without overwriting any real transition.
        edges = np.diff(np.concatenate(([0], flags, [0])))
        return int(np.count_nonzero(edges == 1)) == 1

    @property
    def indices(self):
        """ np.ndarray: Positional indices of the members, in ascending order. """
        return np.flatnonzero(np.asarray(self.mask))

    def save(self):
        """ Save the split to the converter's data file.

        The indices are written as the dataset '<name>_indices' and the mask
        is written with pandas under '/indices/<name>'.  Warnings raised while
        writing the mask are suppressed.
        """
        self.converter.data_file[self.name + '_indices'] = self.indices
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.mask.to_hdf(self.converter.data_file.filename,
                             key='/indices/' + self.name)

    @property
    def ref(self):
        """ The h5py reference to the saved '<name>_indices' dataset.

        Requires `save` to have written the dataset beforehand.
        """
        return self.converter.data_file[self.name + '_indices'].ref

    def to_dict(self):
        """ Convert the split to a dictionary usable in a fuel split dict.

        Returns:
            dict: Maps every source name of the converter to `(start, stop)`
                for a contiguous split, or `(-1, -1, ref)` otherwise, where
                `ref` references the saved indices.
        """
        if self.contiguous:
            idx = self.indices
            # fuel treats the interval as half open, so `stop` must be one
            # past the last member or the final example would be dropped.
            start, stop = int(idx[0]), int(idx[-1]) + 1
            return {source: (start, stop) for source in self.converter.source_names}
        return {source: (-1, -1, self.ref) for source in self.converter.source_names}


class Converter(object):
    """ Create a fuel dataset from molecules and targets.

    This base class cannot be instantiated directly: `__init__` raises
    `NotImplementedError`.  The methods rely on attributes set by `run`
    (`pytables_kws`, `output_path`, `features`, `feature_names`,
    `task_names`, `splits`) and by `create_file` (`data_file`).
    """

    def __init__(self, directory, output_directory, output_filename='default.h5'):
        """ Not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def run(self, ms, y, output_path, splits=None, features=None, pytables_kws=None):
        """ Featurize the molecules and write the dataset to `output_path`.

        Args:
            ms (pd.Series):
                The molecules of the dataset.
            y (pd.Series or pd.DataFrame):
                The target labels of the dataset.
            output_path (str):
                The path to which the dataset should be saved.
            splits (iterable<(name, split)>):
                An iterable of name, split tuples. Splits are provided as
                boolean arrays of the whole data.  Required.
            features (list[Feature]):
                The features to calculate. Defaults are used if `None`.
            pytables_kws (dict):
                Keyword arguments forwarded to pandas `to_hdf`.
                `DEFAULT_PYTABLES_KW` is used if `None`.

        Raises:
            TypeError: If `splits` is `None`.
        """

        if splits is None:
            # Fail early with a clear message instead of an opaque
            # "'NoneType' object is not iterable" further down.
            raise TypeError('splits must be an iterable of (name, split) '
                            'tuples, got None')

        # Copy so that neither the module-level default nor the caller's
        # dictionary is shared with (and possibly mutated through) this object.
        self.pytables_kws = dict(DEFAULT_PYTABLES_KW if pytables_kws is None
                                 else pytables_kws)
        self.output_path = output_path
        self.features = features if features is not None else default_features()
        self.feature_names = [feat.key for feat in self.features]
        self.task_names = ['y']
        self.splits = [Split(split, name, self) for name, split in splits]

        self.create_file(output_path)

        self.save_splits()
        self.save_molecules(ms)
        self.save_targets(y)
        self.save_features(ms)

    @property
    def source_names(self):
        """ list<str>: The feature names followed by the task names. """
        return self.feature_names + self.task_names

    @property
    def split_names(self):
        """ list<str>: The names of the splits, in the order they were given. """
        return [split.name for split in self.splits]

    def create_file(self, path):
        """ Create the h5 file at `path`, opened for writing.

        The file is stored as `self.data_file`.
        NOTE(review): nothing in this class closes the file again.

        Returns:
            h5py.File: The newly opened file.
        """
        logger.info('Creating h5 file at %s...', self.output_path)
        self.data_file = h5py.File(path, 'w')
        return self.data_file

    def save_molecules(self, mols):
        """ Save the molecules to the data file.

        The molecules are written under the key 'structure' and their utf-8
        encoded SMILES under the key 'smiles'.
        """

        logger.info('Writing molecules to file...')
        logger.debug('Writing %s molecules to %s', len(mols), self.data_file.filename)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            mols.to_hdf(self.data_file.filename, key='structure', **self.pytables_kws)
            mols.apply(lambda m: m.to_smiles().encode('utf-8')).to_hdf(
                self.data_file.filename, key='smiles')

    def save_frame(self, data, name, prefix='targets'):
        """ Save a frame to the data file.

        The data is written with pandas under '/<prefix>/<name>'.  A soft link
        called `name` is then created to the stored values and its dimensions
        are labelled with the names of the axes of `data`.

        Args:
            data (pd.Series, pd.DataFrame or pd.Panel):
                The data to save.
            name (str):
                The name of the soft link, and of the node under `prefix`.
            prefix (str):
                The group under which the data is written.
        """

        logger.info('Writing %s', name)
        logger.debug('Writing data of shape %s to %s', data.shape, self.data_file.filename)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            if len(data.shape) > 2:
                data = data.transpose(2, 1, 0)  # panel serializes backwards for some reason...
            data.to_hdf(self.data_file.filename,
                        key='/{prefix}/{name}'.format(prefix=prefix, name=name),
                        **self.pytables_kws)

        # pd.Panel no longer exists in recent pandas; look it up defensively so
        # that the type dispatch itself cannot raise AttributeError.
        panel_type = getattr(pd, 'Panel', None)

        if isinstance(data, pd.Series):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/values'.format(prefix=prefix, name=name))

            self.data_file[name].dims[0].label = data.index.name

        elif isinstance(data, pd.DataFrame):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/block0_values'.format(prefix=prefix, name=name))

            self.data_file[name].dims[0].label = data.index.name
            self.data_file[name].dims[1].label = data.columns.name

        elif panel_type is not None and isinstance(data, panel_type):
            self.data_file[name] = h5py.SoftLink(
                '/{prefix}/{name}/block0_values'.format(prefix=prefix, name=name))

            self.data_file[name].dims[0].label = data.minor_axis.name  # as panel serializes backwards
            self.data_file[name].dims[1].label = data.major_axis.name
            self.data_file[name].dims[2].label = data.items.name

    def save_targets(self, y):
        """ Save the targets to the data file, as 'y' under the 'targets' prefix. """

        self.save_frame(y, name='y', prefix='targets')

    def save_features(self, ms):
        """ Save all features for the dataset. """
        logger.debug('Saving features')
        for feat in self.features:
            self._save_feature(ms, feat)

    def _save_feature(self, ms, feat):
        """ Calculate and save a feature to the data file. """
        logger.info('Calculating %s', feat.key)

        fps = feat.fper.transform(ms)
        self.save_frame(fps, name=feat.key, prefix='feats')

    def save_splits(self):
        """ Save the splits to the data file.

        Every split is saved individually, then the combined fuel split array
        is stored in the 'split' attribute of the data file.
        """

        logger.info('Producing dataset splits...')
        for split in self.splits:
            split.save()
        split_dict = {split.name: split.to_dict() for split in self.splits}
        splits = H5PYDataset.create_split_array(split_dict)
        logger.debug('split: %s', splits)
        logger.info('Saving splits...')
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.data_file.attrs['split'] = splits

    @classmethod
    def convert(cls, **kwargs):
        """ Instantiate the converter and return the path it wrote to.

        `directory` and `output_directory` default to the current working
        directory when not given.

        Returns:
            tuple<str>: A one-element tuple holding the instance's
                `output_path`.
        """
        kwargs.setdefault('directory', os.getcwd())
        kwargs.setdefault('output_directory', os.getcwd())

        return (cls(**kwargs).output_path,)

    @classmethod
    def fill_subparser(cls, subparser):
        """ Return the conversion function; `subparser` is left untouched. """
        return cls.convert


Push — master ( 6e14ce...512eb0 )

Split A

Complexity

Size/Duplication

Importance

6 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
			0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This module should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
2			#
3			# Copyright (C) 2016 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			import warnings
7			import logging
8			import os
9			from collections import namedtuple
10
11			import numpy as np
			0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
12			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
13			import h5py
			0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `h5py` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
14			from fuel.datasets import H5PYDataset
			0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `fuel.datasets` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
15
16			from ... import forcefields
17			from ... import filters
18			from ... import descriptors
19			from ... import standardizers
20			from ... import pipeline
21
22			logger = logging.getLogger(__name__)
			0 ignored issues – show Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `logger` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
23
24
25			def default_pipeline():
26			""" Return a default pipeline to be used for general datasets. """
27			return pipeline.Pipeline([
28			standardizers.ChemAxonStandardizer(keep_failed=True, warn_on_fail=False),
29			forcefields.UFF(add_hs=True, warn_on_fail=False),
30			filters.OrganicFilter(),
31			filters.AtomNumberFilter(above=5, below=100, include_hydrogens=True),
32			filters.MassFilter(below=1000)
33			])
34

richlewis42 / scikit-chem

Push — master ( 6e14ce...512eb0 )

Split A

Complexity

Size/Duplication

Importance

6 Methods

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files