Split.contiguous() - Code Metrics - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Split.contiguous() A
last analyzed 2016-09-01 14:43 UTC

↳ Parent: Split

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	1
CRAP Score	8.6667

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	0
loc	8
ccs	1
cts	7
cp	0.1429
rs	9.4285
cc	3
crap	8.6667

#! /usr/bin/env python
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.data.converters.base

Defines the base converter class.
"""

import warnings
import logging
import os
from collections import namedtuple

import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import h5py
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
from fuel.datasets import H5PYDataset
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

from ... import forcefields
from ... import filters
from ... import features
from ... import standardizers
from ... import pipeline

# Module-level logger named after this module; the Converter methods in this file log through it.
logger = logging.getLogger(__name__)



def default_pipeline():
    """ Build the pipeline used by default for general datasets.

    The stages are, in order: a ChemAxon standardizer, the UFF forcefield
    (with `add_hs=True`), an organic filter, an atom number filter
    (`above=5`, `below=100`, hydrogens included) and a mass filter
    (`below=1000`).

    Returns:
        pipeline.Pipeline: A new pipeline holding freshly built stages.
    """
    standardizer = standardizers.ChemAxonStandardizer(keep_failed=True, warn_on_fail=False)
    forcefield = forcefields.UFF(add_hs=True, warn_on_fail=False)
    stages = [
        standardizer,
        forcefield,
        filters.OrganicFilter(),
        filters.AtomNumberFilter(above=5, below=100, include_hydrogens=True),
        filters.MassFilter(below=1000),
    ]
    return pipeline.Pipeline(stages)

# Default keyword arguments for `Converter.run`'s `pytables_kws` parameter; the
# converter forwards them to the pandas `to_hdf` calls that write the data file.
DEFAULT_PYTABLES_KW = {
    'complib': 'bzip2',
    'complevel': 9
}

def contiguous_order(to_order, splits):
    """ Determine a contiguous order from non-overlapping splits, and put data in that order.

    Every row is labelled with the position of the split that selects it, and
    the rows are then sorted by that label so each split occupies one unbroken
    block.  The sort is stable, so rows keep their original relative order
    inside each block.  Rows selected by no split keep the label 0 and are
    therefore grouped with the first split.

    Args:
        to_order (iterable<pd.Series, pd.DataFrame, pd.Panel>):
            The pandas objects to put in contiguous order.
        splits (iterable<pd.Series>):
            The non-overlapping splits, as boolean masks.  All masks are
            expected to share the index of the first one.

    Returns:
        iterable<pd.Series, pd.DataFrame, pd.Panel>: The data in contiguous
            order, produced lazily (a generator).

    Raises:
        IndexError: If `splits` is empty.
    """

    # Materialise once: the masks are both indexed and iterated, which a
    # one-shot iterable (e.g. a generator) would not survive.
    splits = list(splits)

    member = pd.Series(0, index=splits[0].index)
    for i, split in enumerate(splits):
        member[split] = i

    # mergesort is stable; the default sort may reorder rows within a split.
    idx = member.sort_values(kind='mergesort').index
    return (order.reindex(idx) for order in to_order)

# Description of one feature set to compute for a dataset:
#   fper       - featurizer object; the converter calls its `transform` on the molecules
#   key        - name under which the computed feature is saved in the data file
#   axis_names - names of the axes of the computed feature, one per dimension
Feature = namedtuple('Feature', ['fper', 'key', 'axis_names'])


def default_features():
    """ Return the default features to be calculated for general datasets.

    Returns:
        tuple<Feature>: One `Feature` per feature set, pairing a freshly
            constructed featurizer with the key it is saved under and the
            names of its axes.
    """
    return (
        Feature(fper=features.MorganFeaturizer(),
                key='X_morg',
                axis_names=['batch', 'features']),
        Feature(fper=features.PhysicochemicalFeaturizer(),
                key='X_pc',
                axis_names=['batch', 'features']),
        Feature(fper=features.AtomFeaturizer(max_atoms=100),
                key='A',
                axis_names=['batch', 'atom_idx', 'features']),
        Feature(fper=features.GraphDistanceTransformer(max_atoms=100),
                key='G',
                axis_names=['batch', 'atom_idx', 'atom_idx']),
        Feature(fper=features.SpacialDistanceTransformer(max_atoms=100),
                key='G_d',
                axis_names=['batch', 'atom_idx', 'atom_idx']),
        Feature(fper=features.ChemAxonFeaturizer(features='all'),
                key='X_cx',
                axis_names=['batch', 'features']),
        Feature(fper=features.ChemAxonAtomFeaturizer(features='all', max_atoms=100),
                key='A_cx',
                axis_names=['batch', 'atom_idx', 'features'])
    )


class Split(object):
    """ A named subset of a dataset, described by a boolean mask over its rows.

    Args:
        mask (pd.Series):
            Boolean mask over the whole dataset; truthy for rows in the split.
        name (str):
            The name of the split, used to build the keys it is saved under.
        converter (Converter):
            The converter that owns the split.  Its `data_file` and
            `source_names` attributes are used.
    """

    def __init__(self, mask, name, converter):
        self.mask = mask
        self.name = name
        self.converter = converter

    @property
    def contiguous(self):
        """ bool: Whether the selected rows form exactly one unbroken run.

        A mask that selects nothing is not contiguous. """
        flags = np.asarray(self.mask).astype(int)
        # Pad with zeros so that a run touching either end of the mask still
        # starts with a rising edge; one rising edge means one run.
        edges = np.diff(np.concatenate(([0], flags, [0])))
        return int(np.count_nonzero(edges == 1)) == 1

    @property
    def indices(self):
        """ np.ndarray: The positions of the selected rows, in ascending order. """
        return np.flatnonzero(np.asarray(self.mask))

    def save(self):
        """ Save the split to the converter's data file.

        The row positions are stored under `<name>_indices`, and the mask
        itself is written to `/indices/<name>` in the same file through pandas.
        Warnings raised while writing the mask are suppressed.
        """
        self.converter.data_file[self.name + '_indices'] = self.indices
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.mask.to_hdf(self.converter.data_file.filename, '/indices/' + self.name)

    @property
    def ref(self):
        """ The `ref` of the `<name>_indices` entry in the data file.

        Only valid once `save` has stored the indices. """
        return self.converter.data_file[self.name + '_indices'].ref

    def to_dict(self):
        """ Describe the split for every source of the converter.

        Returns:
            dict: Maps each source name to `(start, stop)` when the split is
                contiguous, or to `(-1, -1, ref)` otherwise, where `ref`
                points at the saved indices.
        """
        sources = self.converter.source_names
        if self.contiguous:
            idx = self.indices
            # Fuel reads (start, stop) as a half-open interval, so stop must be
            # one past the last selected row or that row would be dropped.
            span = (idx[0], idx[-1] + 1)
            return {source: span for source in sources}
        ref = self.ref
        return {source: (-1, -1, ref) for source in sources}


class Converter(object):
    """ Create a fuel dataset from molecules and targets. """

    def __init__(self, directory, output_directory, output_filename='default.h5'):
        """ Placeholder initializer: the base class cannot be instantiated.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def run(self, ms, y, output_path, splits=None, features=None, pytables_kws=None):
        """ Build the dataset and write it to `output_path`.

        Args:
            ms (pd.Series):
                The molecules of the dataset.
            y (pd.Series or pd.DataFrame):
                The target labels of the dataset.
            output_path (str):
                The path to which the dataset should be saved.
            splits (iterable<(name, split)>):
                An iterable of name, split tuples. Splits are provided as
                boolean arrays of the whole data.  Required, despite the
                `None` default.
            features (list[Feature]):
                The features to calculate. Defaults are used if `None`.
            pytables_kws (dict):
                Keyword arguments forwarded to pandas' `to_hdf`.
                `DEFAULT_PYTABLES_KW` is used if `None`.

        Raises:
            TypeError: If `splits` is `None`.
        """
        # Fail before touching any state or creating the file.
        if splits is None:
            raise TypeError('splits must be an iterable of (name, split) tuples, not None')
        if pytables_kws is None:
            pytables_kws = DEFAULT_PYTABLES_KW

        self.output_path = output_path
        # Copy, so later changes to the caller's (or the default) dict do not leak in.
        self.pytables_kws = dict(pytables_kws)
        self.features = features if features is not None else default_features()
        self.feature_names = [feat.key for feat in self.features]
        self.task_names = ['y']
        self.splits = [Split(split, name, self) for name, split in splits]

        self.create_file(output_path)

        self.save_splits()
        self.save_molecules(ms)
        self.save_targets(y)
        self.save_features(ms)

    @property
    def source_names(self):
        """ list<str>: The names of all sources: feature keys, then task names. """
        return self.feature_names + self.task_names

    @property
    def split_names(self):
        """ The splits of the dataset.

        NOTE(review): despite the name, this returns the `Split` objects held
        in `self.splits`, not their names - confirm which one callers expect.
        """
        return self.splits

    def create_file(self, path):
        """ Open a new h5 file at `path` for writing and keep it as `self.data_file`.

        Returns:
            h5py.File: The opened file.
        """
        logger.info('Creating h5 file at %s...', path)
        self.data_file = h5py.File(path, 'w')
        return self.data_file

    def save_molecules(self, mols):
        """ Save the molecules to the data file.

        The molecules are written under `structure`, and their utf-8 encoded
        SMILES under `smiles`.  Warnings raised while writing are suppressed.
        """
        logger.info('Writing molecules to file...')
        logger.debug('Writing %s molecules to %s', len(mols), self.data_file.filename)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            mols.to_hdf(self.data_file.filename, 'structure', **self.pytables_kws)
            mols.apply(lambda m: m.to_smiles().encode('utf-8')).to_hdf(self.data_file.filename, 'smiles')

    def save_frame(self, data, name, prefix='targets'):
        """ Save a frame to the data file.

        The data is written through pandas to `/<prefix>/<name>`.  A soft link
        called `<name>` is then pointed at the stored values and its dimensions
        are labelled with the names of the corresponding pandas axes.

        Args:
            data (pd.Series, pd.DataFrame or pd.Panel):
                The data to save.
            name (str):
                The name of the link to create for the data.
            prefix (str):
                The group under which pandas stores the data.
        """
        logger.info('Writing %s', name)
        logger.debug('Writing data of shape %s to %s', data.shape, self.data_file.filename)

        key = '/{prefix}/{name}'.format(prefix=prefix, name=name)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            if len(data.shape) > 2:
                data = data.transpose(2, 1, 0)  # panel serializes backwards for some reason...
            data.to_hdf(self.data_file.filename, key=key, **self.pytables_kws)

        # Recent pandas releases no longer provide Panel; the empty tuple makes
        # the isinstance check below simply fail there instead of raising.
        panel_type = getattr(pd, 'Panel', ())

        if isinstance(data, pd.Series):
            node = 'values'
            labels = [data.index.name]
        elif isinstance(data, pd.DataFrame):
            node = 'block0_values'
            labels = [data.index.name, data.columns.name]
        elif isinstance(data, panel_type):
            node = 'block0_values'
            # axes are reversed, as panel serializes backwards
            labels = [data.minor_axis.name, data.major_axis.name, data.items.name]
        else:
            return

        self.data_file[name] = h5py.SoftLink('{key}/{node}'.format(key=key, node=node))
        for axis, label in enumerate(labels):
            self.data_file[name].dims[axis].label = label

    def save_targets(self, y):
        """ Save the targets to the data file, as `y` under the `targets` prefix. """
        self.save_frame(y, name='y', prefix='targets')

    def save_features(self, ms):
        """ Save all features for the dataset. """
        logger.debug('Saving features')
        for feat in self.features:
            self._save_feature(ms, feat)

    def _save_feature(self, ms, feat):
        """ Calculate and save a feature to the data file. """
        logger.info('Calculating %s', feat.key)

        fps = feat.fper.transform(ms)
        self.save_frame(fps, name=feat.key, prefix='feats')

    def save_splits(self):
        """ Save the splits to the data file.

        Each split is saved individually, then the combined split array is
        stored in the `split` attribute of the file.
        """
        logger.info('Producing dataset splits...')
        for split in self.splits:
            split.save()
        split_dict = {split.name: split.to_dict() for split in self.splits}
        splits = H5PYDataset.create_split_array(split_dict)
        logger.debug('split: %s', splits)
        logger.info('Saving splits...')
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            self.data_file.attrs['split'] = splits

    @classmethod
    def convert(cls, **kwargs):
        """ Instantiate the converter and report where its output was written.

        `directory` and `output_directory` default to the current working
        directory when not given.

        Returns:
            tuple<str>: A one element tuple holding the output path.
        """
        kwargs.setdefault('directory', os.getcwd())
        kwargs.setdefault('output_directory', os.getcwd())

        return (cls(**kwargs).output_path,)

    @classmethod
    def fill_subparser(cls, subparser):
        """ Return the conversion function, `cls.convert`.

        `subparser` is not used here.  Presumably this is a hook for a
        command line interface - confirm against the caller.
        """
        return cls.convert


Split.contiguous() A
last analyzed 2016-09-01 14:43 UTC

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1		#! /usr/bin/env python
2		#
3		# Copyright (C) 2016 Rich Lewis <[email protected]>
4		# License: 3-clause BSD
5
6	1	"""
7		# skchem.data.converters.base
8
9		Defines the base converter class.
10		"""
11
12	1	import warnings
13	1	import logging
14	1	import os
15	1	from collections import namedtuple
16
17	1	import numpy as np
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
18	1	import pandas as pd
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
19	1	import h5py
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `h5py` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
20	1	from fuel.datasets import H5PYDataset
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `fuel.datasets` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
21
22	1	from ... import forcefields
23	1	from ... import filters
24	1	from ... import features
25	1	from ... import standardizers
26	1	from ... import pipeline
27
28	1	logger = logging.getLogger(__name__)
		0 ignored issues – show Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `logger` does not conform to the constant naming conventions (`(([A-Z_][A-Z0-9_]*)|(__.*__))$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
29
30
31	1	def default_pipeline():
32		""" Return a default pipeline to be used for general datasets. """
33		return pipeline.Pipeline([
34		standardizers.ChemAxonStandardizer(keep_failed=True, warn_on_fail=False),
35		forcefields.UFF(add_hs=True, warn_on_fail=False),

richlewis42 / scikit-chem

Split.contiguous() A last analyzed 2016-09-01 14:43 UTC

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

Split.contiguous() A
last analyzed 2016-09-01 14:43 UTC

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files