MullerAmesConverter.__init__() - Code Metrics - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

MullerAmesConverter.__init__() A
last analyzed 2016-09-01 14:43 UTC

↳ Parent: MullerAmesConverter

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	1
CRAP Score	10.7163

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
c	1
b	0
f	1
dl	0
loc	49
ccs	1
cts	20
cp	0.05
rs	9.2258
cc	3
crap	10.7163

#! /usr/bin/env python
class SomeClass:
    def some_method(self):
        """Do x and return foo."""
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

import os
import zipfile
import logging
LOGGER = logging.getLogger(__name__)

import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import skchem

from .base import Converter

from ... import standardizers


# Replacement SMILES keyed by CAS number.  `MullerAmesConverter.patch_data`
# writes each value into the 'structure' column of the row whose index label
# is the key; per that method's docstring these rewrites express the diazo
# groups in a form RDKit can parse.  Raw strings keep the backslash bond
# markers literal.
PATCHES = {
    '820-75-7': r'NNC(=O)CNC(=O)C=[N+]=[N-]',
    '2435-76-9': r'[N-]=[N+]=C1C=NC(=O)NC1=O',
    '817-99-2': r'NC(=O)CNC(=O)\C=[N+]=[N-]',
    '116539-70-9': r'CCCCN(CC(O)C1=C\C(=[N+]=[N-])\C(=O)C=C1)N=O',
    '115-02-6': r'NC(COC(=O)\C=[N+]=[N-])C(=O)O',
    '122341-55-3': r'NC(COC(=O)\C=[N+]=[N-])C(=O)O'
}

class MullerAmesConverter(Converter):
    """Converter for the Muller Ames mutagenicity dataset.

    Instantiating the class performs the whole conversion: the raw archive
    is unpacked, the structures are patched, parsed and filtered, the
    published train/test splits are re-indexed, and the result is handed to
    ``self.run`` for writing.

    ``standardize``, ``optimize``, ``filter`` and ``run`` are not defined in
    this class; they are presumably provided by ``Converter`` -- confirm
    against the base class.
    """

    def __init__(self, directory, output_directory, output_filename='muller_ames.h5'):
        """Convert the raw dataset found in `directory`.

        Args:
            directory (str):
                Directory in which input files reside.
            output_directory (str):
                Directory in which to save the converted dataset.
            output_filename (str):
                Name of the saved dataset. Defaults to `muller_ames.h5`.
        """

        zip_path = os.path.join(directory, 'ci900161g_si_001.zip')
        output_path = os.path.join(output_directory, output_filename)

        # Unpack next to the archive: every file read below is looked up in
        # `directory`, so extracting into the current working directory only
        # worked when the two happened to coincide.
        with zipfile.ZipFile(zip_path) as archive:
            archive.extractall(directory)

        # create dataframe, indexed by the (whitespace-stripped) second column
        data = pd.read_csv(os.path.join(directory, 'smiles_cas_N6512.smi'),
                           delimiter='\t', index_col=1,
                           converters={1: lambda s: s.strip()},
                           header=None, names=['structure', 'id', 'is_mutagen'])

        data = self.patch_data(data, PATCHES)

        data['structure'] = data.structure.apply(skchem.Mol.from_smiles)

        data = self.standardize(data)
        data = self.optimize(data)
        keep = self.filter(data)

        mols, labels = keep.structure, keep.is_mutagen

        # NOTE(review): reset_index() gives both frames a fresh 0..n-1 index,
        # so this difference is the trailing positions
        # len(keep)..len(data)-1, not the positions of the rows that
        # `filter` removed -- confirm this is the intended set of indices.
        indices = data.reset_index().index.difference(keep.reset_index().index)

        # Both split files live alongside the other inputs in `directory`.
        train = self.parse_splits(
            os.path.join(directory, 'splits_train_N6512.csv'))
        train = self.drop_indices(train, indices)
        splits = self.create_split_dict(train, 'train')

        test = self.parse_splits(
            os.path.join(directory, 'splits_test_N6512.csv'))
        test = self.drop_indices(test, indices)
        splits.update(self.create_split_dict(test, 'test'))

        self.run(mols, labels, output_path, splits=splits)

    def patch_data(self, data, patches):
        """ Patch smiles in a DataFrame with rewritten ones that specify diazo
        groups in rdkit friendly way.

        Args:
            data (pd.DataFrame):
                Frame with a 'structure' column, indexed by the keys used in
                `patches`.  It is modified in place.
            patches (dict):
                Mapping of index label to replacement SMILES string.

        Returns:
            pd.DataFrame:
                The same `data` object, after patching.
        """

        LOGGER.info('Patching data...')
        for cas, smiles in patches.items():
            data.loc[cas, 'structure'] = smiles

        return data

    def parse_splits(self, f_path):
        """ Read split definitions from a text file.

        Each non-empty line of the file is one split, written as a
        comma-separated list of one-based integer indices.

        Args:
            f_path (str):
                Path of the split file.

        Returns:
            list of np.ndarray:
                One sorted array of zero-based indices per line.
        """

        LOGGER.info('Parsing splits...')
        with open(f_path) as split_file:
            lines = split_file.read().strip().splitlines()

        # sort the ints of each line, then shift to zero based indexing
        return [np.array(sorted(int(n) for n in line.strip().split(','))) - 1
                for line in lines]

    def drop_indices(self, splits, indices):
        """ Adjust split indices to account for compounds that were dropped.

        Every entry of a split is decreased by the number of `indices` that
        are smaller than it, and `np.delete` is then applied to the split
        with `indices`.

        Args:
            splits (list of np.ndarray):
                Split index arrays.  The list is updated in place.
            indices (iterable of int):
                Indices of the dropped compounds.

        Returns:
            list of np.ndarray:
                The same `splits` list, holding the adjusted arrays.
        """

        LOGGER.info('Dropping failed compounds from split indices...')
        for i, split in enumerate(splits):
            # shift each entry down by the count of dropped indices below it
            split = split - sum(split > ix for ix in indices)
            # NOTE(review): np.delete treats `indices` as positions within
            # `split`, not as values to remove -- confirm this is intended.
            splits[i] = np.delete(split, indices)

        return splits

    def create_split_dict(self, splits, name):
        """ Label splits as '<name>_1', '<name>_2', ... in order.

        Args:
            splits (iterable):
                The splits to label.
            name (str):
                Prefix for the generated keys.

        Returns:
            dict:
                Mapping of '<name>_<one-based position>' to the split.
        """

        return {'{}_{}'.format(name, i + 1): split
                for i, split in enumerate(splits)}

# Script entry point: enable INFO-level logging so the progress messages
# emitted through LOGGER are shown, then start the conversion.  `convert` is
# not defined in this file; presumably it is inherited from Converter.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    LOGGER.info('Converting Muller Ames Dataset...')
    MullerAmesConverter.convert()


MullerAmesConverter.__init__() A
last analyzed 2016-09-01 14:43 UTC

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1		#! /usr/bin/env python
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This module should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
2		#
3		# Copyright (C) 2016 Rich Lewis <[email protected]>
4		# License: 3-clause BSD
5
6	1	import os
7	1	import zipfile
8	1	import logging
9	1	LOGGER = logging.getLogger(__name__)
10
11	1	import pandas as pd
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
12	1	import numpy as np
		0 ignored issues – show Configuration introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
13	1	import skchem
14
15	1	from .base import Converter
16
17	1	from ... import standardizers
		0 ignored issues – show Unused Code introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The import `standardizers` seems to be unused. Loading history...
18
19	1	PATCHES = {
20		'820-75-7': r'NNC(=O)CNC(=O)C=[N+]=[N-]',
21		'2435-76-9': r'[N-]=[N+]=C1C=NC(=O)NC1=O',
22		'817-99-2': r'NC(=O)CNC(=O)\C=[N+]=[N-]',
23		'116539-70-9': r'CCCCN(CC(O)C1=C\C(=[N+]=[N-])\C(=O)C=C1)N=O',
24		'115-02-6': r'NC(COC(=O)\C=[N+]=[N-])C(=O)O',
25		'122341-55-3': r'NC(COC(=O)\C=[N+]=[N-])C(=O)O'
26		}
27
28	1	class MullerAmesConverter(Converter):
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This class should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
29
30	1	def __init__(self, directory, output_directory, output_filename='muller_ames.h5'):
		0 ignored issues – show Bug introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The `__init__` method of the super-class `Converter` is not called. It is generally advisable to initialize the super-class by calling its `__init__` method: class SomeParent: def __init__(self): self.x = 1 class SomeChild(SomeParent): def __init__(self): # Initialize the super class SomeParent.__init__(self) Loading history...
31
32		"""
33		Args:
34		directory (str):
35		Directory in which input files reside.
36		output_directory (str):
37		Directory in which to save the converted dataset.
38		output_filename (str):
39		Name of the saved dataset. Defaults to `muller_ames.h5`.
40
41		Returns:
42		tuple of str:
43		Single-element tuple containing the path to the converted dataset.
44		"""
45
46		zip_path = os.path.join(directory, 'ci900161g_si_001.zip')
47		output_path = os.path.join(output_directory, output_filename)
48
49		with zipfile.ZipFile(zip_path) as f:
		0 ignored issues – show Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `f` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
50		f.extractall()
51
52		# create dataframe
53		data = pd.read_csv(os.path.join(directory, 'smiles_cas_N6512.smi'),
54		delimiter='\t', index_col=1,
55		converters={1: lambda s: s.strip()},
56		header=None, names=['structure', 'id', 'is_mutagen'])
57
58		data = self.patch_data(data, PATCHES)
59
60		data['structure'] = data.structure.apply(skchem.Mol.from_smiles)
		0 ignored issues – show Bug introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smiles`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
61
62		data = self.standardize(data)
		0 ignored issues – show Bug introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The Instance of `MullerAmesConverter` does not seem to have a member named `standardize`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
63		data = self.optimize(data)
		0 ignored issues – show Bug introduced 2016-08-05 16:20 UTC by Report Bug Copy Issue Report The Instance of `MullerAmesConverter` does not seem to have a member named `optimize`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
64		keep = self.filter(data)
		0 ignored issues – show Bug introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The Instance of `MullerAmesConverter` does not seem to have a member named `filter`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
65
66		ms, ys = keep.structure, keep.is_mutagen
		0 ignored issues – show Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `ms` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `ys` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
67
68		indices = data.reset_index().index.difference(keep.reset_index().index)
69
70		train = self.parse_splits(os.path.join('splits_train_N6512.csv'))
71		train = self.drop_indices(train, indices)
72		splits = self.create_split_dict(train, 'train')
73
74		test = self.parse_splits(os.path.join(directory, 'splits_test_N6512.csv'))
75		test = self.drop_indices(test, indices)
76		splits.update(self.create_split_dict(test, 'test'))
77
78		self.run(ms, ys, output_path, splits=splits)
79
80	1	def patch_data(self, data, patches):
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
81		""" Patch smiles in a DataFrame with rewritten ones that specify diazo
82		groups in rdkit friendly way. """
83
84		LOGGER.info('Patching data...')
85		for cas, smiles in patches.items():
86		data.loc[cas, 'structure'] = smiles
87
88		return data
89
90	1	def parse_splits(self, f_path):
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history... Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
91		LOGGER.info('Parsing splits...')
92		with open(f_path) as f:
		0 ignored issues – show Coding Style Naming introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report The name `f` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
93		splits = [split for split in f.read().strip().splitlines()]
94
95		splits = [[n for n in split.strip().split(',')] for split in splits]
96		splits = [sorted(int(n) for n in split) for split in splits] # sorted ints
97		return [np.array(split) - 1 for split in splits] # zero based indexing
98
99	1	def drop_indices(self, splits, indices):
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history... Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
100		LOGGER.info('Dropping failed compounds from split indices...')
101		for i, split in enumerate(splits):
102		split = split - sum(split > ix for ix in indices)
103		splits[i] = np.delete(split, indices)
104
105		return splits
106
107	1	def create_split_dict(self, splits, name):
		0 ignored issues – show Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history... Coding Style introduced 2016-06-12 20:23 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
108		return {'{}_{}'.format(name, i + 1): split \
109		for i, split in enumerate(splits)}
110
111	1	if __name__ == '__main__':
112		logging.basicConfig(level=logging.INFO)
113		LOGGER.info('Converting Muller Ames Dataset...')
114		MullerAmesConverter.convert()
115

richlewis42 / scikit-chem

MullerAmesConverter.__init__() A last analyzed 2016-09-01 14:43 UTC

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

MullerAmesConverter.__init__() A
last analyzed 2016-09-01 14:43 UTC

2. Missing __init__.py files

2. Missing __init__.py files