|
1
|
|
|
#! /usr/bin/env python |
|
2
|
|
|
# |
|
3
|
|
|
# Copyright (C) 2016 Rich Lewis <[email protected]> |
|
4
|
|
|
# License: 3-clause BSD |
|
5
|
|
|
|
|
6
|
1 |
|
""" |
|
7
|
|
|
## skchem.data.transformers.tox21 |
|
8
|
|
|
|
|
9
|
|
|
Module defining transformation techniques for tox21. |
|
10
|
|
|
""" |
|
11
|
|
|
|
|
12
|
1 |
|
import zipfile |
|
13
|
1 |
|
import os |
|
14
|
1 |
|
import logging |
|
15
|
1 |
|
LOGGER = logging.getLogger(__name__) |
|
16
|
|
|
|
|
17
|
1 |
|
import numpy as np |
|
|
|
|
|
|
18
|
1 |
|
import pandas as pd |
|
|
|
|
|
|
19
|
|
|
|
|
20
|
1 |
|
from .base import Converter, default_pipeline |
|
21
|
1 |
|
from ... import io |
|
22
|
1 |
|
from ... import core |
|
23
|
|
|
|
|
24
|
1 |
|
class Tox21Converter(Converter):

    """ Class to build tox21 dataset.

    Reads the zipped SDF archives distributed for the Tox21 challenge,
    cleans and merges the train/valid/test folds into a single frame, and
    hands structures, assay targets and fold masks to the base ``Converter``.
    """

    def __init__(self, directory, output_directory, output_filename='tox21.h5'):
        """ Build the tox21 dataset.

        Args:
            directory (str):
                Directory holding ``train.sdf.zip``, ``valid.sdf.zip``,
                ``test.sdf.zip`` and the ``test.txt`` label table.
            output_directory (str):
                Directory in which to write the converted dataset.
            output_filename (str):
                Name of the output file (default ``'tox21.h5'``).
        """

        output_path = os.path.join(output_directory, output_filename)

        # extract data
        train, valid, test = self.extract(directory)

        # read data -- read_train must run first, as it records the assay
        # column names that read_valid/read_test reuse
        train = self.read_train(train)
        valid = self.read_valid(valid)
        test = self.read_test(test, os.path.join(directory, 'test.txt'))

        # combine into full dataset, keyed by fold name
        data = pd.concat([train, valid, test], keys=['train', 'valid', 'test']).sort_index()
        data.index.names = 'ds', 'id'

        # separate the structures from the assay targets
        ms, y = data.structure, data.drop('structure', axis=1)

        # standardize and filter the molecules with the project pipeline
        pipeline = default_pipeline()
        ms, y = pipeline.transform_filter(ms, y)

        # generate splits: move the fold level out of the index ...
        ms, y = ms.reset_index(0), y.reset_index(0)
        split_arr = ms.pop('ds')
        y.pop('ds')

        # ... and turn it into one boolean mask per fold
        splits = [(split, split_arr == split) for split in ('train', 'valid', 'test')]

        y.columns.name = 'tasks'

        # call the Converter to make the final dataset
        self.run(ms, y, output_path, splits=splits)

    @staticmethod
    def fix_id(s):
        """ Strip the batch suffix from a sample id,
        e.g. ``'NCGC00357062-01'`` -> ``'NCGC00357062'``. """
        return s.split('-')[0]

    @staticmethod
    def fix_assay_name(s):
        """ Make an assay name a valid identifier, e.g. ``'NR-AhR'`` -> ``'NR_AhR'``. """
        return s.replace('-', '_')

    @staticmethod
    def patch_test(test):
        """ Patch the test set with the compound NCGC00357062, which is
        missing from the distributed sdf, and return the patched frame. """
        test_1 = pd.Series({
            'structure': core.Mol.from_smiles('FC(F)(F)c1[nH]c(c(C#N)c1Br)C1=CC=C(Cl)C=C1', name='NCGC00357062'),
            'stochiometry': 0,  # [sic] key spelling kept -- matches the sdf-derived columns
            'Compound ID': 'NCGC00357062',
            'Sample ID': 'NCGC00357062-01'}, name='NCGC00357062')
        # BUG FIX: the original ``test['NCGC00357062'] = test_1`` created a
        # *column* named after the compound (all-NaN after index alignment,
        # and later dropped by the keep_cols selection), so the patch was a
        # no-op.  ``.loc`` performs setting-with-enlargement, appending the
        # record as a new *row* keyed by the compound id, as intended.
        test.loc['NCGC00357062'] = test_1
        return test

    def read_train(self, train):
        """ Read and clean the train fold from an extracted sdf file path. """

        train = io.read_sdf(train)
        train.columns = train.columns.to_series().apply(self.fix_assay_name)
        train.index = train.index.to_series().apply(self.fix_id)
        # the final 12 columns are the assay targets -- remember them (and the
        # column selection) so the valid/test readers stay consistent
        self.assays = train.columns[-12:]
        self.keep_cols = ['structure'] + self.assays.tolist()
        train[self.assays] = train[self.assays].astype(float)
        train = train[self.keep_cols]
        train = train.sort_index()
        # duplicate ids (one compound, several samples): keep one structure ...
        ms = train.structure[~train.index.duplicated()]
        # ... and merge their labels, a positive (1.0) winning over 0.0/NaN
        train = train[self.assays].groupby(train.index).max()
        train = ms.to_frame().join(train)
        return train

    def read_valid(self, valid):
        """ Read and clean the validation fold from an extracted sdf file path. """

        valid = io.read_sdf(valid)
        valid.columns = valid.columns.to_series().apply(self.fix_assay_name)
        valid = valid[self.keep_cols]
        valid[self.assays] = valid[self.assays].astype(float)
        return valid

    def read_test(self, test, test_data):
        """ Read the test fold, joining the sdf structures (*test*) with the
        tab-separated label table (*test_data*, the ``test.txt`` path). """

        test = io.read_sdf(test)
        test = self.patch_test(test)
        test_data = pd.read_table(test_data)
        test_data['Sample ID'] = test_data['Sample ID'].apply(self.fix_id)
        test = test.join(test_data.set_index('Sample ID'))

        test.columns = test.columns.to_series().apply(self.fix_assay_name)
        test = test[self.keep_cols]
        # the score table marks unmeasured results with 'x'
        test[test == 'x'] = np.nan
        test[self.assays] = test[self.assays].astype(float)
        return test

    def extract(self, directory):
        """ Extract the sdf files from the zip archives in *directory* and
        return the (train, valid, test) paths of the extracted files.

        NOTE(review): ``ZipFile.extract`` without a ``path`` argument writes
        into the current working directory, not *directory* -- the returned
        paths are used downstream so this works, but confirm it is intended.
        """

        with zipfile.ZipFile(os.path.join(directory, 'train.sdf.zip')) as f:
            train = f.extract('tox21_10k_data_all.sdf')

        with zipfile.ZipFile(os.path.join(directory, 'valid.sdf.zip')) as f:
            valid = f.extract('tox21_10k_challenge_test.sdf')

        with zipfile.ZipFile(os.path.join(directory, 'test.sdf.zip')) as f:
            test = f.extract('tox21_10k_challenge_score.sdf')

        return train, valid, test
|
129
|
|
|
|
|
130
|
1 |
|
if __name__ == '__main__':
    # Command-line entry point: configure root logging and run the conversion.
    logging.basicConfig(level=logging.INFO)
    LOGGER.info('Converting Tox21 Dataset...')
    # NOTE(review): ``convert`` is not defined in this module -- presumably a
    # classmethod on the ``Converter`` base class that parses CLI arguments
    # and instantiates this converter; confirm against ``.base``.
    Tox21Converter.convert()
|
134
|
|
|
|
This can be caused by one of the following:
1. Missing dependencies
This error can indicate a Pylint configuration issue. Make sure the libraries this module imports are installed in, and visible to, the environment in which Pylint runs.
2. Missing __init__.py files
This error can also result from missing `__init__.py` files in your module folders. Make sure you place one in each sub-folder of the package.