NMRShiftDB2Converter - Code Metrics - Inspection of "small refactors" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 01edc4...2b1c29 )

by Rich

created 2016-07-18 17:47 UTC

NMRShiftDB2Converter B

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	158
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	1

Metric	Value
wmc	41
c	1
b	0
f	1
dl	0
loc	158
rs	8.2769

20 Methods

Rating	Name	Size	Complexity
B	log_dists()	13	6
A	is_spectrum()	2	2
A	__init__()	22	1
A	index_pair()	2	1
A	is_duplicate()	5	2
A	plot_duplicates()	4	1
B	combine_duplicates()	13	7
A	squash_duplicates()	11	4
A	log_duplicates()	10	4
A	log_message()	2	2
B	process_spectra()	14	6
B	aggregate_dicts()	7	5
A	squash()	5	3
A	parse_data()	12	1
B	spectrum_dict()	8	5
A	to_frame()	6	1
A	n_shifts()	2	2
C	get_spectra()	30	7
A	n_spect()	2	1
A	extract_duplicates()	11	3

How to fix Complexity

#! /usr/bin/env python
class SomeClass:
    def some_method(self):
        """Do x and return foo."""
#
# Copyright (C) 2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

import os
import logging
import itertools
from collections import defaultdict

import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import numpy as np
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
from sklearn import metrics
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

from .base import Converter
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
from ... import io
from ... import utils

# Name the logger after the module rather than its file path, so that it takes
# part in the dotted logger hierarchy and can be configured per package.
LOGGER = logging.getLogger(__name__)

class NMRShiftDB2Converter(Converter):
    """Converter for the NMRShiftDB2 dataset.

    Reads ``nmrshiftdb2.sdf``, extracts the per-isotope spectra, merges
    duplicate spectra of the same molecule and writes the 13c shifts together
    with the structures.

    NOTE(review): ``standardize``, ``filter``, ``optimize``, ``run`` and
    ``convert`` are not defined here; presumably they are inherited from
    ``Converter`` -- confirm.
    """

    def __init__(self, directory, output_directory, output_filename='nmrshiftdb2.h5'):
        """Run the whole conversion.

        Args:
            directory: Directory containing ``nmrshiftdb2.sdf``.
            output_directory: Directory the output file is written to.
            output_filename: Name of the output file.
        """

        output_path = os.path.join(output_directory, output_filename)
        input_path = os.path.join(directory, 'nmrshiftdb2.sdf')
        data = self.parse_data(input_path)

        # Spectra pipeline: raw strings -> dicts -> duplicates merged into
        # lists -> (after logging statistics) duplicates averaged.
        ys = self.get_spectra(data)
        ys = self.process_spectra(ys)
        ys = self.combine_duplicates(ys)

        self.log_dists(ys)
        self.log_duplicates(ys)
        ys = self.squash_duplicates(ys)

        # Only molecules that have a 13c spectrum are kept (right join).
        c13s = self.to_frame(ys.loc[ys['13c'].notnull(), '13c'])
        data = data[['structure']].join(c13s, how='right')

        data = self.standardize(data)
        data = self.filter(data)
        data = self.optimize(data)

        ms, y = data.structure, data.drop('structure', axis=1)

        self.run(ms, y, output_path=output_path)

    def parse_data(self, filepath):
        """Read the raw SDF file.

        Args:
            filepath: Path of the SDF file to read.

        Returns:
            The frame returned by ``io.read_sdf``, indexed by the integer
            'nmrshiftdb2 ID' (index named ``nmrshiftdb2_id``), sorted by that
            index, with column names passed through ``utils.free_to_snail``.
        """

        LOGGER.info('Reading file: %s', filepath)
        data = io.read_sdf(filepath, removeHs=False, warn_bad_mol=False)
        data.index = data['nmrshiftdb2 ID'].astype(int)
        data.index.name = 'nmrshiftdb2_id'
        data.columns = data.columns.to_series().apply(utils.free_to_snail)
        data = data.sort_index()
        LOGGER.info('Read %s molecules.', len(data))
        return data

    def get_spectra(self, data):
        """Select the spectrum columns from the raw data.

        A column is kept when its name contains one of the known isotope
        labels.  The kept columns are relabelled with a two-level index built
        from the second and third underscore-separated fields of the name.

        Args:
            data: Frame as returned by ``parse_data``.

        Returns:
            Frame holding only the spectrum columns, with a
            ``(label, number)`` column MultiIndex.
        """

        LOGGER.info('Retrieving spectra from raw data...')
        isotopes = [
            '1h',
            '11b',
            '13c',
            '15n',
            '17o',
            '19f',
            '29si',
            '31p',
            '33s',
            '73ge',
            '195pt',
        ]

        spectrum_cols = [col for col in data
                         if any(isotope in col for isotope in isotopes)]
        data = data[spectrum_cols]

        def index_pair(fields):
            """Return ``(label, number)`` from the split column-name fields."""
            return fields[0], int(fields[1])

        # assumes names look like '<prefix>_<isotope>_<number>' -- TODO confirm
        data.columns = pd.MultiIndex.from_tuples(
            [index_pair(name.split('_')[1:]) for name in data.columns])

        return data

    def process_spectra(self, data):
        """Turn the string representations found in the SDF into dictionaries.

        Args:
            data: Frame of spectrum strings (or missing values).

        Returns:
            Frame of the same shape whose cells are ``{atom: shift}``
            dictionaries, or NaN where there is no spectrum.
        """

        def spectrum_dict(spectrum_string):
            """Parse one ``shift;coupling;atom|...`` string into a dict."""
            if not isinstance(spectrum_string, str):
                return np.nan  # missing spectra stay NaN
            signals = spectrum_string.strip().strip('|').strip()
            if not signals:
                # Empty (or whitespace/pipe only) spectra are NaN as well;
                # splitting them would otherwise fail on the field lookup.
                return np.nan
            # Each signal is 'signal;coupling;atom'.
            fields = [signal.split(';') for signal in signals.split('|')]
            return {int(field[2]): float(field[0]) for field in fields}

        return data.applymap(spectrum_dict)

    def combine_duplicates(self, data):
        """Collect duplicate spectra into one dictionary per isotope.

        Args:
            data: Frame of ``{atom: shift}`` dictionaries with a two-level
                column index, as built by ``get_spectra``.

        Returns:
            Frame with one column per first-level label whose cells are
            ``{atom: [shift, ...]}`` dictionaries, or NaN when none of the
            grouped cells held a spectrum.
        """

        def aggregate_dicts(ds):
            """Merge the dictionaries in *ds*, gathering values into lists."""
            res = defaultdict(list)
            for d in ds:
                if not isinstance(d, dict):
                    continue  # NaN placeholder for a missing spectrum
                for k, v in d.items():
                    res[k].append(v)
            return dict(res) if res else np.nan

        return data.groupby(level=0, axis=1).apply(
            lambda s: s.apply(aggregate_dicts, axis=1))

    def squash_duplicates(self, data):
        """Take the mean of all the duplicate shifts.

        This is where we could do a bit more checking.

        Args:
            data: Frame of ``{atom: [shift, ...]}`` dictionaries.

        Returns:
            Frame of ``{atom: mean_shift}`` dictionaries, NaN where the cell
            was not a dictionary.
        """

        def squash(d):
            """Average every list of shifts in one combined spectrum."""
            if not isinstance(d, dict):
                return np.nan
            return {k: np.mean(v) for k, v in d.items()}

        return data.applymap(squash)

    def to_frame(self, data):
        """Convert a series of dictionaries to a dataframe.

        Args:
            data: Series whose values are dictionaries.

        Returns:
            Frame with the index of *data* and one column per dictionary key;
            the column axis is named ``atom_idx``.
        """
        res = pd.DataFrame(data.tolist(), index=data.index)
        res.columns.name = 'atom_idx'
        return res

    def extract_duplicates(self, data, kind='13c'):
        """Get all spectra of the given kind that hold duplicate shifts.

        Args:
            data: Frame as returned by ``combine_duplicates``.
            kind: Column (isotope label) to inspect.

        Returns:
            The entries of ``data[kind]`` in which at least one atom has more
            than one recorded shift.
        """

        def is_duplicate(ele):
            """Return True if any atom of the spectrum has several shifts."""
            if not isinstance(ele, dict):
                return False
            # Check every atom rather than only the first dictionary value,
            # so the result does not depend on dictionary ordering.
            return any(len(shifts) > 1 for shifts in ele.values())

        return data.loc[data[kind].apply(is_duplicate), kind]

    def log_dists(self, data):
        """Log the number of spectra and of shifts for each column.

        Args:
            data: Frame of spectrum dictionaries (NaN where missing).
        """

        def n_spect(ele):
            """Return whether the cell holds a spectrum."""
            return isinstance(ele, dict)

        def n_shifts(ele):
            """Return the number of entries in the cell's spectrum."""
            return len(ele) if isinstance(ele, dict) else 0

        def log_message(func):
            """Format the per-column sums of *func* applied to every cell."""
            totals = data.applymap(func).sum().to_dict()
            return '  '.join('{k}: {v}'.format(k=k, v=v) for k, v in totals.items())

        LOGGER.info('Number of spectra: %s', log_message(n_spect))
        LOGGER.info('Extracted shifts: %s', log_message(n_shifts))

    def log_duplicates(self, data):
        """Log agreement statistics between duplicate 1h and 13c shifts.

        For each kind, every pair of shifts recorded for the same atom of the
        same molecule is compared, and the MAE, MSE and r2 over all such
        pairs are logged.

        Args:
            data: Frame as returned by ``combine_duplicates``.
        """

        for kind in '1h', '13c':
            dups = self.extract_duplicates(data, kind)
            LOGGER.info('Number of duplicate %s spectra: %s', kind, len(dups))

            pairs = list(itertools.chain.from_iterable(
                itertools.combinations(shifts, 2)
                for spectrum in dups
                for shifts in spectrum.values()))
            LOGGER.info('Number of duplicate %s pairs: %d', kind, len(pairs))

            if not pairs:
                # No pairs to compare: an empty frame has no columns 0 and 1,
                # so the metrics below cannot be computed.
                continue

            res = pd.DataFrame(pairs)
            LOGGER.info('MAE for duplicate %s: %.4f', kind,
                        metrics.mean_absolute_error(res[0], res[1]))
            LOGGER.info('MSE for duplicate %s: %.4f', kind,
                        metrics.mean_squared_error(res[0], res[1]))
            LOGGER.info('r2 for duplicate %s: %.4f', kind,
                        metrics.r2_score(res[0], res[1]))

    def plot_duplicates(self, data):
        """Plot the duplicates (not implemented yet)."""
        pass



if __name__ == '__main__':
    # Script entry point: enable debug-level logging, then run the conversion.
    logging.basicConfig(level=logging.DEBUG)
    LOGGER.info('Converting NMRShiftDB2 Dataset...')
    # NOTE(review): `convert` is not defined in this file; presumably it is
    # provided by the Converter base class -- confirm.
    NMRShiftDB2Converter.convert()


Push — master ( 01edc4...2b1c29 )

NMRShiftDB2Converter B

Complexity

Size/Duplication

Importance

20 Methods

How to fix Complexity

Complex Class

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1			#! /usr/bin/env python
			0 ignored issues – show Coding Style introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report This module should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
2			#
3			# Copyright (C) 2016 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			import os
7			import logging
8			import itertools
9			from collections import defaultdict
10
11			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
12			import numpy as np
			0 ignored issues – show Configuration introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The import `numpy` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
13			from sklearn import metrics
			0 ignored issues – show Configuration introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The import `sklearn` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
14
15			from .base import Converter
			0 ignored issues – show Configuration introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report Unable to import 'base' (invalid syntax (<string>, line 46)) This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16			from ... import io
17			from ... import utils
18
19			LOGGER = logging.getLogger(__file__)
20
21			class NMRShiftDB2Converter(Converter):
			0 ignored issues – show Coding Style introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report This class should have a docstring. The coding style of this project requires that you add a docstring to this code element. Below, you find an example for methods: class SomeClass: def some_method(self): """Do x and return foo.""" If you would like to know more about docstrings, we recommend to read PEP-257: Docstring Conventions. Loading history...
22
23			def __init__(self, directory, output_directory, output_filename='nmrshiftdb2.h5'):
24
25			output_path = os.path.join(output_directory, output_filename)
26			input_path = os.path.join(directory, 'nmrshiftdb2.sdf')
27			data = self.parse_data(input_path)
28
29			ys = self.get_spectra(data)
			0 ignored issues – show Coding Style Naming introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The name `ys` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
30			ys = self.process_spectra(ys)
			0 ignored issues – show Coding Style Naming introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The name `ys` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
31			ys = self.combine_duplicates(ys)
			0 ignored issues – show Coding Style Naming introduced 2016-07-18 17:48 UTC by Report Bug Copy Issue Report The name `ys` does not conform to the variable naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...

richlewis42 / scikit-chem

Push — master ( 01edc4...2b1c29 )

NMRShiftDB2Converter B

Complexity

Size/Duplication

Importance

20 Methods

How to fix Complexity

Complex Class

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files

2. Missing __init__.py files