read_sdf() - Code Metrics - Inspection of "updated pylintrc" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 2bc047...202252 )

by Rich

created 2016-04-14 16:47 UTC

read_sdf() F

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
cc	17
dl	0
loc	83
rs	2.0338

How to fix Long Method Complexity

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
skchem.io.sdf

Defining input and output operations for sdf files.
"""

from functools import wraps

import warnings

from rdkit import Chem
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
import skchem
from skchem.utils import Suppressor
import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

def _drop_props(row):
    for prop in row.structure.props.keys():
        row.structure.ClearProp(prop)

def _set_props(row, cols):
    for i in cols:
        row.structure.SetProp(str(i), str(row[i])) # rdkit props can only be strs

def _set_name(row):
    row.structure.name = str(row.name) # rdkit props can only be strs

def read_sdf(sdf, error_bad_mol=False, warn_bad_mol=True, nmols=None,
             skipmols=None, skipfooter=None, read_props=True, mol_props=False,
             *args, **kwargs):

    """
        Read an sdf file into a pandas dataframe.
        The function wraps the RDKit ForwardSDMolSupplier object.

        @param sdf           A file path provided as a :str:, or a :file-like:
                             object.  A path is opened (and closed again) by
                             this function; a file-like object is left open.
        @param error_bad_mol A :bool: specifying if an error should be raised if
                             a molecule fails to parse.
        @param warn_bad_mol  A :bool: specifying if a warning should be output
                             if a molecule fails to parse.
        @param nmols         An :int: specifying number of molecules to read.
                             If none, read all molecules.
        @param skipmols      An :int: specifying number of molecules to skip at
                             start.
        @param skipfooter    An :int: specifying number of molecules to skip
                             from the end.  It is applied to the molecules that
                             were successfully parsed.
        @param read_props    A :bool: specifying whether the molecule properties
                             should be added as columns of the dataframe.
        @param mol_props     A :bool: specifying whether to keep properties in
                             the molecule dictionary.
        Additionally, ForwardSDMolSupplier arguments will be passed.

        @returns df         A dataframe of type :pandas.core.frame.DataFrame:,
                            indexed by molecule name, with the molecules in the
                            'structure' column.

        @raises ValueError  If a molecule fails to parse and error_bad_mol is
                            set.

    """

    # nmols is actually the index to cutoff.  If we skip some at start, we need
    # to add this number.  When nmols is None there is no cutoff to shift.
    if skipmols and nmols is not None:
        nmols += skipmols

    # remember whether we own the file handle, so we only close what we opened
    opened_here = isinstance(sdf, str)
    if opened_here:
        sdf = open(sdf, 'rb') # use read bytes for python 3 compatibility

    try:
        # use the suppression context manager to not pollute our stdout with
        # rdkit errors and warnings.
        # perhaps this should be captured better by Mol etc.
        with Suppressor():

            mol_supp = Chem.ForwardSDMolSupplier(sdf, *args, **kwargs)

            mols = []

            # single loop through sdf
            for i, mol in enumerate(mol_supp):

                if skipmols and i < skipmols:
                    continue

                if nmols and i >= nmols:
                    break

                # rdkit returns None if it fails to parse a molecule.  Raise,
                # warn or silently skip depending on the flags.
                if mol is None:
                    msg = 'Molecule {} could not be decoded.'.format(i + 1)
                    if error_bad_mol:
                        raise ValueError(msg)
                    elif warn_bad_mol:
                        warnings.warn(msg)
                    continue

                mols.append(skchem.Mol(mol))

            if skipfooter:
                mols = mols[:-skipfooter]
    finally:
        if opened_here:
            sdf.close()

    idx = pd.Index([m.name for m in mols], name='name')
    data = pd.DataFrame(mols, columns=['structure'])

    if read_props:
        props = pd.DataFrame([mol.props for mol in mols])
        data = pd.concat([data, props], axis=1)

    # now we have extracted the props, we can delete if required
    if not mol_props:
        data.apply(_drop_props, axis=1)

    data.index = idx
    return data

def write_sdf(df, sdf, write_cols=True, index_as_name=True, mol_props=False,
              *args, **kwargs):

    """ Write an sdf file from a dataframe.

    The molecules held in the 'structure' column are modified in place:
    depending on the flags their props are cleared, set from the other
    columns, and their name is set from the index.

    @param df             Pandas object.  A Series is treated as a frame with
                          a single 'structure' column.
    @param sdf            A file path provided as a :str:, or a :file-like: object.
    @param write_cols     :bool: specifying whether columns should be written as props
    @param index_as_name  :bool: specifying whether to use index as the name field
    @param mol_props      :bool: specifying whether to write props on the mol in
                          addition to fields in the frame.
    Additionally, SDWriter arguments will be passed.
    """

    if isinstance(df, pd.Series):
        df = df.to_frame(name='structure')

    writer = Chem.SDWriter(sdf, *args, **kwargs)

    try:
        cols = list(df.columns.drop('structure'))

        if not mol_props:
            df.apply(_drop_props, axis=1)

        if write_cols:
            df.apply(_set_props, cols=cols, axis=1)

        if index_as_name:
            df.apply(_set_name, axis=1)

        df.structure.apply(writer.write)
    finally:
        # close the writer explicitly so the output is flushed even if writing
        # fails part way, rather than relying on garbage collection
        writer.close()


def to_sdf_series(self, *args, **kwargs):

    """ Write a series of molecules to an sdf file.

    Forwards to write_sdf with write_cols switched off; all other arguments
    are passed through unchanged.
    """

    result = write_sdf(self, *args, write_cols=False, **kwargs)
    return result


def to_sdf_df(self, *args, **kwargs):

    """ Write a dataframe of molecules to an sdf file.

    Forwards to write_sdf; all arguments are passed through unchanged.
    """

    result = write_sdf(self, *args, **kwargs)
    return result

# Attach the writers to the pandas containers as `to_sdf` methods.
pd.Series.to_sdf = to_sdf_series
pd.DataFrame.to_sdf = to_sdf_df


@classmethod
def from_sdf(_, *args, **kwargs):

    """ Create a DataFrame from an sdf file.

    The class argument is ignored; all other arguments are forwarded to
    read_sdf and its result is returned.
    """

    frame = read_sdf(*args, **kwargs)
    return frame

# Attach the reader to pandas DataFrame as the `from_sdf` classmethod.
pd.DataFrame.from_sdf = from_sdf


1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			skchem.io.sdf
8
9			Defining input and output operations for sdf files.
10			"""
11
12			from functools import wraps
			0 ignored issues – show Unused Code introduced 2016-04-14 16:48 UTC by Report Bug Copy Issue Report Unused wraps imported from functools Loading history...
13			import warnings
14
15			from rdkit import Chem
			0 ignored issues – show Configuration introduced 2016-01-19 16:21 UTC by Report Bug Copy Issue Report The import `rdkit` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16			import skchem
17			from skchem.utils import Suppressor
18			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-01-19 16:21 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
19
20			def _drop_props(row):
21			for prop in row.structure.props.keys():
22			row.structure.ClearProp(prop)
23
24			def _set_props(row, cols):
25			for i in cols:
26			row.structure.SetProp(str(i), str(row[i])) # rdkit props can only be strs
27
28			def _set_name(row):
29			row.structure.name = str(row.name) # rdkit props can only be strs
30
31			def read_sdf(sdf, error_bad_mol=False, warn_bad_mol=True, nmols=None,
			0 ignored issues – show best-practice introduced 2016-04-14 16:48 UTC by Report Bug Copy Issue Report Too many arguments (8/5) Loading history... Comprehensibility introduced 2016-04-14 16:48 UTC by Report Bug Copy Issue Report This function exceeds the maximum number of variables (18/15). Loading history...
32			skipmols=None, skipfooter=None, read_props=True, mol_props=False,
33			args, *kwargs):
34
35			"""
36			Read an sdf file into a pandas dataframe.
37			The function wraps the RDKit ForwardSDMolSupplier object.
38
39			@param sdf A file path provided as a :str:, or a :file-like:
40			object.
41			@param error_bad_mol A :bool: specifying if an error should be raised if
42			a molecule fails to parse.
43			@param warn_bad_mol A :bool: specifying if a warning should be output
44			if a molecule fails to parse.
45			@param nmols An :int: specifying number of molecules to read.
46			If none, read all molecules.
47			@param skipmols An :int: specifying number of molecules to skip at
48			start.
49			@param skipfooter An :int: specifying number of molecules to skip
50			from the end.
51			@param mol_props A :bool: specifying whether to keep properties in
52			the molecule dictionary.
53			Additionally, ForwardSDMolSupplier arguments will be passed.
54
55			@returns df A dataframe of type :pandas.core.frame.DataFrame:.
56
57			"""
58
59			# nmols is actually the index to cutoff. If we skip some at start, we need
60			# to add this number
61			if skipmols:
62			nmols += skipmols
63
64			if isinstance(sdf, str):
65			sdf = open(sdf, 'rb') # use read bytes for python 3 compatibility
66
67			# use the suppression context manager to not pollute our stdout with rdkit
68			# errors and warnings.
69			# perhaps this should be captured better by Mol etc.
70			with Suppressor():
71
72			mol_supp = Chem.ForwardSDMolSupplier(sdf, args, *kwargs)
73
74			mols = []
75
76			# single loop through sdf
77			for i, mol in enumerate(mol_supp):
78
79			if skipmols and i < skipmols:
80			continue
81
82			if nmols and i >= nmols:
83			break
84
85			# rdkit returns None if it fails to parse a molecule. We will raise
86			# errors unless force is used.
87			if mol is None:
88			msg = 'Molecule {} could not be decoded.'.format(i + 1)
89			if error_bad_mol:
90			raise ValueError(msg)
91			elif warn_bad_mol:
92			warnings.warn(msg)
93			continue
94
95			mols.append(skchem.Mol(mol))
96
97
98			if skipfooter:
99			mols = mols[:-skipfooter]
100
101			idx = pd.Index((m.name for m in mols), name='name')
102			data = pd.DataFrame(mols, columns=['structure'])
103
104			if read_props:
105			props = pd.DataFrame([mol.props for mol in mols])
106			data = pd.concat([data, props], axis=1)
107
108			# now we have extracted the props, we can delete if required
109			if not mol_props:
110			data.apply(_drop_props, axis=1)
111
112			data.index = idx
113			return data
114
115			def write_sdf(df, sdf, write_cols=True, index_as_name=True, mol_props=False,
			0 ignored issues – show Coding Style Naming introduced 2016-04-14 16:48 UTC by Report Bug Copy Issue Report The name `df` does not conform to the argument naming conventions (`[a-z_][a-z0-9_]{2,30}$`). This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
116			args, *kwargs):
117
118			""" Write an sdf file from a dataframe.
119
120			@param df Pandas object
121			@param sdf A file path provided as a :str:, or a :file-like: object.
122			@param write_cols :bool: specifying whether columns should be written as props
123			@param index_as_name :bool: specifying whether to use index as the name field
124			@param mol_props :bool: specifying whether to write props on the mol in
125			addition to fields in the frame.
126			"""
127
128			if isinstance(df, pd.Series):
129			df = df.to_frame(name='structure')
130
131			writer = Chem.SDWriter(sdf, args, *kwargs)
132
133			cols = list(df.columns.drop('structure'))
134
135			if not mol_props:
136			df.apply(_drop_props, axis=1)
137
138			if write_cols:
139			df.apply(_set_props, cols=cols, axis=1)
140
141			if index_as_name:
142			df.apply(_set_name, axis=1)
143
144			df.structure.apply(writer.write)
145
146
147			def to_sdf_series(self, args, *kwargs):
148
149			""" sdf series """
150
151			return write_sdf(self, write_cols=False, args, *kwargs)
152
153
154			def to_sdf_df(self, args, *kwargs):
155
156			""" sdf dataframe """
157
158			return write_sdf(self, args, *kwargs)
159
160			pd.Series.to_sdf = to_sdf_series
161			pd.DataFrame.to_sdf = to_sdf_df
162
163
164			@classmethod
165			def from_sdf(_, args, *kwargs):
166
167			""" Create a DataFrame from an sdf file """
168
169			return read_sdf(args, *kwargs)
170
171			pd.DataFrame.from_sdf = from_sdf
172

Push — master ( 2bc047...202252 )

read_sdf() F

Complexity

Size

Duplication

How to fix Long Method Complexity

Long Method

Complexity

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

richlewis42 / scikit-chem

Push — master ( 2bc047...202252 )

read_sdf() F

Complexity

Size

Duplication

How to fix Long Method Complexity

Long Method

Complexity

1. Missing Dependencies

2. Missing __init__.py files

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files

2. Missing __init__.py files