_to_smiles_df() - Code Metrics - Inspection of "added to_smiles functions" - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 1baf98...c0c140 )

by Rich

created 2016-06-07 13:25 UTC

_to_smiles_df() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
c	0
b	0
f	0
dl	0
loc	3
rs	10

#! /usr/bin/env python
#
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.io.smiles

Defining input and output operations for smiles files.
"""

import warnings
from functools import wraps

import pandas as pd
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3

from ..utils import Suppressor
from ..core import Mol

def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    r"""Read a smiles file into a pandas dataframe.

    This function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed directly
            as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use as
            the index in the dataframe.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to `\t`.
        title_line (bool):
            Whether a title line is provided, to use as column titles.  When
            `True` this takes precedence over any `header` keyword argument.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether an error should be raised when a molecule fails to parse.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be raised when a molecule fails to parse.
            Only consulted when `error_bad_mol` is false.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row whose smiles failed to parse. Otherwise,
            the field is None. Defaults to `True`.
        *args, **kwargs:
            Arguments will be passed to pandas read_csv arguments.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure` field.

    Raises:
        ValueError:
            If `error_bad_mol` is true and a molecule fails to parse.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf

    """

    with Suppressor():

        # Work out the header line to hand to the pandas parser.  A `header`
        # supplied by the caller is popped from kwargs so that it is not
        # passed to read_csv twice; `title_line=True` overrides it with line
        # zero, as is usual for smiles files.
        header = kwargs.pop('header', None)
        if title_line is True:
            header = 0

        # read the smiles file
        data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
                           *args, **kwargs)

        # replace the smiles column with the structure column
        columns = list(data.columns)
        columns[smiles_column] = 'structure'
        data.columns = columns

        def parse(row):
            """ Parse smiles for row """
            try:
                return Mol.from_smiles(row.structure)

            except ValueError:
                # row.name is the index label of the row being parsed
                msg = 'Molecule {} could not be decoded.'.format(row.name)
                if error_bad_mol:
                    raise ValueError(msg)
                elif warn_bad_mol:
                    warnings.warn(msg)

                return None

        data['structure'] = data['structure'].apply(str)

        # With zero rows there is nothing to parse, and a row-wise apply on an
        # empty frame is not guaranteed to give back a per-row Series that can
        # be assigned to a single column, so skip it entirely.
        if not data.empty:
            data['structure'] = data.apply(parse, axis=1)

        if drop_bad_mol:
            data = data[data['structure'].notnull()]

        # set index if passed
        if name_column is not None:
            data = data.set_index(data.columns[name_column])

        return data


def write_smiles(data, smiles_path):

    """ Write a dataframe to a smiles file.

    The output is tab separated with no header line.  The SMILES string is
    always the first field on each line, followed by the index and then any
    remaining columns in their original order.

    The object passed in is left untouched: conversion to SMILES is performed
    on a copy.

    Args:
        data (pd.Series or pd.DataFrame):
            The dataframe to write.  A series is treated as the `structure`
            column; a dataframe must contain a `structure` column.  Each
            structure must provide a `to_smiles()` method.
        smiles_path (str):
            The path to write the dataframe to.
    """

    if isinstance(data, pd.Series):
        frame = data.to_frame(name='structure')
    else:
        # copy, so the caller's structures are not overwritten with strings
        frame = data.copy()

    frame['structure'] = frame['structure'].apply(lambda m: m.to_smiles())

    # bring the index in as an ordinary column so that it is written out
    frame = frame.reset_index()

    # move the structure column to the front
    cols = list(frame.columns)
    cols.insert(0, cols.pop(cols.index('structure')))
    frame = frame[cols]

    frame.to_csv(smiles_path, sep='\t', header=False, index=False)


@classmethod
@wraps(read_smiles)
def _from_smiles_df(cls, *args, **kwargs):
    # The class the method is bound to is not needed: read_smiles builds the
    # dataframe itself, so every argument is simply forwarded to it.
    frame = read_smiles(*args, **kwargs)
    return frame

@classmethod
@wraps(read_smiles)
def _from_smiles_series(cls, *args, **kwargs):
    # Same forwarding as the dataframe reader, but only the parsed
    # `structure` column is handed back.
    frame = read_smiles(*args, **kwargs)
    return frame.structure

@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    # Method form of write_smiles: the object the method is called on is
    # passed as the data to write, followed by the caller's arguments.
    result = write_smiles(self, *args, **kwargs)
    return result

# Attach the smiles readers and writers to the pandas classes, so that they
# are available as Series/DataFrame methods once this module is imported.
pd.Series.from_smiles = _from_smiles_series
pd.Series.to_smiles = _to_smiles_df
pd.DataFrame.from_smiles = _from_smiles_df
pd.DataFrame.to_smiles = _to_smiles_df


1			#! /usr/bin/env python
2			#
3			# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4			# License: 3-clause BSD
5
6			"""
7			# skchem.io.smiles
8
9			Defining input and output operations for smiles files.
10			"""
11
12			import warnings
13			from functools import wraps
14
15			import pandas as pd
			0 ignored issues – show Configuration introduced 2016-01-19 16:21 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16
17			from ..utils import Suppressor
18			from ..core import Mol
19
20			def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
			0 ignored issues – show best-practice introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report Too many arguments (8/5) Loading history...
21			title_line=False, error_bad_mol=False, warn_bad_mol=True,
22			drop_bad_mol=True, *args, **kwargs):
23
24			"""Read a smiles file into a pandas dataframe.
25
26			The class wraps the pandas read_csv function.
27
28			smiles_file (str, file-like):
29			Location of data to load, specified as a string or passed directly as a
30			file-like object. URLs may also be used, see the pandas.read_csv
31			documentation.
32			smiles_column (int):
33			The column index at which SMILES are provided.
34			Defaults to `0`.
35			name_column (int):
36			The column index at which compound names are provided, for use as the
37			index in the dataframe. If None, use the default index.
38			Defaults to `None`.
39			delimiter (str):
40			The delimiter used.
41			Defaults to `\t`.
42			title_line (bool):
43			Whether a title line is provided, to use as column titles.
44			Defaults to `False`.
45			error_bad_mol (bool):
46			Whether an error should be raised when a molecule fails to parse.
47			Defaults to `False`.
48			warn_bad_mol (bool):
49			Whether a warning should be raised when a molecule fails to parse.
50			Defaults to `True`.
51			drop_bad_mol (bool):
52			If true, drop any column with smiles that failed to parse. Otherwise,
53			the field is None. Defaults to `True`.
54			*args, **kwargs:
55			Arguments will be passed to pandas read_csv arguments.
56
57			Returns:
58			pandas.DataFrame:
59			The loaded data frame, with Mols supplied in the `structure` field.
60
61			See Also:
62			pandas.read_csv
63			skchem.Mol.from_smiles
64			skchem.io.sdf
65
66			"""
67
68			with Suppressor():
69
70			# set the header line to pass to the pandas parser
71			# we accept True as being line zero, as is usual for smiles
72			# if user specifies a header already, then do nothing
73
74			header = kwargs.pop('header', None)
75			if title_line is True:
76			header = 0
77			elif header is not None:
78			pass #remove from the kwargs to not pass it twice
79			else:
80			header = None
81
82			# read the smiles file
83			data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
84			*args, **kwargs)
85
86			# replace the smiles column with the structure column
87			lst = list(data.columns)
88			lst[smiles_column] = 'structure'
89			data.columns = lst
90
91			def parse(row):
92			""" Parse smiles for row """
93			try:
94			return Mol.from_smiles(row.structure)
			0 ignored issues – show Bug introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smiles`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
95			except ValueError:
96			msg = 'Molecule {} could not be decoded.'.format(row.name)
97			if error_bad_mol:
98			raise ValueError(msg)
99			elif warn_bad_mol:
100			warnings.warn(msg)
101
102			return None
103
104			data['structure'] = data['structure'].apply(str)
105			data['structure'] = data.apply(parse, axis=1)
106
107			if drop_bad_mol:
108			data = data[data['structure'].notnull()]
109
110			# set index if passed
111			if name_column is not None:
112			data = data.set_index(data.columns[name_column])
113
114			return data
115
116
117			def write_smiles(data, smiles_path):
118
119			""" Write a dataframe to a smiles file.
120
121			Args:
122			data (pd.Series or pd.DataFrame):
123			The dataframe to write.
124			smiles_path (str):
125			The path to write the dataframe to.
126			"""
127
128			if isinstance(data, pd.Series):
129			data = data.to_frame(name='structure')
130			data['structure'] = data.structure.apply(lambda m: m.to_smiles())
131			data = data.reset_index()
132			cols = list(data.columns)
133			cols.insert(0, cols.pop(cols.index('structure')))
134			data = data.reindex(columns=cols)[cols]
135			data.to_csv(smiles_path, sep='\t', header=None, index=None)
136
137
138			@classmethod
139			@wraps(read_smiles)
140			def _from_smiles_df(_, *args, **kwargs):
141			return read_smiles(*args, **kwargs)
142
143			@classmethod
144			@wraps(read_smiles)
145			def _from_smiles_series(_, *args, **kwargs):
146			return read_smiles(*args, **kwargs).structure
147
148			@wraps(write_smiles)
149			def _to_smiles_df(self, *args, **kwargs):
150			return write_smiles(self, *args, **kwargs)
151
152			pd.DataFrame.from_smiles = _from_smiles_df
153			pd.Series.from_smiles = _from_smiles_series
154			pd.Series.to_smiles = _to_smiles_df
155			pd.DataFrame.to_smiles = _to_smiles_df
156

richlewis42 / scikit-chem

Push — master ( 1baf98...c0c140 )

_to_smiles_df() A

Complexity

Size

Duplication

Importance

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files