write_smiles() - Code Metrics - Inspection of "added progressbar to requirements and added stderr..." - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 971a6f...39262f )

by Rich

created 2016-08-11 21:51 UTC

write_smiles() A

↳ Parent: Project

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	1
CRAP Score	9.762

Importance

Changes	2
Bugs	1	Features	1

Metric	Value
c	2
b	1
f	1
dl	0
loc	21
ccs	1
cts	11
cp	0.0909
rs	9.3142
cc	3
crap	9.762

#! /usr/bin/env python
#
# Copyright (C) 2015-2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.io.smiles

Defining input and output operations for smiles files.
"""

import warnings
from functools import wraps

import pandas as pd
# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3

from ..utils import Suppressor
from ..core import Mol

def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    """Read a smiles file into a pandas dataframe.

    This function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed
            directly as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use
            as the index in the DataFrame.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to `\\t`.
        title_line (bool):
            Whether a title line is provided, to use as column titles.  When
            `True`, line zero is used as the header and any `header` keyword
            argument is ignored.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether a ValueError should be raised when a molecule fails to
            parse.  Takes precedence over `warn_bad_mol`.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be issued when a molecule fails to
            parse.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row whose smiles failed to parse. Otherwise,
            the `structure` field of that row is None.
            Defaults to `True`.
        args, kwargs:
            Remaining arguments are passed on to pandas read_csv.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure`
            field, which is always the first column.

    Raises:
        ValueError:
            If `error_bad_mol` is set and a smiles string cannot be decoded.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf
    """

    def parse(row):
        """ Build a Mol from the smiles held in the row's structure field. """
        try:
            return Mol.from_smiles(row.structure)
        except ValueError:
            # row.name is the index label of the row being processed
            msg = 'Molecule {} could not be decoded.'.format(row.name)
            if error_bad_mol:
                raise ValueError(msg)
            if warn_bad_mol:
                warnings.warn(msg)
            return None

    with Suppressor():

        # A user supplied `header` is always taken out of kwargs so that it
        # is never handed to pandas twice.  title_line=True wins over it and
        # selects line zero, as is usual for smiles files.
        header = kwargs.pop('header', None)
        if title_line is True:
            header = 0

        frame = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
                            *args, **kwargs)

        # rename the smiles column to 'structure'
        names = list(frame.columns)
        names[smiles_column] = 'structure'
        frame.columns = names

        # coerce to str first, then decode row by row (parse needs the row
        # label for its error message)
        frame['structure'] = frame['structure'].apply(str)
        frame['structure'] = frame.apply(parse, axis=1)

        if drop_bad_mol:
            frame = frame[frame['structure'].notnull()]

        # use the requested column as the index, if one was given
        if name_column is not None:
            frame = frame.set_index(frame.columns[name_column])

        # put the structure column first, keeping the order of the others
        remaining = frame.columns.tolist()
        remaining.remove('structure')
        return frame[['structure'] + remaining]


def write_smiles(data, smiles_path):

    """ Write a dataframe to a smiles file.

    The `structure` column is converted by calling `to_smiles()` on each of
    its entries and is written as the first field of every line.  The index
    is written as ordinary column(s) straight after it, followed by the
    remaining columns in their original order.  The output is tab separated
    and has no header line.  The object passed in is not modified.

    Args:
        data (pd.Series or pd.DataFrame):
            The dataframe to write.  A series is treated as a dataframe with
            a single `structure` column.
        smiles_path (str):
            The path to write the dataframe to.
    """

    if isinstance(data, pd.Series):
        data = data.to_frame(name='structure')

    smiles = data.structure.apply(lambda mol: mol.to_smiles())

    # assign returns a new frame, so the caller's data is left untouched
    # without copying every column up front
    out = data.assign(structure=smiles).reset_index()

    cols = list(out.columns)
    cols.insert(0, cols.pop(cols.index('structure')))
    out[cols].to_csv(smiles_path, sep='\t', header=False, index=False)


# Shim installed as `pd.DataFrame.from_smiles` at the bottom of this module.
# The class argument is ignored (hence `_`); everything else is forwarded
# unchanged to read_smiles.  `wraps` copies read_smiles' metadata (including
# its docstring) onto the shim, so no separate docstring is given here.
@classmethod
@wraps(read_smiles)
def _from_smiles_df(_, *args, **kwargs):
    return read_smiles(*args, **kwargs)

# Shim installed as `pd.Series.from_smiles` at the bottom of this module.
# The class argument is ignored (hence `_`).  `wraps` copies read_smiles'
# metadata (including its docstring) onto the shim.
@classmethod
@wraps(read_smiles)
def _from_smiles_series(_, *args, **kwargs):
    # load the full frame, then hand back only its `structure` column
    frame = read_smiles(*args, **kwargs)
    return frame.structure

# Shim installed as the `to_smiles` method of both pd.Series and pd.DataFrame
# at the bottom of this module: `self` is the object the method is called on
# and is passed to write_smiles as its `data` argument.  `wraps` copies
# write_smiles' metadata (including its docstring) onto the shim.
@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    return write_smiles(self, *args, **kwargs)

# Import-time side effect: attach the smiles readers/writers to the pandas
# classes themselves, so that `pd.DataFrame.from_smiles(...)`,
# `pd.Series.from_smiles(...)` and `<frame or series>.to_smiles(...)` become
# available once this module has been imported.
pd.DataFrame.from_smiles = _from_smiles_df
pd.Series.from_smiles = _from_smiles_series
pd.Series.to_smiles = _to_smiles_df
pd.DataFrame.to_smiles = _to_smiles_df


Push — master ( 971a6f...39262f )

write_smiles() A

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

1		#! /usr/bin/env python
2		#
3		# Copyright (C) 2015-2016 Rich Lewis <[email protected]>
4		# License: 3-clause BSD
5
6	1	"""
7		# skchem.io.smiles
8
9		Defining input and output operations for smiles files.
10		"""
11
12	1	import warnings
13	1	from functools import wraps
14
15	1	import pandas as pd
		0 ignored issues – show Configuration introduced 2016-01-19 16:21 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16
17	1	from ..utils import Suppressor
18	1	from ..core import Mol
19
20	1	def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
		0 ignored issues – show best-practice introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report Too many arguments (8/5) Loading history...
21		title_line=False, error_bad_mol=False, warn_bad_mol=True,
22		drop_bad_mol=True, *args, **kwargs):
23
24		"""Read a smiles file into a pandas dataframe.
25
26		The class wraps the pandas read_csv function.
27
28		smiles_file (str, file-like):
29		Location of data to load, specified as a string or passed directly as a
30		file-like object. URLs may also be used, see the pandas.read_csv
31		documentation.
32		smiles_column (int):
33		The column index at which SMILES are provided.
34		Defaults to `0`.
35		name_column (int):
36		The column index at which compound names are provided, for use as the
37		index in the DataFrame. If None, use the default index.
38		Defaults to `None`.
39		delimiter (str):
40		The delimiter used.
41		Defaults to `\\t`.
42		title_line (bool):
43		Whether a title line is provided, to use as column titles.
44		Defaults to `False`.
45		error_bad_mol (bool):
46		Whether an error should be raised when a molecule fails to parse.
47		Defaults to `False`.
48		warn_bad_mol (bool):
49		Whether a warning should be raised when a molecule fails to parse.
50		Defaults to `True`.
51		drop_bad_mol (bool):
52		If true, drop any column with smiles that failed to parse. Otherwise,
53		the field is None. Defaults to `True`.
54		args, kwargs:
55		Arguments will be passed to pandas read_csv arguments.
56
57		Returns:
58		pandas.DataFrame:
59		The loaded data frame, with Mols supplied in the `structure` field.
60
61		See Also:
62		pandas.read_csv
63		skchem.Mol.from_smiles
64		skchem.io.sdf
65		"""
66
67	1	with Suppressor():
68
69		# set the header line to pass to the pandas parser
70		# we accept True as being line zero, as is usual for smiles
71		# if user specifies a header already, then do nothing
72
73	1	header = kwargs.pop('header', None)
74	1	if title_line is True:
75	1	header = 0
76	1	elif header is not None:
77	1	pass #remove from the kwargs to not pass it twice
78		else:
79	1	header = None
80
81		# read the smiles file
82	1	data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
83		*args, **kwargs)
84
85		# replace the smiles column with the structure column
86	1	lst = list(data.columns)
87	1	lst[smiles_column] = 'structure'
88	1	data.columns = lst
89
90	1	def parse(row):
91		""" Parse smiles for row """
92	1	try:
93	1	return Mol.from_smiles(row.structure)
		0 ignored issues – show Bug introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smiles`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
94	1	except ValueError:
95	1	msg = 'Molecule {} could not be decoded.'.format(row.name)
96	1	if error_bad_mol:
97	1	raise ValueError(msg)
98	1	elif warn_bad_mol:
99	1	warnings.warn(msg)
100
101	1	return None
102
103	1	data['structure'] = data['structure'].apply(str)
104	1	data['structure'] = data.apply(parse, axis=1)
105
106	1	if drop_bad_mol:
107	1	data = data[data['structure'].notnull()]
108
109		# set index if passed
110	1	if name_column is not None:
111	1	data = data.set_index(data.columns[name_column])
112
113	1	cols = data.columns.tolist()
114	1	cols.remove('structure')
115	1	data = data[['structure'] + cols]
116	1	return data
117
118
119	1	def write_smiles(data, smiles_path):
120
121		""" Write a dataframe to a smiles file.
122
123		Args:
124		data (pd.Series or pd.DataFrame):
125		The dataframe to write.
126		smiles_path (str):
127		The path to write the dataframe to.
128		"""
129
130		if isinstance(data, pd.Series):
131		data = data.to_frame(name='structure')
132		data = data.copy()
133		data['structure'] = data.structure.apply(lambda m: m.to_smiles())
134		data = data.reset_index()
135		cols = list(data.columns)
136		cols.insert(0, cols.pop(cols.index('structure')))
137		data = data.reindex(columns=cols)[cols]
138		data.to_csv(smiles_path, sep='\t', header=None, index=None)
139		del data
140
141
142	1	@classmethod
143	1	@wraps(read_smiles)
144		def _from_smiles_df(_, *args, **kwargs):
145		return read_smiles(*args, **kwargs)
146
147	1	@classmethod
148	1	@wraps(read_smiles)
149		def _from_smiles_series(_, *args, **kwargs):
150		return read_smiles(*args, **kwargs).structure
151
152	1	@wraps(write_smiles)
153		def _to_smiles_df(self, *args, **kwargs):
154		return write_smiles(self, *args, **kwargs)
155
156	1	pd.DataFrame.from_smiles = _from_smiles_df
157	1	pd.Series.from_smiles = _from_smiles_series
158	1	pd.Series.to_smiles = _to_smiles_df
159		pd.DataFrame.to_smiles = _to_smiles_df
160

richlewis42 / scikit-chem

Push — master ( 971a6f...39262f )

write_smiles() A

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

2. Missing __init__.py files