parse() - Code Metrics - richlewis42/scikit-chem - Measure and Improve Code Quality continuously with Scrutinizer

parse() A
last analyzed 2016-09-01 14:43 UTC

↳ Parent: read_smiles()

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	10
CRAP Score	4

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
c	1
b	0
f	0
dl	0
loc	12
ccs	10
cts	10
cp	1
rs	9.2
cc	4
crap	4

#! /usr/bin/env python
#
# Copyright (C) 2015-2016 Rich Lewis <[email protected]>
# License: 3-clause BSD

"""
# skchem.io.smiles

Defining input and output operations for smiles files.
"""

import warnings
from functools import wraps

import pandas as pd
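# NOTE: the lines below are a hint from the Scrutinizer report, not part of
# the module. If the `pandas` import cannot be resolved by the analyser, add
# the install commands to the project's configuration:
#
# .scrutinizer.yml
# before_commands:
#     - sudo pip install abc # Python2
#     - sudo pip3 install abc # Python3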

from ..utils import Suppressor, squeeze
from ..core import Mol


def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    """Read a smiles file into a pandas dataframe.

    This function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed
            directly as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use
            as the index in the DataFrame.  The column is renamed to
            `batch`.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to `\\t`.
        title_line (bool):
            Whether a title line is provided, to use as column titles.  When
            `True`, this takes precedence over any `header` keyword argument.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether an error should be raised when a molecule fails to parse.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be raised when a molecule fails to
            parse.  Only consulted when `error_bad_mol` is false.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row whose smiles failed to parse. Otherwise,
            the field is None. Defaults to `True`.
        args, kwargs:
            Arguments will be passed to pandas read_csv arguments.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure`
            field, which is placed first.  The frame is passed through
            `squeeze(..., axis=1)` before being returned.

    Raises:
        ValueError:
            If `error_bad_mol` is set and a SMILES string fails to parse.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf
    """

    with Suppressor():

        # Work out the header line to hand to the pandas parser.  `header` is
        # popped from kwargs so it is never passed to read_csv twice.  A title
        # line means row zero holds the column titles; otherwise whatever the
        # caller supplied (or None) is used unchanged.
        header = kwargs.pop('header', None)
        if title_line is True:
            header = 0

        # read the smiles file
        data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
                           *args, **kwargs)

        # replace the smiles column with the structure column
        lst = list(data.columns)
        lst[smiles_column] = 'structure'
        # Compare against None explicitly: column index 0 is a valid name
        # column and must be renamed just like any other.
        if name_column is not None:
            lst[name_column] = 'batch'
        data.columns = lst

        def parse(row):
            """ Parse smiles for row """
            try:
                return Mol.from_smiles(row.structure)

            except ValueError:
                # row.name is the index label of the offending row
                msg = 'Molecule {} could not be decoded.'.format(row.name)
                if error_bad_mol:
                    raise ValueError(msg)
                elif warn_bad_mol:
                    warnings.warn(msg)

                return None

        # coerce every entry to a string before parsing it
        data['structure'] = data['structure'].apply(str)
        data['structure'] = data.apply(parse, axis=1)

        if drop_bad_mol:
            data = data[data['structure'].notnull()]

        # set index if passed
        if name_column is not None:
            data = data.set_index(data.columns[name_column])

        # move the structure column to the front
        cols = data.columns.tolist()
        cols.remove('structure')
        data = data[['structure'] + cols]
        return squeeze(data, axis=1)


def write_smiles(data, smiles_path):

    """ Write a series or dataframe of molecules to a smiles file.

    The `structure` column is converted by calling `to_smiles()` on each
    entry and is written first, followed by the (reset) index and any
    remaining columns.  The output is tab separated and has no header line.
    The object passed in is not modified.

    Args:
        data (pd.Series or pd.DataFrame):
            The molecules to write.  A series is treated as the `structure`
            column of a single column dataframe.
        smiles_path (str):
            The path to write the dataframe to.
    """

    if isinstance(data, pd.Series):
        frame = data.to_frame(name='structure')
    else:
        frame = data

    # work on a copy so the caller's object keeps its original entries
    frame = frame.copy()
    frame['structure'] = frame.structure.apply(lambda mol: mol.to_smiles())

    # turn the index into an ordinary column so that it is written out
    frame = frame.reset_index()

    # put the structure column first, leaving the others in their order
    ordered = list(frame.columns)
    ordered.remove('structure')
    ordered = ['structure'] + ordered

    frame = frame.reindex(columns=ordered)[ordered]
    frame.to_csv(smiles_path, sep='\t', header=None, index=None)


@classmethod
@wraps(read_smiles)
def _from_smiles_df(cls, *args, **kwargs):
    # Classmethod form of read_smiles: the class it is bound to is ignored
    # and every argument is forwarded unchanged.
    loaded = read_smiles(*args, **kwargs)
    return loaded


@classmethod
@wraps(read_smiles)
def _from_smiles_series(cls, *args, **kwargs):
    # Classmethod form of read_smiles that hands back only the `structure`
    # attribute of the result; the class it is bound to is ignored.
    loaded = read_smiles(*args, **kwargs)
    return loaded.structure


@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    # Method form of write_smiles: the instance is passed as the data to
    # write, followed by whatever arguments the caller supplied.
    outcome = write_smiles(self, *args, **kwargs)
    return outcome

# Attach the smiles readers and writers to the pandas classes.  This runs at
# import time and modifies pd.DataFrame and pd.Series for the whole process.
# DataFrame.from_smiles returns the full result of read_smiles, while
# Series.from_smiles returns only its `structure` attribute.
pd.DataFrame.from_smiles = _from_smiles_df
pd.Series.from_smiles = _from_smiles_series
# both classes share the same writer, which forwards to write_smiles
pd.Series.to_smiles = _to_smiles_df
pd.DataFrame.to_smiles = _to_smiles_df


1		#! /usr/bin/env python
2		#
3		# Copyright (C) 2015-2016 Rich Lewis <[email protected]>
4		# License: 3-clause BSD
5
6	1	"""
7		# skchem.io.smiles
8
9		Defining input and output operations for smiles files.
10		"""
11
12	1	import warnings
13	1	from functools import wraps
14
15	1	import pandas as pd
		0 ignored issues – show Configuration introduced 2016-01-19 16:21 UTC by Report Bug Copy Issue Report The import `pandas` could not be resolved. This can be caused by one of the following: 1. Missing Dependencies This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands. # .scrutinizer.yml before_commands: - sudo pip install abc # Python2 - sudo pip3 install abc # Python3 Tip: We are currently not using virtualenv to run pylint, when installing your modules make sure to use the command for the correct version. 2. Missing __init__.py files This error could also result from missing `__init__.py` files in your module folders. Make sure that you place one file in each sub-folder. Loading history...
16
17	1	from ..utils import Suppressor, squeeze
18	1	from ..core import Mol
19
20
21	1	def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
		0 ignored issues – show best-practice introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report Too many arguments (8/5) Loading history...
22		title_line=False, error_bad_mol=False, warn_bad_mol=True,
23		drop_bad_mol=True, *args, **kwargs):
24
25		"""Read a smiles file into a pandas dataframe.
26
27		The class wraps the pandas read_csv function.
28
29		smiles_file (str, file-like):
30		Location of data to load, specified as a string or passed directly as a
31		file-like object. URLs may also be used, see the pandas.read_csv
32		documentation.
33		smiles_column (int):
34		The column index at which SMILES are provided.
35		Defaults to `0`.
36		name_column (int):
37		The column index at which compound names are provided, for use as the
38		index in the DataFrame. If None, use the default index.
39		Defaults to `None`.
40		delimiter (str):
41		The delimiter used.
42		Defaults to `\\t`.
43		title_line (bool):
44		Whether a title line is provided, to use as column titles.
45		Defaults to `False`.
46		error_bad_mol (bool):
47		Whether an error should be raised when a molecule fails to parse.
48		Defaults to `False`.
49		warn_bad_mol (bool):
50		Whether a warning should be raised when a molecule fails to parse.
51		Defaults to `True`.
52		drop_bad_mol (bool):
53		If true, drop any column with smiles that failed to parse. Otherwise,
54		the field is None. Defaults to `True`.
55		args, kwargs:
56		Arguments will be passed to pandas read_csv arguments.
57
58		Returns:
59		pandas.DataFrame:
60		The loaded data frame, with Mols supplied in the `structure` field.
61
62		See Also:
63		pandas.read_csv
64		skchem.Mol.from_smiles
65		skchem.io.sdf
66		"""
67
68	1	with Suppressor():
69
70		# set the header line to pass to the pandas parser
71		# we accept True as being line zero, as is usual for smiles
72		# if user specifies a header already, then do nothing
73
74	1	header = kwargs.pop('header', None)
75	1	if title_line is True:
76	1	header = 0
77	1	elif header is not None:
78	1	pass #remove from the kwargs to not pass it twice
79		else:
80	1	header = None
81
82		# read the smiles file
83	1	data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
84		*args, **kwargs)
85
86		# replace the smiles column with the structure column
87	1	lst = list(data.columns)
88	1	lst[smiles_column] = 'structure'
89	1	if name_column:
90	1	lst[name_column] = 'batch'
91	1	data.columns = lst
92
93	1	def parse(row):
94		""" Parse smiles for row """
95	1	try:
96	1	return Mol.from_smiles(row.structure)
		0 ignored issues – show Bug introduced 2016-05-15 17:12 UTC by Report Bug Copy Issue Report The Class `Mol` does not seem to have a member named `from_smiles`. This check looks for calls to members that are non-existent. These calls will fail. The member could have been renamed or removed. Loading history...
97	1	except ValueError:
98	1	msg = 'Molecule {} could not be decoded.'.format(row.name)
99	1	if error_bad_mol:
100	1	raise ValueError(msg)
101	1	elif warn_bad_mol:
102	1	warnings.warn(msg)
103
104	1	return None
105
106	1	data['structure'] = data['structure'].apply(str)
107	1	data['structure'] = data.apply(parse, axis=1)
108
109	1	if drop_bad_mol:
110	1	data = data[data['structure'].notnull()]
111
112		# set index if passed
113	1	if name_column is not None:
114	1	data = data.set_index(data.columns[name_column])
115
116	1	cols = data.columns.tolist()
117	1	cols.remove('structure')
118	1	data = data[['structure'] + cols]
119	1	return squeeze(data, axis=1)
120
121
122	1	def write_smiles(data, smiles_path):
123
124		""" Write a dataframe to a smiles file.
125
126		Args:
127		data (pd.Series or pd.DataFrame):
128		The dataframe to write.
129		smiles_path (str):
130		The path to write the dataframe to.
131		"""
132
133		if isinstance(data, pd.Series):
134		data = data.to_frame(name='structure')
135		data = data.copy()
136		data['structure'] = data.structure.apply(lambda m: m.to_smiles())
137		data = data.reset_index()
138		cols = list(data.columns)
139		cols.insert(0, cols.pop(cols.index('structure')))
140		data = data.reindex(columns=cols)[cols]
141		data.to_csv(smiles_path, sep='\t', header=None, index=None)
142		del data
143
144
145	1	@classmethod
146	1	@wraps(read_smiles)
147		def _from_smiles_df(_, *args, **kwargs):
148		return read_smiles(*args, **kwargs)
149
150
151	1	@classmethod
152	1	@wraps(read_smiles)
153		def _from_smiles_series(_, *args, **kwargs):
154		return read_smiles(*args, **kwargs).structure
155
156
157	1	@wraps(write_smiles)
158		def _to_smiles_df(self, *args, **kwargs):
159		return write_smiles(self, *args, **kwargs)
160
161	1	pd.DataFrame.from_smiles = _from_smiles_df
162	1	pd.Series.from_smiles = _from_smiles_series
163	1	pd.Series.to_smiles = _to_smiles_df
164		pd.DataFrame.to_smiles = _to_smiles_df
165

richlewis42 / scikit-chem

parse() A last analyzed 2016-09-01 14:43 UTC

Complexity

Size

Duplication

Code Coverage

Importance

1. Missing Dependencies

2. Missing __init__.py files

Duplication Side-by-Side

Filter issues like

parse() A
last analyzed 2016-09-01 14:43 UTC

2. Missing __init__.py files