Completed
Push — master ( 1baf98...c0c140 )
by Rich
01:33
created

_to_smiles_df()   A

Complexity

Conditions 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
c 0
b 0
f 0
dl 0
loc 3
rs 10
1
#! /usr/bin/env python
2
#
3
# Copyright (C) 2007-2009 Rich Lewis <[email protected]>
4
# License: 3-clause BSD
5
6
"""
7
# skchem.io.smiles
8
9
Defining input and output operations for smiles files.
10
"""
11
12
import warnings
13
from functools import wraps
14
15
import pandas as pd
0 ignored issues
show
Configuration introduced by
The import pandas could not be resolved.

This can be caused by one of the following:

1. Missing Dependencies

This error could indicate a configuration issue of Pylint. Make sure that your libraries are available by adding the necessary commands.

# .scrutinizer.yml
before_commands:
    - sudo pip install abc # Python2
    - sudo pip3 install abc # Python3
Tip: We are currently not using virtualenv to run pylint. When installing your modules, make sure to use the command for the correct Python version.

2. Missing __init__.py files

This error could also result from missing __init__.py files in your module folders. Make sure that you place one file in each sub-folder.

Loading history...
16
17
from ..utils import Suppressor
18
from ..core import Mol
19
20
def read_smiles(smiles_file, smiles_column=0, name_column=None, delimiter='\t',
                title_line=False, error_bad_mol=False, warn_bad_mol=True,
                drop_bad_mol=True, *args, **kwargs):

    """Read a smiles file into a pandas dataframe.

    The function wraps the pandas read_csv function.

    Args:
        smiles_file (str, file-like):
            Location of data to load, specified as a string or passed
            directly as a file-like object.  URLs may also be used, see the
            pandas.read_csv documentation.
        smiles_column (int):
            The column index at which SMILES are provided.
            Defaults to `0`.
        name_column (int):
            The column index at which compound names are provided, for use as
            the index in the dataframe.  If None, use the default index.
            Defaults to `None`.
        delimiter (str):
            The delimiter used.
            Defaults to a tab character.
        title_line (bool):
            Whether a title line is provided, to use as column titles.  When
            true, this takes precedence over any `header` keyword argument.
            Defaults to `False`.
        error_bad_mol (bool):
            Whether an error should be raised when a molecule fails to parse.
            Defaults to `False`.
        warn_bad_mol (bool):
            Whether a warning should be raised when a molecule fails to parse.
            Only consulted when `error_bad_mol` is false.
            Defaults to `True`.
        drop_bad_mol (bool):
            If true, drop any row whose smiles failed to parse. Otherwise,
            the `structure` field of that row is None.
            Defaults to `True`.
        *args, **kwargs:
            Arguments will be passed on to pandas read_csv.

    Returns:
        pandas.DataFrame:
            The loaded data frame, with Mols supplied in the `structure` field.

    Raises:
        ValueError:
            If `error_bad_mol` is true and a molecule fails to parse.

    See Also:
        pandas.read_csv
        skchem.Mol.from_smiles
        skchem.io.sdf

    """

    # NOTE(review): Suppressor is a project helper; presumably it silences
    # toolkit output while parsing.  The warnings below are issued inside
    # this context - confirm they still reach the user.
    with Suppressor():

        # Always pop `header` so it is never passed to read_csv twice.  A
        # title line is line zero, as is usual for smiles; otherwise any
        # header the user supplied is kept (None when absent).
        header = kwargs.pop('header', None)
        if title_line:
            header = 0

        # read the smiles file
        data = pd.read_csv(smiles_file, delimiter=delimiter, header=header,
                           *args, **kwargs)

        # replace the smiles column label with 'structure'
        columns = list(data.columns)
        columns[smiles_column] = 'structure'
        data.columns = columns

        def parse(row):
            """ Parse smiles for row, returning None if it cannot be read. """
            try:
                return Mol.from_smiles(row.structure)
            except ValueError:
                msg = 'Molecule {} could not be decoded.'.format(row.name)
                if error_bad_mol:
                    raise ValueError(msg)
                elif warn_bad_mol:
                    warnings.warn(msg)

                return None

        data['structure'] = data['structure'].apply(str)

        # Row-wise apply on a frame without rows does not produce one value
        # per row, so only parse when there is something to parse.
        if len(data):
            data['structure'] = data.apply(parse, axis=1)

        if drop_bad_mol:
            data = data[data['structure'].notnull()]

        # set index if passed
        if name_column is not None:
            data = data.set_index(data.columns[name_column])

        return data
115
116
117
def write_smiles(data, smiles_path):

    """ Write a dataframe to a smiles file.

    The file is tab separated and has no header row.  The SMILES string is
    written as the first field of every line, followed by the index and then
    any remaining columns.  The object passed in is left unmodified.

    Args:
        data (pd.Series or pd.DataFrame):
            The data to write.  A dataframe must hold the molecules in a
            `structure` column; a series is treated as that column.  Every
            entry must provide a `to_smiles()` method.
        smiles_path (str):
            The path to write the dataframe to.  It is handed straight to
            `DataFrame.to_csv`.
    """

    if isinstance(data, pd.Series):
        data = data.to_frame(name='structure')

    # assign() returns a new frame, so the caller's molecules are not
    # overwritten with their string representation.
    smiles = data['structure'].apply(lambda mol: mol.to_smiles())
    data = data.assign(structure=smiles)

    # expose the index (compound names) as an ordinary column so it is written
    data = data.reset_index()

    # SMILES goes first; every other column keeps its relative order
    cols = list(data.columns)
    cols.insert(0, cols.pop(cols.index('structure')))

    data[cols].to_csv(smiles_path, sep='\t', header=False, index=False)
136
137
138
@classmethod
@wraps(read_smiles)
def _from_smiles_df(_, *args, **kwargs):
    # Classmethod shim for read_smiles: the bound class is ignored and every
    # argument is forwarded untouched.  wraps() copies read_smiles' metadata
    # (name, docstring) onto this function.
    frame = read_smiles(*args, **kwargs)
    return frame
142
143
@classmethod
@wraps(read_smiles)
def _from_smiles_series(_, *args, **kwargs):
    # Classmethod shim for read_smiles that keeps only the parsed molecules:
    # the bound class is ignored, the arguments are forwarded untouched and
    # the `structure` attribute of the resulting frame is returned.
    frame = read_smiles(*args, **kwargs)
    return frame.structure
147
148
@wraps(write_smiles)
def _to_smiles_df(self, *args, **kwargs):
    # Method shim for write_smiles: the instance becomes the `data` argument
    # and everything else is forwarded untouched.  The result of
    # write_smiles is passed back to the caller as is.
    outcome = write_smiles(self, *args, **kwargs)
    return outcome
151
152
# Attach the smiles readers and writers to the pandas containers at import
# time, so that they are available as pd.DataFrame.from_smiles(...),
# pd.Series.from_smiles(...) and <frame or series>.to_smiles(...).
pd.Series.from_smiles = _from_smiles_series
pd.Series.to_smiles = _to_smiles_df

pd.DataFrame.from_smiles = _from_smiles_df
pd.DataFrame.to_smiles = _to_smiles_df
156